SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = extractelement <4 x float> %14, i32 3 %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 %21 = add i32 %5, %7 %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21) %23 = extractelement <4 x float> %22, i32 0 %24 = extractelement <4 x float> %22, i32 1 %25 = extractelement <4 x float> %22, i32 2 %26 = extractelement <4 x float> %22, i32 3 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen ; E00C2000 80000100 buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605 exp 15, 12, 0, 1, 0, v1, v2, v3, v4 ; F80008CF 04030201 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 12 Code Size: 56 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = call i32 @llvm.SI.packf16(float %33, float %34) %38 = bitcast i32 %37 to float %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %38, float %40, float %38, float %40) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLAudioManager: Using ALSA: default Begin MonoManager ReloadAssembly Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/UnityEngine.dll (this message is harmless) Loading /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/UnityEngine.dll into Unity Child Domain Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Assembly-CSharp-firstpass.dll (this message is harmless) Loading /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Assembly-CSharp-firstpass.dll into Unity Child Domain Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Assembly-CSharp.dll (this message is harmless) Loading /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Assembly-CSharp.dll into Unity Child Domain Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Assembly-UnityScript.dll (this message is harmless) Loading /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Assembly-UnityScript.dll into Unity Child Domain Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/UnityEngine.UI.dll (this message is harmless) Loading /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/UnityEngine.UI.dll into Unity Child Domain Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Pathfinding.ClipperLib.dll (this message is harmless) Loading /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Pathfinding.ClipperLib.dll into Unity Child Domain Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Pathfinding.Ionic.Zip.Reduced.dll (this message is harmless) Loading /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Pathfinding.Ionic.Zip.Reduced.dll into Unity Child Domain Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Pathfinding.JsonFx.dll (this message is harmless) Loading /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Pathfinding.JsonFx.dll into Unity Child Domain Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Pathfinding.Poly2Tri.dll (this message is harmless) Loading /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Pathfinding.Poly2Tri.dll into Unity Child Domain Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Jboy.Core.dll (this message is harmless) Loading /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Jboy.Core.dll into Unity Child Domain Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Jboy.dll (this message is harmless) Loading /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Jboy.dll into Unity Child Domain Platform assembly: /media/bigdata/games/steam/steamapps/common/PSHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202 v_interp_mov_f32 v2, P0, 3, 0, [m0] ; C80A0302 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 4 Code Size: 40 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = extractelement <4 x float> %14, i32 3 %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 %21 = add i32 %5, %7 %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21) %23 = extractelement <4 x float> %22, i32 0 %24 = extractelement <4 x float> %22, i32 1 %25 = extractelement <4 x float> %22, i32 2 %26 = extractelement <4 x float> %22, i32 3 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen ; E00C2000 80000100 buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605 exp 15, 12, 0, 1, 0, v1, v2, v3, v4 ; F80008CF 04030201 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 12 Code Size: 56 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11requesting resize 1920 x 1080 requesting fullscreen 1920 x 1080 at 0 Hz Desktop is 1920 x 1080 @ 60 Hz Using libudev for joystick management Importing game controller configs Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/System.Core.dll (this message is harmless) Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/System.dll (this message is harmless) Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/System.Xml.dll (this message is harmless) Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/System.Data.dll (this message is harmless) Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/UnityScript.Lang.dll (this message is harmless) Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Boo.Lang.dll (this message is harmless) Steam Version (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Resolution[1920X1080] fs[True] (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/System.Transactions.dll (this message is harmless) Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/sqlite3 Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib.so Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib.so Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib.so Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib.so Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib.so Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib.so Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib.so Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib.so Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/sqlite3 Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/sqlite3 Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/sqlite3 Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib.so Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib.so Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib Connected to db (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libc.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libc.dylib.so Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libc.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libc.dylib.so Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libc.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libc.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libc.dylib.so Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libc.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libc.dylib.so Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libc.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/sqlite3 Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib.so Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib.so Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/sqlite3 Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib.so Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib.so Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/sqlite3 Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/sqlite3 Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_DatSetting breakpad minidump AppID = 237870 Steam_SetMinidumpSteamID: Caching Steam ID: 76561198118363848 [API loaded no] SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202 v_interp_mov_f32 v2, P0, 3, 0, [m0] ; C80A0302 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 4 Code Size: 40 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2], TEXCOORD[0] DCL CONST[0..3] DCL TEMP[0] 0: MUL TEMP[0], IN[0].xxxx, CONST[0] 1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0] 3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0] 4: MOV_SAT OUT[1], IN[1] 5: MOV OUT[2], IN[2] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = fmul float %33, %13 %54 = fmul float %33, %14 %55 = fmul float %33, %15 %56 = fmul float %33, %16 %57 = fmul float %34, %17 %58 = fadd float %57, %53 %59 = fmul float %34, %18 %60 = fadd float %59, %54 %61 = fmul float %34, %19 %62 = fadd float %61, %55 %63 = fmul float %34, %20 %64 = fadd float %63, %56 %65 = fmul float %35, %21 %66 = fadd float %65, %58 %67 = fmul float %35, %22 %68 = fadd float %67, %60 %69 = fmul float %35, %23 %70 = fadd float %69, %62 %71 = fmul float %35, %24 %72 = fadd float %71, %64 %73 = fmul float %36, %25 %74 = fadd float %73, %66 %75 = fmul float %36, %26 %76 = fadd float %75, %68 %77 = fmul float %36, %27 %78 = fadd float %77, %70 %79 = fmul float %36, %28 %80 = fadd float %79, %72 %81 = call float @llvm.AMDIL.clamp.(float %41, float 0.000000e+00, float 1.000000e+00) %82 = call float @llvm.AMDIL.clamp.(float %42, float 0.000000e+00, float 1.000000e+00) %83 = call float @llvm.AMDIL.clamp.(float %43, float 0.000000e+00, float 1.000000e+00) %84 = call float @llvm.AMDIL.clamp.(float %44, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %81, float %82, float %83, float %84) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %49, float %50, float %51, float %52) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %74, float %76, float %78, float %80) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 buffer_load_format_xyzw v[9:12], v0, s[8:11], 0 idxen ; E00C2000 80020900 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v1 ; 10000204 v_mac_f32_e32 v0, s8, v2 ; 3E000408 v_mul_f32_e32 v13, s5, v1 ; 101A0205 v_mac_f32_e32 v13, s9, v2 ; 3E1A0409 v_mul_f32_e32 v14, s6, v1 ; 101C0206 v_mac_f32_e32 v14, s10, v2 ; 3E1C040A v_mul_f32_e32 v1, s7, v1 ; 10020207 v_mac_f32_e32 v1, s11, v2 ; 3E02040B v_mac_f32_e32 v0, s12, v3 ; 3E00060C v_mac_f32_e32 v13, s13, v3 ; 3E1A060D v_mac_f32_e32 v14, s14, v3 ; 3E1C060E v_mac_f32_e32 v1, s15, v3 ; 3E02060F v_mac_f32_e32 v0, s16, v4 ; 3E000810 v_mac_f32_e32 v13, s17, v4 ; 3E1A0811 v_mac_f32_e32 v14, s18, v4 ; 3E1C0812 v_mac_f32_e32 v1, s0, v4 ; 3E020800 v_add_f32_e64 v2, 0, v5 clamp ; D2060802 00020A80 v_add_f32_e64 v3, 0, v6 clamp ; D2060803 00020C80 v_add_f32_e64 v4, 0, v7 clamp ; D2060804 00requesting resize 1920 x 1080 requesting fullscreen 1920 x 1080 at 0 Hz Desktop is 1920 x 1080 @ 60 Hz MUTEX Created. (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Changing real window size to 1920 x 1080 Unloading 6 Unused Serialized files (Serialized files now loaded: 0) Unloading 107 unused Assets to reduce memory usage. Loaded Objects now: 23007. Total: 85.608994 ms (FindLiveObjects: 2.482000 ms CreateObjectMapping: 1.400000 ms MarkObjects: 81.138000 ms DeleteObjects: 0.588000 ms) Unloading 3 Unused Serialized files (Serialized files now loaded: 0) UnloadTime: 2.956000 ms Unloading 1 unused Assets to reduce memory usage. Loaded Objects now: 25899. Total: 96.442001 ms (FindLiveObjects: 2.711000 ms CreateObjectMapping: 2.635000 ms MarkObjects: 90.958000 ms DeleteObjects: 0.138000 ms) Changing real window size to 1920 x 1080 Changing real window size to 1920 x 1080 error version:3; need version:4 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:Pathea.PeGameSummary+Mgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) error version:3; need version:4 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) error version:3; need version:4 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) error version:2; need version:4 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) error version:3; need version:4 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) error version:2; need version:4 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) error version:1; need version:4 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) error version:1; need version:4 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) error version:1; need version:4 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) error version:2; need version:4 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) error version:0; need version:4 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) error version:0; need version:4 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) error version:0; need version:4 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Unloading 3 Unused Serialized files (Serialized files now loaded: 0) Unloading 0 unused Assets to reduce memory usage. Loaded Objects now: 26114. Total: 93.065002 ms (FindLiveObjects: 3.224000 ms CreateObjectMapping: 1.868000 ms MarkObjects: 87.834999 ms DeleteObjects: 0.138000 ms) Unloading 4 Unused Serialized files (Serialized files now loaded: 0) WARNING: Shader Unsupported: 'Hidden/Dof/DX11Dof' - Setting to default shader. UnloadTime: 8.448000 ms -----------------------------clear singleton, count:6 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:Pathea.ArchiveMgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) temp path:/tmp/ (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:Pathea.SinglePlayerTypeArchiveMgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) ****************load saved adventure**************** (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:Pathea.PeGameSummary+Mgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) game summary (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.InitArchiveSummary, progress:0.03448276 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Unloading 47 unused Assets to reduce memory usage. Loaded Objects now: 23297. Total: 87.361000 ms (FindLiveObjects: 1.398000 ms CreateObjectMapping: 1.325000 ms MarkObjects: 81.345001 ms DeleteObjects: 3.292000 ms) -----------------------------register singleton:Pathea.PeCreature (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:Pathea.MainPlayer (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:Pathea.EntityMgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:InputManager (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:ReputationSystem (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadReputation, progress:0.06896552 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadRandomItemMgr, progress:0.1034483 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:Pathea.RandomMapConfigArchiveMgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) SeedString:Planet MariaterrainHeight:128mapsize: 4, riverdensity: 1, riverwidth: 1 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Test: CT_Dry (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Test: waterHeight:8.5 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Test: c_fWaterLvl:8.5 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) VoxelPaintXMLParser: No sectionmap found. (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadRandomTerrainParam, progress:0.137931 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) loadIso Time: 763040 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) loadIso Time: 1658970 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) loadIso Time: 1323810 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) loadIso Time: 753380 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) loadIso Time: 1183810 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) loadIso Time: 865620 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) loadIso Time: 6855300 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) loadIso Time: 2840080 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) loadIso Time: 454560 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) loadIso Time: 555180 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) loadIso Time: 881780 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) loadIso Time: 1379770 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) loadIso Time: 1898380 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) loadIso Time: 215400 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadRandomTown, progress:0.1724138 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) set player spawn pos:(-203.0, 36.6, -423.0) (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadAdventurePlayerSpawnPos, progress:0.2068966 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadCamera, progress:0.2413793 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:Pathea.VArtifactTownArchiveMgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:DetectedTownMgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:PeMap.LabelMgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:Pathea.TownNpcArchiveMgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:Pathea.VABuildingArchiveMgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:Pathea.VoxelTerrainArchiveMgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadRandomTerrainWithTown, progress:0.2758621 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) [VFDataRTGen]Error: Unrecognized voxel tile,discard the following data.[-2,-17,0,128]:0 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) [VFDataRTGen]Error: Unrecognized voxel tile,discard the following data.[-2,-17,0,128]:0 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:Pathea.CreationDataArchiveMgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadCreationData, progress:0.3103448 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:Pathea.ItemAssetArchiveMgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:ItemAsset.ItemMgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:ItemAsset.ItemProto+Mgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Adjust befor:(-224.0, -4.0, -448.0) (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Adjust befor:(-224.0, 28.0, -448.0) (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Adjust befor:(-224.0, 60.0, -448.0) (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadItemAsset, progress:0.3448276 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadWaveSystem, progress:0.3793103 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:Pathea.GrassDataSLArchiveMgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadGrassRandom, progress:0.4137931 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadVETreeProtos, progress:0.4482759 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Creating RSubTerrainMgr! (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:Pathea.RSubTerrSLArchiveMgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadRandomTree, progress:0.4827586 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Coroutine RSubTerrCreator[2]::RefreshRegion started (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Coroutine RSubTerrCreator[1]::RefreshRegion started (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Coroutine RSubTerrCreator[0]::RefreshRegion started (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadEnvironment, progress:0.5172414 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:Pathea.WorldInfoMgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadWorldInfo, progress:0.5517241 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:Railway.Manager (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadRailway, progress:0.5862069 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadEntityCreator, progress:0.6206896 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadCreature, progress:0.6551724 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) [Singleton] An instance of Behave.Runtime.BTLauncher is needed in the scene, so '(singleton) Behave.Runtime.BTLauncher (UnityEngine.GameObject)' was created with DontDestroyOnLoad. (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:MousePicker (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadPathFinding, progress:0.6896552 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1560) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) -----------------------------register singleton:PeTipsMsgMan (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) -----------------------------register singleton:PeMap.MaskTile+Mgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadGUI, progress:0.7241379 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:DraggingMgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:Pathea.CSDataMgrArchiveMgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) The CSClodsMgr is null. (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadCSData, progress:0.7586207 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadFarm, progress:0.7931035 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) The CSCloMgr is areadly. (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Still have this data inst. (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Still have this data inst. (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadColony, progress:0.8275862 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:Pathea.UiHelpArchiveMgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadUiHelp, progress:0.862069 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:PeMap.StaticPoint+Mgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:PeMap.UserLabel+Mgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) 5 3 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) 5 4 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) 5 5 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) 6 3 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) 6 4 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) 6 5 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) 7 3 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) 7 4 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) 7 5 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadRandomMap, progress:0.8965517 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:UIBlockSaver (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.InitBuildManager, progress:0.9310345 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) [Singleton] An instance of ForceSetting is needed in the scene, so '(singleton) ForceSetting (UnityEngine.GameObject)' was created with DontDestroyOnLoad. (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:MisRepositoryArchiveMgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:NpcUserDataArchiveMgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:EntityCreatedArchiveMgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadRandomStory, progress:0.9655172 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:PeMap.MapIcon+Mgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Pathea.GameLoader.LoadSingleAdventureInitData, progress:1 (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) born_pos(-203.0, 36.6, -423.0) (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) -----------------------------register singleton:Pathea.NpcAbility+Mgr (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) The referenced script on this Behaviour is missing! (Filename: Line: 1713) Aim Transform unassigned in Aim IK solver. Please Assign a Transform (lineal descendant to the last bone in the spine) that you want to be aimed at IKPosition (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) Unloading 8 Unused Serialized files (Serialized files now loaded: 0) Unloading 2011 unused Assets to reduce memory usage. Loaded Objects now: 129033. Total: 267.863983 ms (FindLiveObjects: 18.658998 ms CreateObjectMapping: 7.872000 ms MarkObjects: 232.768005 ms DeleteObjects: 8.565001 ms) [Singleton] An instance of MouseOpMgr is needed in the scene, so '(singleton) MouseOpMgr (UnityEngine.GameObject)' was created with DontDestroyOnLoad. (Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56) ad.const(<16 x i8> %12, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = add i32 %5, %7 %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 1 %59 = extractelement <4 x float> %56, i32 2 %60 = extractelement <4 x float> %56, i32 3 %61 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = add i32 %5, %7 %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %62, i32 0, i32 %63) %65 = extractelement <4 x float> %64, i32 0 %66 = extractelement <4 x float> %64, i32 1 %67 = extractelement <4 x float> %64, i32 2 %68 = extractelement <4 x float> %64, i32 3 %69 = fmul float %49, %13 %70 = fmul float %49, %14 %71 = fmul float %49, %15 %72 = fmul float %49, %16 %73 = fmul float %50, %17 %74 = fadd float %73, %69 %75 = fmul float %50, %18 %76 = fadd float %75, %70 %77 = fmul float %50, %19 %78 = fadd float %77, %71 %79 = fmul float %50, %20 %80 = fadd float %79, %72 %81 = fmul float %51, %21 %82 = fadd float %81, %74 %83 = fmul float %51, %22 %84 = fadd float %83, %76 %85 = fmul float %51, %23 %86 = fadd float %85, %78 %87 = fmul float %51, %24 %88 = fadd float %87, %80 %89 = fmul float %52, %25 %90 = fadd float %89, %82 %91 = fmul float %52, %26 %92 = fadd float %91, %84 %93 = fmul float %52, %27 %94 = fadd float %93, %86 %95 = fmul float %52, %28 %96 = fadd float %95, %88 %97 = call float @llvm.AMDIL.clamp.(float %57, float 0.000000e+00, float 1.000000e+00) %98 = call float @llvm.AMDIL.clamp.(float %58, float 0.000000e+00, float 1.000000e+00) %99 = call float @llvm.AMDIL.clamp.(float %59, float 0.000000e+00, float 1.000000e+00) %100 = call float @llvm.AMDIL.clamp.(float %60, float 0.000000e+00, float 1.000000e+00) %101 = fmul float %65, %29 %102 = fmul float %65, %30 %103 = fmul float %65, %31 %104 = fmul float %65, %32 %105 = fmul float %66, %33 %106 = fadd float %105, %101 %107 = fmul float %66, %34 %108 = fadd float %107, %102 %109 = fmul float %66, %35 %110 = fadd float %109, %103 %111 = fmul float %66, %36 %112 = fadd float %111, %104 %113 = fmul float %67, %37 %114 = fadd float %113, %106 %115 = fmul float %67, %38 %116 = fadd float %115, %108 %117 = fmul float %67, %39 %118 = fadd float %117, %110 %119 = fmul float %67, %40 %120 = fadd float %119, %112 %121 = fmul float %68, %41 %122 = fadd float %121, %114 %123 = fmul float %68, %42 %124 = fadd float %123, %116 %125 = fmul float %68, %43 %126 = fadd float %125, %118 %127 = fmul float %68, %44 %128 = fadd float %127, %120 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %97, float %98, float %99, float %100) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %122, float %124, float %126, float %128) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %90, float %92, float %94, float %96) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 buffer_load_format_xyzw v[9:12], v0, s[8:11], 0 idxen ; E00C2000 80020900 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x4 ; C2028104 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x2 ; C2040102 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s10, s[0:3], 0x3 ; C2050103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v1 ; 10000204 v_mac_f32_e32 v0, s5, v2 ; 3E000405 v_mul_f32_e32 v13, s6, v1 ; 101A0206 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108 s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109 v_mac_f32_e32 v13, s7, v2 ; 3E1A0407 v_mul_f32_e32 v14, s8, v1 ; 101C0208 v_mac_f32_e32 v14, s9, v2 ; 3E1C0409 s_buffer_load_dword s7, s[0:3], 0xa ; C203810A s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_buffer_load_dword s9, s[0:3], 0xc ; C204810C s_buffer_load_dword s11, s[0:3], 0xd ; C205810D s_buffer_load_dword s12, s[0:3], 0xe ; C206010E v_mul_f32_e32 v1, s10, v1 ; 1002020A s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v1, s4, v2 ; 3E020404 v_mac_f32_e32 v0, s5, v3 ; 3E000605 v_mac_f32_e32 v13, s6, v3 ; 3E1A0606 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F v_mac_f32_e32 v14, s7, v3 ; 3E1C0607 v_mac_f32_e32 v1, s8, v3 ; 3E020608 v_mac_f32_e32 v0, s9, v4 ; 3E000809 v_mac_f32_e32 v13, s11, v4 ; 3E1A080B v_mac_f32_e32 v14, s12, v4 ; 3E1C080C s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_buffer_load_dword s6, s[0:3], 0x11 ; C2030111 s_buffer_load_dword s7, s[0:3], 0x12 ; C2038112 s_buffer_load_dword s8, s[0:3], 0x13 ; C2040113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v1, s4, v4 ; 3E020804 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_buffer_load_dword s9, s[0:3], 0x15 ; C2048115 s_buffer_load_dword s10, s[0:3], 0x16 ; C2050116 s_buffer_load_dword s11, s[0:3], 0x17 ; C2058117 s_buffer_load_dword s12, s[0:3], 0x18 ; C2060118 s_buffer_load_dword s13, s[0:3], 0x19 ; C2068119 s_buffer_load_dword s14, s[0:3], 0x1a ; C207011A s_buffer_load_dword s15, s[0:3], 0x1b ; C207811B s_buffer_load_dword s16, s[0:3], 0x1c ; C208011C s_buffer_load_dword s17, s[0:3], 0x1d ; C208811D s_buffer_load_dword s18, s[0:3], 0x1e ; C209011E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F v_mul_f32_e32 v2, s5, v9 ; 10041205 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v2, s4, v10 ; 3E041404 v_mul_f32_e32 v3, s6, v9 ; 10061206 v_mac_f32_e32 v3, s9, v10 ; 3E061409 v_mul_f32_e32 v4, s7, v9 ; 10081207 v_mac_f32_e32 v4, s10, v10 ; 3E08140A v_mul_f32_e32 v9, s8, v9 ; 10121208 v_mac_f32_e32 v9, s11, v10 ; 3E12140B v_mac_f32_e32 v2, s12, v11 ; 3E04160C v_mac_f32_e32 v3, s13, v11 ; 3E06160D v_mac_f32_e32 v4, s14, v11 ; 3E08160E v_mac_f32_e32 v9, s15, v11 ; 3E12160F v_mac_f32_e32 v2, s16, v12 ; 3E041810 v_mac_f32_e32 v3, s17, v12 ; 3E061811 v_mac_f32_e32 v4, s18, v12 ; 3E081812 v_mac_f32_e32 v9, s0, v12 ; 3E121800 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_add_f32_e64 v6, 0, v6 clamp ; D2060806 00020C80 v_add_f32_e64 v7, 0, v7 clamp ; D2060807 00020E80 v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080 exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605 exp 15, 33, 0, 0, 0, v2, v3, v4, v9 ; F800021F 09040302 exp 15, 12, 0, 1, 0, v0, v13, v14, v1 ; F80008CF 010E0D00 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 384 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], COLOR, COLOR DCL IN[1], TEXCOORD[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: MOV TEMP[0].w, IN[1].wwww 2: TXP TEMP[0], TEMP[0], SAMP[0], 2D 3: MUL TEMP[0], TEMP[0], IN[0] 4: MOV OUT[0], TEMP[0] 5: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %33 = fdiv float %30, %32 %34 = fdiv float %31, %32 %35 = bitcast float %33 to i32 %36 = bitcast float %34 to i32 %37 = insertelement <2 x i32> undef, i32 %35, i32 0 %38 = insertelement <2 x i32> %37, i32 %36, i32 1 %39 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %38, <32 x i8> %23, <16 x i8> %25, i32 2) %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 %42 = extractelement <4 x float> %39, i32 2 %43 = extractelement <4 x float> %39, i32 3 %44 = fmul float %40, %26 %45 = fmul float %41, %27 %46 = fmul float %42, %28 %47 = fmul float %43, %29 %48 = call i32 @llvm.SI.packf16(float %44, float %45) %49 = bitcast i32 %48 to float %50 = call i32 @llvm.SI.packf16(float %46, float %47) %51 = bitcast i32 %50 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %49, float %51, float %49, float %51) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 v_mov_b32_e32 v1, 0x6f800000 ; 7E0202FF 6F800000 v_cmp_gt_f32_e64 vcc, |v0|, v1 ; D008016A 00020300 v_mov_b32_e32 v1, 0x2f800000 ; 7E0202FF 2F800000 v_cndmask_b32_e32 v1, 1.0, v1 ; 000202F2 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_rcp_f32_e32 v0, v0 ; 7E005500 v_mul_f32_e32 v6, v0, v6 ; 100C0D00 v_mul_f32_e32 v0, v0, v7 ; 10000F00 v_mul_f32_e32 v6, v6, v1 ; 100C0306 v_mul_f32_e32 v7, v0, v1 ; 100E0300 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[6:9], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800F00 00020606 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_mul_f32_e32 v1, v3, v7 ; 10020F03 v_mul_f32_e32 v2, v4, v8 ; 10041104 v_mul_f32_e32 v3, v5, v9 ; 10061305 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 176 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..7] DCL TEMP[0..3], LOCAL 0: MOV TEMP[0].x, CONST[0].xxxx 1: MOV TEMP[0].y, CONST[1].xxxx 2: MOV TEMP[0].z, CONST[2].xxxx 3: MOV TEMP[1].x, CONST[0].yyyy 4: MOV TEMP[1].y, CONST[1].yyyy 5: MOV TEMP[1].z, CONST[2].yyyy 6: MOV TEMP[2].x, CONST[0].zzzz 7: MOV TEMP[2].y, CONST[1].zzzz 8: MOV TEMP[2].z, CONST[2].zzzz 9: MUL TEMP[3], CONST[4], IN[0].xxxx 10: MAD TEMP[3], CONST[5], IN[0].yyyy, TEMP[3] 11: MAD TEMP[3], CONST[6], IN[0].zzzz, TEMP[3] 12: MAD TEMP[3], CONST[7], IN[0].wwww, TEMP[3] 13: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[1].xxxx 14: MAD TEMP[0].xyz, TEMP[1].xyzz, IN[1].yyyy, TEMP[0].xyzz 15: MAD TEMP[0].xyz, TEMP[2].xyzz, IN[1].zzzz, TEMP[0].xyzz 16: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 17: RSQ TEMP[1].x, TEMP[1].xxxx 18: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 19: MOV OUT[1], TEMP[0] 20: MOV OUT[0], TEMP[3] 21: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = add i32 %5, %7 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = fmul float %22, %42 %54 = fmul float %23, %42 %55 = fmul float %24, %42 %56 = fmul float %25, %42 %57 = fmul float %26, %43 %58 = fadd float %57, %53 %59 = fmul float %27, %43 %60 = fadd float %59, %54 %61 = fmul float %28, %43 %62 = fadd float %61, %55 %63 = fmul float %29, %43 %64 = fadd float %63, %56 %65 = fmul float %30, %44 %66 = fadd float %65, %58 %67 = fmul float %31, %44 %68 = fadd float %67, %60 %69 = fmul float %32, %44 %70 = fadd float %69, %62 %71 = fmul float %33, %44 %72 = fadd float %71, %64 %73 = fmul float %34, %45 %74 = fadd float %73, %66 %75 = fmul float %35, %45 %76 = fadd float %75, %68 %77 = fmul float %36, %45 %78 = fadd float %77, %70 %79 = fmul float %37, %45 %80 = fadd float %79, %72 %81 = fmul float %13, %50 %82 = fmul float %16, %50 %83 = fmul float %19, %50 %84 = fmul float %14, %51 %85 = fadd float %84, %81 %86 = fmul float %17, %51 %87 = fadd float %86, %82 %88 = fmul float %20, %51 %89 = fadd float %88, %83 %90 = fmul float %15, %52 %91 = fadd float %90, %85 %92 = fmul float %18, %52 %93 = fadd float %92, %87 %94 = fmul float %21, %52 %95 = fadd float %94, %89 %96 = fmul float %91, %91 %97 = fmul float %93, %93 %98 = fadd float %97, %96 %99 = fmul float %95, %95 %100 = fadd float %98, %99 %101 = call float @llvm.AMDGPU.rsq.clamped.f32(float %100) %102 = fmul float %91, %101 %103 = fmul float %93, %101 %104 = fmul float %95, %101 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %102, float %103, float %104, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %74, float %76, float %78, float %80) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x4 ; C2028104 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_buffer_load_dword s7, s[0:3], 0x6 ; C2038106 s_buffer_load_dword s8, s[0:3], 0x8 ; C2040108 s_buffer_load_dword s9, s[0:3], 0x9 ; C2048109 s_buffer_load_dword s10, s[0:3], 0xa ; C205010A s_buffer_load_dword s11, s[0:3], 0x10 ; C2058110 s_buffer_load_dword s14, s[0:3], 0x11 ; C2070111 s_buffer_load_dword s15, s[0:3], 0x12 ; C2078112 s_buffer_load_dword s16, s[0:3], 0x13 ; C2080113 s_buffer_load_dword s17, s[0:3], 0x14 ; C2088114 s_buffer_load_dword s18, s[0:3], 0x15 ; C2090115 s_buffer_load_dword s19, s[0:3], 0x16 ; C2098116 s_buffer_load_dword s20, s[0:3], 0x17 ; C20A0117 s_buffer_load_dword s21, s[0:3], 0x18 ; C20A8118 s_buffer_load_dword s22, s[0:3], 0x19 ; C20B0119 s_buffer_load_dword s23, s[0:3], 0x1a ; C20B811A s_buffer_load_dword s24, s[0:3], 0x1b ; C20C011B s_buffer_load_dword s25, s[0:3], 0x1c ; C20C811C s_buffer_load_dword s26, s[0:3], 0x1d ; C20D011D s_buffer_load_dword s27, s[0:3], 0x1e ; C20D811E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s11, v1 ; 1000020B v_mul_f32_e32 v8, s12, v5 ; 10100A0C v_mac_f32_e32 v8, s13, v6 ; 3E100C0D v_mul_f32_e32 v9, s5, v5 ; 10120A05 v_mac_f32_e32 v9, s6, v6 ; 3E120C06 v_mul_f32_e32 v5, s8, v5 ; 100A0A08 v_mac_f32_e32 v5, s9, v6 ; 3E0A0C09 v_mac_f32_e32 v8, s4, v7 ; 3E100E04 v_mac_f32_e32 v9, s7, v7 ; 3E120E07 v_mac_f32_e32 v5, s10, v7 ; 3E0A0E0A v_mac_f32_e32 v0, s17, v2 ; 3E000411 v_mul_f32_e32 v6, s14, v1 ; 100C020E v_mac_f32_e32 v6, s18, v2 ; 3E0C0412 v_mul_f32_e32 v7, s15, v1 ; 100E020F v_mac_f32_e32 v7, s19, v2 ; 3E0E0413 v_mul_f32_e32 v1, s16, v1 ; 10020210 v_mac_f32_e32 v1, s20, v2 ; 3E020414 v_mac_f32_e32 v0, s21, v3 ; 3E000615 v_mac_f32_e32 v6, s22, v3 ; 3E0C0616 v_mac_f32_e32 v7, s23, v3 ; 3E0E0617 v_mac_f32_e32 v1, s24, v3 ; 3E020618 v_mac_f32_e32 v0, s25, v4 ; 3E000819 v_mul_f32_e32 v2, v8, v8 ; 10041108 v_mac_f32_e32 v2, v9, v9 ; 3E041309 v_mac_f32_e32 v2, v5, v5 ; 3E040B05 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 v_mac_f32_e32 v6, s26, v4 ; 3E0C081A v_mac_f32_e32 v7, s27, v4 ; 3E0E081B v_mac_f32_e32 v1, s0, v4 ; 3E020800 v_mul_f32_e32 v3, v2, v8 ; 10061102 v_mul_f32_e32 v4, v2, v9 ; 10081302 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_mov_b32_e32 v5, 0 ; 7E0A0280 exp 15, 32, 0, 0, 0, v3, v4, v2, v5 ; F800020F 05020403 exp 15, 12, 0, 1, 0, v0, v6, v7, v1 ; F80008CF 01070600 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 12 Code Size: 292 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL CONST[0] DCL TEMP[0], LOCAL IMM[0] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xyz, IN[0].xyzz, IMM[0].xxxx, IMM[0].xxxx 1: MOV TEMP[0].w, CONST[0].xxxx 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %26 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %28 = fmul float %25, 5.000000e-01 %29 = fadd float %28, 5.000000e-01 %30 = fmul float %26, 5.000000e-01 %31 = fadd float %30, 5.000000e-01 %32 = fmul float %27, 5.000000e-01 %33 = fadd float %32, 5.000000e-01 %34 = call i32 @llvm.SI.packf16(float %29, float %31) %35 = bitcast i32 %34 to float %36 = call i32 @llvm.SI.packf16(float %33, float %24) %37 = bitcast i32 %36 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %35, float %37, float %35, float %37) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_mov_b32 m0, s9 ; BEFC0309 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v0, v0, 2, 0, [m0] ; C8000200 v_interp_p2_f32 v0, [v0], v1, 2, 0, [m0] ; C8010201 v_mad_f32 v1, 0.5, v2, 0.5 ; D2820001 03C204F0 v_mad_f32 v2, 0.5, v3, 0.5 ; D2820002 03C206F0 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 s_waitcnt lgkmcnt(0) ; BF8C007F v_cvt_pkrtz_f16_f32_e64 v0, v0, s0 ; D25E0000 00000100 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 4 Code Size: 92 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG 0: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) ret void } declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } Shader Disassembly: v_mov_b32_e32 v0, 0 ; 7E000280 exp 0, 0, 0, 1, 1, v0, v0, v0, v0 ; F8001800 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 16 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL OUT[1], POSITION DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxy 1: TEX OUT[1].z, IN[0], SAMP[0], 2D 2: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2) %33 = extractelement <4 x float> %32, i32 2 call void @llvm.SI.export(i32 1, i32 0, i32 0, i32 8, i32 0, float %33, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_mov_b32_e32 v0, 0 ; 7E000280 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v1, 4, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800400 00030102 s_waitcnt vmcnt(0) ; BF8C0770 exp 1, 8, 0, 0, 0, v1, v0, v0, v0 ; F8000081 00000001 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 0, 0, 1, 1, v0, v0, v0, v1 ; F800180F 01000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..9] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.5000, -1.0000, 1.0000, 0.0000} 0: MUL TEMP[0], CONST[2], IN[0].xxxx 1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[5], IN[0].wwww, TEMP[0] 4: MUL TEMP[1].xyw, TEMP[0], IMM[0].xxxx 5: MOV TEMP[2].x, TEMP[1].xxxx 6: MUL TEMP[3].x, TEMP[1].yyyy, CONST[0].xxxx 7: MOV TEMP[2].y, TEMP[3].xxxx 8: ADD TEMP[1].xy, TEMP[2].xyyy, TEMP[1].wwww 9: MOV TEMP[1].zw, TEMP[0].wwzw 10: MUL TEMP[2], CONST[6], IN[0].xxxx 11: MAD TEMP[2], CONST[7], IN[0].yyyy, TEMP[2] 12: MAD TEMP[2], CONST[8], IN[0].zzzz, TEMP[2] 13: MAD TEMP[2].xyz, CONST[9], IN[0].wwww, TEMP[2] 14: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[0].yyzz 15: LRP TEMP[2].xyz, CONST[1].xxxx, IN[1].xyzz, TEMP[2].xyzz 16: MOV OUT[1], TEMP[1] 17: MOV OUT[2], TEMP[2] 18: MOV OUT[0], TEMP[0] 19: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = extractelement <4 x float> %49, i32 3 %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = add i32 %5, %7 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = fmul float %15, %50 %62 = fmul float %16, %50 %63 = fmul float %17, %50 %64 = fmul float %18, %50 %65 = fmul float %19, %51 %66 = fadd float %65, %61 %67 = fmul float %20, %51 %68 = fadd float %67, %62 %69 = fmul float %21, %51 %70 = fadd float %69, %63 %71 = fmul float %22, %51 %72 = fadd float %71, %64 %73 = fmul float %23, %52 %74 = fadd float %73, %66 %75 = fmul float %24, %52 %76 = fadd float %75, %68 %77 = fmul float %25, %52 %78 = fadd float %77, %70 %79 = fmul float %26, %52 %80 = fadd float %79, %72 %81 = fmul float %27, %53 %82 = fadd float %81, %74 %83 = fmul float %28, %53 %84 = fadd float %83, %76 %85 = fmul float %29, %53 %86 = fadd float %85, %78 %87 = fmul float %30, %53 %88 = fadd float %87, %80 %89 = fmul float %82, 5.000000e-01 %90 = fmul float %84, 5.000000e-01 %91 = fmul float %88, 5.000000e-01 %92 = fmul float %90, %13 %93 = fadd float %89, %91 %94 = fadd float %92, %91 %95 = fmul float %31, %50 %96 = fmul float %32, %50 %97 = fmul float %33, %50 %98 = fmul float %34, %50 %99 = fmul float %35, %51 %100 = fadd float %99, %95 %101 = fmul float %36, %51 %102 = fadd float %101, %96 %103 = fmul float %37, %51 %104 = fadd float %103, %97 %105 = fmul float %38, %51 %106 = fadd float %105, %98 %107 = fmul float %39, %52 %108 = fadd float %107, %100 %109 = fmul float %40, %52 %110 = fadd float %109, %102 %111 = fmul float %41, %52 %112 = fadd float %111, %104 %113 = fmul float %42, %52 %114 = fadd float %113, %106 %115 = fmul float %43, %53 %116 = fadd float %115, %108 %117 = fmul float %44, %53 %118 = fadd float %117, %110 %119 = fmul float %45, %53 %120 = fadd float %119, %112 %121 = fsub float -0.000000e+00, %116 %122 = fsub float -0.000000e+00, %118 %123 = call float @llvm.AMDGPU.lrp(float %14, float %58, float %121) %124 = call float @llvm.AMDGPU.lrp(float %14, float %59, float %122) %125 = call float @llvm.AMDGPU.lrp(float %14, float %60, float %120) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %93, float %94, float %86, float %88) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %123, float %124, float %125, float %114) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %82, float %84, float %86, float %88) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_buffer_load_dword s5, s[0:3], 0xc ; C202810C s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109 s_buffer_load_dword s7, s[0:3], 0xd ; C203810D s_buffer_load_dword s8, s[0:3], 0xa ; C204010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v1 ; 10000204 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_buffer_load_dword s9, s[0:3], 0xf ; C204810F v_mac_f32_e32 v0, s5, v2 ; 3E000405 v_mul_f32_e32 v8, s6, v1 ; 10100206 v_mac_f32_e32 v8, s7, v2 ; 3E100407 s_buffer_load_dword s5, s[0:3], 0xb ; C202810B v_mul_f32_e32 v9, s8, v1 ; 10120208 s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118 s_buffer_load_dword s7, s[0:3], 0x1c ; C203811C s_buffer_load_dword s8, s[0:3], 0x19 ; C2040119 s_buffer_load_dword s10, s[0:3], 0x1d ; C205011D s_buffer_load_dword s11, s[0:3], 0x1a ; C205811A s_buffer_load_dword s12, s[0:3], 0x1e ; C206011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v9, s4, v2 ; 3E120404 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B v_mul_f32_e32 v10, s5, v1 ; 10140205 v_mac_f32_e32 v10, s9, v2 ; 3E140409 v_mul_f32_e32 v11, s6, v1 ; 10160206 v_mac_f32_e32 v11, s7, v2 ; 3E160407 v_mul_f32_e32 v12, s8, v1 ; 10180208 v_mac_f32_e32 v12, s10, v2 ; 3E18040A v_mul_f32_e32 v13, s11, v1 ; 101A020B v_mac_f32_e32 v13, s12, v2 ; 3E1A040C s_buffer_load_dword s5, s[0:3], 0x1f ; C202811F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v1 ; 10020204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s6, s[0:3], 0x11 ; C2030111 s_buffer_load_dword s7, s[0:3], 0x12 ; C2038112 s_buffer_load_dword s8, s[0:3], 0x13 ; C2040113 s_buffer_load_dword s9, s[0:3], 0x20 ; C2048120 s_buffer_load_dword s10, s[0:3], 0x21 ; C2050121 s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122 s_buffer_load_dword s12, s[0:3], 0x23 ; C2060123 v_mac_f32_e32 v1, s5, v2 ; 3E020405 s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s4, v3 ; 3E000604 v_mac_f32_e32 v8, s6, v3 ; 3E100606 v_mac_f32_e32 v9, s7, v3 ; 3E120607 v_mac_f32_e32 v10, s8, v3 ; 3E140608 v_mac_f32_e32 v11, s9, v3 ; 3E160609 v_mac_f32_e32 v12, s10, v3 ; 3E18060A s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_buffer_load_dword s6, s[0:3], 0x16 ; C2030116 s_buffer_load_dword s7, s[0:3], 0x17 ; C2038117 s_buffer_load_dword s8, s[0:3], 0x24 ; C2040124 s_buffer_load_dword s9, s[0:3], 0x25 ; C2048125 s_buffer_load_dword s10, s[0:3], 0x26 ; C2050126 v_mac_f32_e32 v13, s11, v3 ; 3E1A060B s_buffer_load_dword s11, s[0:3], 0x4 ; C2058104 v_mac_f32_e32 v1, s12, v3 ; 3E02060C v_mac_f32_e32 v0, s5, v4 ; 3E000805 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v8, s4, v4 ; 3E100804 v_mac_f32_e32 v9, s6, v4 ; 3E120806 v_mac_f32_e32 v10, s7, v4 ; 3E140807 v_mac_f32_e32 v11, s8, v4 ; 3E160808 v_mac_f32_e32 v12, s9, v4 ; 3E180809 v_mac_f32_e32 v13, s10, v4 ; 3E1A080A s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 v_sub_f32_e64 v2, 1.0, s11 ; D2080002 000016F2 v_mul_f32_e32 v3, v11, v2 ; 1006050B v_mul_f32_e32 v4, v12, v2 ; 1008050C v_mul_f32_e32 v2, v13, v2 ; 1004050D v_mad_f32 v3, s11, v5, -v3 ; D2820003 840E0A0B v_mad_f32 v4, s11, v6, -v4 ; D2820004 84120C0B v_mac_f32_e32 v2, s11, v7 ; 3E040E0B v_mul_f32_e32 v5, 0.5, v8 ; 100A10F0 v_mul_f32_e32 v6, 0.5, v10 ; 100C14F0 v_mad_f32 v7, 0.5, v0, v6 ; D2820007 041A00F0 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v6, s0, v5 ; 3E0C0A00 exp 15, 32, 0, 0, 0, v7, v6, v9, v10 ; F800020F 0A090607 exp 15, 33, 0, 0, 0, v3, v4, v2, v1 ; F800021F 01020403 exp 15, 12, 0, 1, 0, v0, v8, v9, v10 ; F80008CF 0A090800 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 408 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[0..4] DCL CONST[6..12] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 1.0000, 2.0000, -1.0000, 0.0000} IMM[1] FLT32 { 128.0000, 0.0000, 0.0000, 0.0000} 0: RCP TEMP[0].x, IN[0].wwww 1: MUL TEMP[0].xy, IN[0].xyyy, TEMP[0].xxxx 2: RCP TEMP[1].x, IN[1].zzzz 3: MUL TEMP[2].x, CONST[1].zzzz, TEMP[1].xxxx 4: MUL TEMP[2].xyz, IN[1].xyzz, TEMP[2].xxxx 5: MOV TEMP[3].xy, TEMP[0].xyyy 6: TEX TEMP[3].x, TEMP[3], SAMP[0], 2D 7: MAD TEMP[3].x, CONST[2].xxxx, TEMP[3].xxxx, CONST[2].yyyy 8: RCP TEMP[3].x, TEMP[3].xxxx 9: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[3].xxxx 10: MUL TEMP[2], CONST[9], TEMP[1].xxxx 11: MAD TEMP[2], CONST[10], TEMP[1].yyyy, TEMP[2] 12: MAD TEMP[2], CONST[11], TEMP[1].zzzz, TEMP[2] 13: ADD TEMP[2].xyz, TEMP[2], CONST[12] 14: ADD TEMP[3].xyz, TEMP[2].xyzz, -CONST[3].xyzz 15: ADD TEMP[4].xyz, TEMP[2].xyzz, -CONST[6].xyzz 16: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 17: RSQ TEMP[5].x, TEMP[5].xxxx 18: MUL TEMP[5].xyz, TEMP[4].xyzz, TEMP[5].xxxx 19: MOV TEMP[5].xyz, -TEMP[5].xyzx 20: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[4].xyzz 21: MUL TEMP[4].x, TEMP[4].xxxx, CONST[6].wwww 22: MOV TEMP[4].xy, TEMP[4].xxxx 23: TEX TEMP[4].w, TEMP[4], SAMP[1], 2D 24: MOV TEMP[0].xy, TEMP[0].xyyy 25: TEX TEMP[0], TEMP[0], SAMP[2], 2D 26: MAD TEMP[6].xyz, TEMP[0].xyzz, IMM[0].yyyy, IMM[0].zzzz 27: DP3 TEMP[7].x, TEMP[6].xyzz, TEMP[6].xyzz 28: RSQ TEMP[7].x, TEMP[7].xxxx 29: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[7].xxxx 30: DP3 TEMP[7].x, TEMP[5].xyzz, TEMP[6].xyzz 31: MAX TEMP[7].x, IMM[0].wwww, TEMP[7].xxxx 32: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[4].wwww 33: MUL TEMP[7].xyz, CONST[7].xyzz, TEMP[7].xxxx 34: MUL TEMP[8].xyz, CONST[7].xyzz, CONST[4].xyzz 35: ADD TEMP[2].xyz, TEMP[2].xyzz, -CONST[0].xyzz 36: DP3 TEMP[9].x, TEMP[2].xyzz, TEMP[2].xyzz 37: RSQ TEMP[9].x, TEMP[9].xxxx 38: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[9].xxxx 39: ADD TEMP[2].xyz, TEMP[5].xyzz, -TEMP[2].xyzz 40: DP3 TEMP[5].x, TEMP[2].xyzz, TEMP[2].xyzz 41: RSQ TEMP[5].x, TEMP[5].xxxx 42: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx 43: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[6].xyzz 44: MAX TEMP[2].x, IMM[0].wwww, TEMP[2].xxxx 45: MUL TEMP[0].x, TEMP[0].wwww, IMM[1].xxxx 46: POW TEMP[0].x, TEMP[2].xxxx, TEMP[0].xxxx 47: MOV_SAT TEMP[2].x, TEMP[4].wwww 48: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx 49: ADD TEMP[2].x, TEMP[8].xxxx, TEMP[8].zzzz 50: MUL TEMP[2].x, TEMP[8].yyyy, TEMP[2].xxxx 51: SQRT TEMP[2].x, TEMP[2].xxxx 52: MUL TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 53: ADD TEMP[4].x, TEMP[8].xxxx, TEMP[8].yyyy 54: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[8].zzzz 55: MAD TEMP[2].x, TEMP[2].xxxx, CONST[4].wwww, TEMP[4].xxxx 56: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx 57: MOV TEMP[7].w, TEMP[0].xxxx 58: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[3].xyzz 59: SQRT TEMP[0].x, TEMP[0].xxxx 60: LRP TEMP[0].x, CONST[3].wwww, TEMP[0].xxxx, TEMP[1].zzzz 61: MAD TEMP[0].x, TEMP[0].xxxx, CONST[8].zzzz, CONST[8].wwww 62: ADD TEMP[0].x, IMM[0].xxxx, -TEMP[0].xxxx 63: MOV_SAT TEMP[0].x, TEMP[0].xxxx 64: MUL TEMP[0], TEMP[7], TEMP[0].xxxx 65: MOV OUT[0], TEMP[0] 66: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %59 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %60 = load <32 x i8>, <32 x i8> addrspace(2)* %59, align 32, !tbaa !0 %61 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %64 = bitcast <8 x i32> addrspace(2)* %63 to <32 x i8> addrspace(2)* %65 = load <32 x i8>, <32 x i8> addrspace(2)* %64, align 32, !tbaa !0 %66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %67 = bitcast <4 x i32> addrspace(2)* %66 to <16 x i8> addrspace(2)* %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 %69 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %70 = bitcast <8 x i32> addrspace(2)* %69 to <32 x i8> addrspace(2)* %71 = load <32 x i8>, <32 x i8> addrspace(2)* %70, align 32, !tbaa !0 %72 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %73 = bitcast <4 x i32> addrspace(2)* %72 to <16 x i8> addrspace(2)* %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %81 = fdiv float 1.000000e+00, %77 %82 = fmul float %75, %81 %83 = fmul float %76, %81 %84 = fdiv float 1.000000e+00, %80 %85 = fmul float %27, %84 %86 = fmul float %78, %85 %87 = fmul float %79, %85 %88 = fmul float %80, %85 %89 = bitcast float %82 to i32 %90 = bitcast float %83 to i32 %91 = insertelement <2 x i32> undef, i32 %89, i32 0 %92 = insertelement <2 x i32> %91, i32 %90, i32 1 %93 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %92, <32 x i8> %60, <16 x i8> %62, i32 2) %94 = extractelement <4 x float> %93, i32 0 %95 = fmul float %28, %94 %96 = fadd float %95, %29 %97 = fdiv float 1.000000e+00, %96 %98 = fmul float %86, %97 %99 = fmul float %87, %97 %100 = fmul float %88, %97 %101 = fmul float %47, %98 %102 = fmul float %48, %98 %103 = fmul float %49, %98 %104 = fmul float %50, %99 %105 = fadd float %104, %101 %106 = fmul float %51, %99 %107 = fadd float %106, %102 %108 = fmul float %52, %99 %109 = fadd float %108, %103 %110 = fmul float %53, %100 %111 = fadd float %110, %105 %112 = fmul float %54, %100 %113 = fadd float %112, %107 %114 = fmul float %55, %100 %115 = fadd float %114, %109 %116 = fadd float %111, %56 %117 = fadd float %113, %57 %118 = fadd float %115, %58 %119 = fsub float %116, %30 %120 = fsub float %117, %31 %121 = fsub float %118, %32 %122 = fsub float %116, %38 %123 = fsub float %117, %39 %124 = fsub float %118, %40 %125 = fmul float %122, %122 %126 = fmul float %123, %123 %127 = fadd float %126, %125 %128 = fmul float %124, %124 %129 = fadd float %127, %128 %130 = call float @llvm.AMDGPU.rsq.clamped.f32(float %129) %131 = fmul float %122, %130 %132 = fmul float %123, %130 %133 = fmul float %124, %130 %134 = fmul float %122, %122 %135 = fmul float %123, %123 %136 = fadd float %135, %134 %137 = fmul float %124, %124 %138 = fadd float %136, %137 %139 = fmul float %138, %41 %140 = bitcast float %139 to i32 %141 = bitcast float %139 to i32 %142 = insertelement <2 x i32> undef, i32 %140, i32 0 %143 = insertelement <2 x i32> %142, i32 %141, i32 1 %144 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %143, <32 x i8> %65, <16 x i8> %68, i32 2) %145 = extractelement <4 x float> %144, i32 3 %146 = bitcast float %82 to i32 %147 = bitcast float %83 to i32 %148 = insertelement <2 x i32> undef, i32 %146, i32 0 %149 = insertelement <2 x i32> %148, i32 %147, i32 1 %150 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %149, <32 x i8> %71, <16 x i8> %74, i32 2) %151 = extractelement <4 x float> %150, i32 0 %152 = extractelement <4 x float> %150, i32 1 %153 = extractelement <4 x float> %150, i32 2 %154 = extractelement <4 x float> %150, i32 3 %155 = fmul float %151, 2.000000e+00 %156 = fadd float %155, -1.000000e+00 %157 = fmul float %152, 2.000000e+00 %158 = fadd float %157, -1.000000e+00 %159 = fmul float %153, 2.000000e+00 %160 = fadd float %159, -1.000000e+00 %161 = fmul float %156, %156 %162 = fmul float %158, %158 %163 = fadd float %162, %161 %164 = fmul float %160, %160 %165 = fadd float %163, %164 %166 = call float @llvm.AMDGPU.rsq.clamped.f32(float %165) %167 = fmul float %156, %166 %168 = fmul float %158, %166 %169 = fmul float %160, %166 %170 = fmul float %131, %167 %171 = fsub float -0.000000e+00, %170 %172 = fmul float %132, %168 %173 = fsub float %171, %172 %174 = fmul float %133, %169 %175 = fsub float %173, %174 %176 = call float @llvm.maxnum.f32(float %175, float 0.000000e+00) %177 = fmul float %176, %145 %178 = fmul float %42, %177 %179 = fmul float %43, %177 %180 = fmul float %44, %177 %181 = fmul float %42, %34 %182 = fmul float %43, %35 %183 = fmul float %44, %36 %184 = fsub float %116, %24 %185 = fsub float %117, %25 %186 = fsub float %118, %26 %187 = fmul float %184, %184 %188 = fmul float %185, %185 %189 = fadd float %188, %187 %190 = fmul float %186, %186 %191 = fadd float %189, %190 %192 = call float @llvm.AMDGPU.rsq.clamped.f32(float %191) %193 = fmul float %184, %192 %194 = fmul float %185, %192 %195 = fmul float %186, %192 %196 = fsub float -0.000000e+00, %193 %197 = fsub float %196, %131 %198 = fsub float -0.000000e+00, %194 %199 = fsub float %198, %132 %200 = fsub float -0.000000e+00, %195 %201 = fsub float %200, %133 %202 = fmul float %197, %197 %203 = fmul float %199, %199 %204 = fadd float %203, %202 %205 = fmul float %201, %201 %206 = fadd float %204, %205 %207 = call float @llvm.AMDGPU.rsq.clamped.f32(float %206) %208 = fmul float %197, %207 %209 = fmul float %199, %207 %210 = fmul float %201, %207 %211 = fmul float %208, %167 %212 = fmul float %209, %168 %213 = fadd float %212, %211 %214 = fmul float %210, %169 %215 = fadd float %213, %214 %216 = call float @llvm.maxnum.f32(float %215, float 0.000000e+00) %217 = fmul float %154, 1.280000e+02 %218 = call float @llvm.pow.f32(float %216, float %217) %219 = call float @llvm.AMDIL.clamp.(float %145, float 0.000000e+00, float 1.000000e+00) %220 = fmul float %218, %219 %221 = fadd float %181, %183 %222 = fmul float %182, %221 %223 = call float @llvm.sqrt.f32(float %222) %224 = fmul float %223, 2.000000e+00 %225 = fadd float %181, %182 %226 = fadd float %225, %183 %227 = fmul float %224, %37 %228 = fadd float %227, %226 %229 = fmul float %220, %228 %230 = fmul float %119, %119 %231 = fmul float %120, %120 %232 = fadd float %231, %230 %233 = fmul float %121, %121 %234 = fadd float %232, %233 %235 = call float @llvm.sqrt.f32(float %234) %236 = call float @llvm.AMDGPU.lrp(float %33, float %235, float %100) %237 = fmul float %236, %45 %238 = fadd float %237, %46 %239 = fsub float 1.000000e+00, %238 %240 = call float @llvm.AMDIL.clamp.(float %239, float 0.000000e+00, float 1.000000e+00) %241 = fmul float %178, %240 %242 = fmul float %179, %240 %243 = fmul float %180, %240 %244 = fmul float %229, %240 %245 = call i32 @llvm.SI.packf16(float %241, float %242) %246 = bitcast i32 %245 to float %247 = call i32 @llvm.SI.packf16(float %243, float %244) %248 = bitcast i32 %247 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %246, float %248, float %246, float %248) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600 v_rcp_f32_e32 v4, v4 ; 7E085504 v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601 s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 v_mul_f32_e32 v1, v4, v2 ; 10020504 v_mul_f32_e32 v2, v4, v3 ; 10040704 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s36, s[0:3], 0x9 ; C2120109 s_buffer_load_dword s37, s[0:3], 0x6 ; C2128106 s_buffer_load_dword s38, s[0:3], 0x8 ; C2130108 s_load_dwordx4 s[40:43], s[4:5], 0x4 ; C0940504 s_load_dwordx4 s[44:47], s[4:5], 0x8 ; C0960508 s_load_dwordx8 s[8:15], s[6:7], 0x8 ; C0C40708 s_load_dwordx8 s[16:23], s[6:7], 0x10 ; C0C80710 image_sample v3, 1, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[28:35], s[24:27] ; F0800100 00C70301 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_buffer_load_dword s5, s[0:3], 0xd ; C202810D s_buffer_load_dword s6, s[0:3], 0xe ; C203010E s_buffer_load_dword s7, s[0:3], 0xf ; C203810F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s36 ; 7E080224 v_rcp_f32_e32 v7, v0 ; 7E0E5500 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v4, s38, v3 ; 3E080626 s_buffer_load_dword s24, s[0:3], 0x24 ; C20C0124 s_buffer_load_dword s25, s[0:3], 0x25 ; C20C8125 v_mul_f32_e32 v3, s37, v7 ; 10060E25 s_buffer_load_dword s26, s[0:3], 0x26 ; C20D0126 v_rcp_f32_e32 v4, v4 ; 7E085504 v_mul_f32_e32 v5, v3, v5 ; 100A0B03 s_buffer_load_dword s27, s[0:3], 0x28 ; C20D8128 s_buffer_load_dword s28, s[0:3], 0x29 ; C20E0129 v_mul_f32_e32 v5, v4, v5 ; 100A0B04 s_buffer_load_dword s29, s[0:3], 0x2a ; C20E812A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s24, v5 ; 100E0A18 v_mul_f32_e32 v8, s25, v5 ; 10100A19 s_buffer_load_dword s24, s[0:3], 0x2c ; C20C012C v_mul_f32_e32 v5, s26, v5 ; 100A0A1A v_mul_f32_e32 v6, v3, v6 ; 100C0D03 v_mul_f32_e32 v6, v4, v6 ; 100C0D04 v_mac_f32_e32 v7, s27, v6 ; 3E0E0C1B v_mac_f32_e32 v8, s28, v6 ; 3E100C1C s_buffer_load_dword s25, s[0:3], 0x2d ; C20C812D v_mac_f32_e32 v5, s29, v6 ; 3E0A0C1D v_mul_f32_e32 v0, v3, v0 ; 10000103 s_buffer_load_dword s26, s[0:3], 0x2e ; C20D012E s_buffer_load_dword s27, s[0:3], 0x30 ; C20D8130 s_buffer_load_dword s28, s[0:3], 0x31 ; C20E0131 v_mul_f32_e32 v0, v4, v0 ; 10000104 s_buffer_load_dword s29, s[0:3], 0x32 ; C20E8132 s_buffer_load_dword s30, s[0:3], 0x18 ; C20F0118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v7, s24, v0 ; 3E0E0018 v_mac_f32_e32 v8, s25, v0 ; 3E100019 s_buffer_load_dword s24, s[0:3], 0x19 ; C20C0119 s_buffer_load_dword s25, s[0:3], 0x1a ; C20C811A v_mac_f32_e32 v5, s26, v0 ; 3E0A001A v_add_f32_e32 v3, s27, v7 ; 06060E1B v_add_f32_e32 v4, s28, v8 ; 0608101C s_buffer_load_dword s26, s[0:3], 0x1b ; C20D011B v_add_f32_e32 v5, s29, v5 ; 060A0A1D v_subrev_f32_e32 v6, s30, v3 ; 0A0C061E s_buffer_load_dword s27, s[0:3], 0x1c ; C20D811C s_buffer_load_dword s28, s[0:3], 0x1d ; C20E011D s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v7, s24, v4 ; 0A0E0818 v_subrev_f32_e32 v8, s25, v5 ; 0A100A19 v_mul_f32_e32 v9, v6, v6 ; 10120D06 v_mac_f32_e32 v9, v7, v7 ; 3E120F07 v_mac_f32_e32 v9, v8, v8 ; 3E121108 v_mul_f32_e32 v10, s26, v9 ; 1014121A v_mov_b32_e32 v11, v10 ; 7E16030A s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112 s_buffer_load_dword s25, s[0:3], 0x1e ; C20C811E s_buffer_load_dword s26, s[0:3], 0x10 ; C20D0110 s_buffer_load_dword s29, s[0:3], 0x11 ; C20E8111 image_sample v10, 8, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[8:15], s[40:43] ; F0800800 01420A0A image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[16:23], s[44:47] ; F0800F00 01640B01 s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s11, s[0:3], 0x13 ; C2058113 s_buffer_load_dword s12, s[0:3], 0x22 ; C2060122 s_buffer_load_dword s0, s[0:3], 0x23 ; C2000123 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s24 ; 7E020218 v_mul_f32_e32 v1, s25, v1 ; 10020219 v_mov_b32_e32 v2, s26 ; 7E04021A v_mac_f32_e32 v1, s27, v2 ; 3E02041B v_mov_b32_e32 v2, s26 ; 7E04021A v_mov_b32_e32 v15, s29 ; 7E1E021D v_mul_f32_e32 v15, s28, v15 ; 101E1E1C v_mul_f32_e32 v1, v1, v15 ; 10021F01 v_mac_f32_e32 v15, s27, v2 ; 3E1E041B v_mov_b32_e32 v2, s24 ; 7E040218 v_mac_f32_e32 v15, s25, v2 ; 3E1E0419 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v2, s8, v3 ; 0A040608 v_subrev_f32_e32 v16, s9, v4 ; 0A200809 v_subrev_f32_e32 v17, s10, v5 ; 0A220A0A v_mul_f32_e32 v18, v2, v2 ; 10240502 v_mac_f32_e32 v18, v16, v16 ; 3E242110 v_mac_f32_e32 v18, v17, v17 ; 3E242311 v_rsq_clamp_f32_e32 v18, v18 ; 7E245912 v_sqrt_f32_e32 v1, v1 ; 7E026701 v_add_f32_e32 v1, v1, v1 ; 06020301 v_mac_f32_e32 v15, s11, v1 ; 3E1E020B v_mul_f32_e32 v1, v18, v2 ; 10020512 v_mul_f32_e32 v2, v18, v16 ; 10042112 v_mul_f32_e32 v16, v18, v17 ; 10202312 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mad_f32 v11, 2.0, v11, -1.0 ; D282000B 03CE16F4 v_mad_f32 v12, 2.0, v12, -1.0 ; D282000C 03CE18F4 v_mul_f32_e32 v17, v11, v11 ; 1022170B v_mac_f32_e32 v17, v12, v12 ; 3E22190C v_mad_f32 v13, 2.0, v13, -1.0 ; D282000D 03CE1AF4 v_mac_f32_e32 v17, v13, v13 ; 3E221B0D v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 v_mad_f32 v1, -v6, v9, -v1 ; D2820001 A4061306 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mad_f32 v2, -v7, v9, -v2 ; D2820002 A40A1307 v_mul_f32_e32 v7, v9, v7 ; 100E0F09 v_mul_f32_e32 v11, v17, v11 ; 10161711 v_mad_f32 v16, -v8, v9, -v16 ; D2820010 A4421308 v_mul_f32_e32 v18, v1, v1 ; 10240301 v_mac_f32_e32 v18, v2, v2 ; 3E240502 v_mac_f32_e32 v18, v16, v16 ; 3E242110 v_rsq_clamp_f32_e32 v18, v18 ; 7E245912 v_mul_f32_e32 v12, v17, v12 ; 10181911 v_mul_f32_e32 v6, v11, v6 ; 100C0D0B v_mad_f32 v6, -v7, v12, -v6 ; D2820006 A41A1907 v_mul_f32_e32 v1, v18, v1 ; 10020312 v_mul_f32_e32 v1, v11, v1 ; 1002030B v_mul_f32_e32 v2, v18, v2 ; 10040512 v_mac_f32_e32 v1, v12, v2 ; 3E02050C v_mul_f32_e32 v2, v9, v8 ; 10041109 v_mul_f32_e32 v7, v17, v13 ; 100E1B11 v_mad_f32 v2, -v2, v7, v6 ; D2820002 241A0F02 v_mul_f32_e32 v6, v18, v16 ; 100C2112 v_mac_f32_e32 v1, v7, v6 ; 3E020D07 v_subrev_f32_e32 v3, s4, v3 ; 0A060604 v_subrev_f32_e32 v4, s5, v4 ; 0A080805 v_subrev_f32_e32 v5, s6, v5 ; 0A0A0A06 v_max_f32_e32 v2, 0, v2 ; 20040480 v_mul_f32_e32 v2, v10, v2 ; 1004050A v_mul_f32_e32 v6, s27, v2 ; 100C041B v_mul_f32_e32 v7, s28, v2 ; 100E041C v_mul_f32_e32 v2, s25, v2 ; 10040419 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mac_f32_e32 v3, v4, v4 ; 3E060904 v_mac_f32_e32 v3, v5, v5 ; 3E060B05 v_sub_f32_e64 v4, 1.0, s7 ; D2080004 00000EF2 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_max_f32_e32 v1, 0, v1 ; 20020280 v_log_f32_e32 v1, v1 ; 7E024F01 v_sqrt_f32_e32 v3, v3 ; 7E066703 v_mac_f32_e32 v0, s7, v3 ; 3E000607 v_mov_b32_e32 v3, s0 ; 7E060200 v_mac_f32_e32 v3, s12, v0 ; 3E06000C v_mul_f32_e32 v0, 0x43000000, v14 ; 10001CFF 43000000 v_mul_legacy_f32_e32 v0, v0, v1 ; 0E000300 v_exp_f32_e32 v0, v0 ; 7E004B00 v_add_f32_e64 v1, 0, v10 clamp ; D2060801 00021480 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mul_f32_e32 v0, v15, v0 ; 1000010F v_sub_f32_e32 v1, 1.0, v3 ; 080206F2 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mul_f32_e32 v3, v1, v6 ; 10060D01 v_mul_f32_e32 v4, v1, v7 ; 10080F01 v_mul_f32_e32 v2, v1, v2 ; 10040501 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 20 Code Size: 824 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL CONST[0..3] DCL TEMP[0], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV OUT[0], TEMP[0] 5: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float %13, %33 %38 = fmul float %14, %33 %39 = fmul float %15, %33 %40 = fmul float %16, %33 %41 = fmul float %17, %34 %42 = fadd float %41, %37 %43 = fmul float %18, %34 %44 = fadd float %43, %38 %45 = fmul float %19, %34 %46 = fadd float %45, %39 %47 = fmul float %20, %34 %48 = fadd float %47, %40 %49 = fmul float %21, %35 %50 = fadd float %49, %42 %51 = fmul float %22, %35 %52 = fadd float %51, %44 %53 = fmul float %23, %35 %54 = fadd float %53, %46 %55 = fmul float %24, %35 %56 = fadd float %55, %48 %57 = fmul float %25, %36 %58 = fadd float %57, %50 %59 = fmul float %26, %36 %60 = fadd float %59, %52 %61 = fmul float %27, %36 %62 = fadd float %61, %54 %63 = fmul float %28, %36 %64 = fadd float %63, %56 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %60, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v0 ; 10080004 v_mac_f32_e32 v4, s8, v1 ; 3E080208 v_mul_f32_e32 v5, s5, v0 ; 100A0005 v_mac_f32_e32 v5, s9, v1 ; 3E0A0209 v_mul_f32_e32 v6, s6, v0 ; 100C0006 v_mac_f32_e32 v6, s10, v1 ; 3E0C020A v_mul_f32_e32 v0, s7, v0 ; 10000007 v_mac_f32_e32 v0, s11, v1 ; 3E00020B v_mac_f32_e32 v4, s12, v2 ; 3E08040C v_mac_f32_e32 v5, s13, v2 ; 3E0A040D v_mac_f32_e32 v6, s14, v2 ; 3E0C040E v_mac_f32_e32 v0, s15, v2 ; 3E00040F v_mac_f32_e32 v4, s16, v3 ; 3E080610 v_mac_f32_e32 v5, s17, v3 ; 3E0A0611 v_mac_f32_e32 v6, s18, v3 ; 3E0C0612 v_mac_f32_e32 v0, s0, v3 ; 3E000600 exp 15, 12, 0, 1, 0, v4, v5, v6, v0 ; F80008CF 00060504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 172 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL OUT[0], COLOR IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xyxx 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call i32 @llvm.SI.packf16(float 1.000000e+00, float 0.000000e+00) %23 = bitcast i32 %22 to float %24 = call i32 @llvm.SI.packf16(float 1.000000e+00, float 1.000000e+00) %25 = bitcast i32 %24 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25) ret void } ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: v_cvt_pkrtz_f16_f32_e64 v0, 1.0, 0 ; D25E0000 000100F2 v_cvt_pkrtz_f16_f32_e64 v1, 1.0, 1.0 ; D25E0001 0001E4F2 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 28 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..9] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.5000, -1.0000, 1.0000, 0.0000} 0: MUL TEMP[0], CONST[2], IN[0].xxxx 1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[5], IN[0].wwww, TEMP[0] 4: MUL TEMP[1].xyw, TEMP[0], IMM[0].xxxx 5: MOV TEMP[2].x, TEMP[1].xxxx 6: MUL TEMP[3].x, TEMP[1].yyyy, CONST[0].xxxx 7: MOV TEMP[2].y, TEMP[3].xxxx 8: ADD TEMP[1].xy, TEMP[2].xyyy, TEMP[1].wwww 9: MOV TEMP[1].zw, TEMP[0].wwzw 10: MUL TEMP[2], CONST[6], IN[0].xxxx 11: MAD TEMP[2], CONST[7], IN[0].yyyy, TEMP[2] 12: MAD TEMP[2], CONST[8], IN[0].zzzz, TEMP[2] 13: MAD TEMP[2].xyz, CONST[9], IN[0].wwww, TEMP[2] 14: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[0].yyzz 15: LRP TEMP[2].xyz, CONST[1].xxxx, IN[1].xyzz, TEMP[2].xyzz 16: MOV OUT[1], TEMP[1] 17: MOV OUT[2], TEMP[2] 18: MOV OUT[0], TEMP[0] 19: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = extractelement <4 x float> %49, i32 3 %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = add i32 %5, %7 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = fmul float %15, %50 %62 = fmul float %16, %50 %63 = fmul float %17, %50 %64 = fmul float %18, %50 %65 = fmul float %19, %51 %66 = fadd float %65, %61 %67 = fmul float %20, %51 %68 = fadd float %67, %62 %69 = fmul float %21, %51 %70 = fadd float %69, %63 %71 = fmul float %22, %51 %72 = fadd float %71, %64 %73 = fmul float %23, %52 %74 = fadd float %73, %66 %75 = fmul float %24, %52 %76 = fadd float %75, %68 %77 = fmul float %25, %52 %78 = fadd float %77, %70 %79 = fmul float %26, %52 %80 = fadd float %79, %72 %81 = fmul float %27, %53 %82 = fadd float %81, %74 %83 = fmul float %28, %53 %84 = fadd float %83, %76 %85 = fmul float %29, %53 %86 = fadd float %85, %78 %87 = fmul float %30, %53 %88 = fadd float %87, %80 %89 = fmul float %82, 5.000000e-01 %90 = fmul float %84, 5.000000e-01 %91 = fmul float %88, 5.000000e-01 %92 = fmul float %90, %13 %93 = fadd float %89, %91 %94 = fadd float %92, %91 %95 = fmul float %31, %50 %96 = fmul float %32, %50 %97 = fmul float %33, %50 %98 = fmul float %34, %50 %99 = fmul float %35, %51 %100 = fadd float %99, %95 %101 = fmul float %36, %51 %102 = fadd float %101, %96 %103 = fmul float %37, %51 %104 = fadd float %103, %97 %105 = fmul float %38, %51 %106 = fadd float %105, %98 %107 = fmul float %39, %52 %108 = fadd float %107, %100 %109 = fmul float %40, %52 %110 = fadd float %109, %102 %111 = fmul float %41, %52 %112 = fadd float %111, %104 %113 = fmul float %42, %52 %114 = fadd float %113, %106 %115 = fmul float %43, %53 %116 = fadd float %115, %108 %117 = fmul float %44, %53 %118 = fadd float %117, %110 %119 = fmul float %45, %53 %120 = fadd float %119, %112 %121 = fsub float -0.000000e+00, %116 %122 = fsub float -0.000000e+00, %118 %123 = call float @llvm.AMDGPU.lrp(float %14, float %58, float %121) %124 = call float @llvm.AMDGPU.lrp(float %14, float %59, float %122) %125 = call float @llvm.AMDGPU.lrp(float %14, float %60, float %120) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %93, float %94, float %86, float %88) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %123, float %124, float %125, float %114) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %82, float %84, float %86, float %88) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_buffer_load_dword s5, s[0:3], 0xc ; C202810C s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109 s_buffer_load_dword s7, s[0:3], 0xd ; C203810D s_buffer_load_dword s8, s[0:3], 0xa ; C204010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v1 ; 10000204 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_buffer_load_dword s9, s[0:3], 0xf ; C204810F v_mac_f32_e32 v0, s5, v2 ; 3E000405 v_mul_f32_e32 v8, s6, v1 ; 10100206 v_mac_f32_e32 v8, s7, v2 ; 3E100407 s_buffer_load_dword s5, s[0:3], 0xb ; C202810B v_mul_f32_e32 v9, s8, v1 ; 10120208 s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118 s_buffer_load_dword s7, s[0:3], 0x1c ; C203811C s_buffer_load_dword s8, s[0:3], 0x19 ; C2040119 s_buffer_load_dword s10, s[0:3], 0x1d ; C205011D s_buffer_load_dword s11, s[0:3], 0x1a ; C205811A s_buffer_load_dword s12, s[0:3], 0x1e ; C206011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v9, s4, v2 ; 3E120404 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B v_mul_f32_e32 v10, s5, v1 ; 10140205 v_mac_f32_e32 v10, s9, v2 ; 3E140409 v_mul_f32_e32 v11, s6, v1 ; 10160206 v_mac_f32_e32 v11, s7, v2 ; 3E160407 v_mul_f32_e32 v12, s8, v1 ; 10180208 v_mac_f32_e32 v12, s10, v2 ; 3E18040A v_mul_f32_e32 v13, s11, v1 ; 101A020B v_mac_f32_e32 v13, s12, v2 ; 3E1A040C s_buffer_load_dword s5, s[0:3], 0x1f ; C202811F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v1 ; 10020204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s6, s[0:3], 0x11 ; C2030111 s_buffer_load_dword s7, s[0:3], 0x12 ; C2038112 s_buffer_load_dword s8, s[0:3], 0x13 ; C2040113 s_buffer_load_dword s9, s[0:3], 0x20 ; C2048120 s_buffer_load_dword s10, s[0:3], 0x21 ; C2050121 s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122 s_buffer_load_dword s12, s[0:3], 0x23 ; C2060123 v_mac_f32_e32 v1, s5, v2 ; 3E020405 s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s4, v3 ; 3E000604 v_mac_f32_e32 v8, s6, v3 ; 3E100606 v_mac_f32_e32 v9, s7, v3 ; 3E120607 v_mac_f32_e32 v10, s8, v3 ; 3E140608 v_mac_f32_e32 v11, s9, v3 ; 3E160609 v_mac_f32_e32 v12, s10, v3 ; 3E18060A s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_buffer_load_dword s6, s[0:3], 0x16 ; C2030116 s_buffer_load_dword s7, s[0:3], 0x17 ; C2038117 s_buffer_load_dword s8, s[0:3], 0x24 ; C2040124 s_buffer_load_dword s9, s[0:3], 0x25 ; C2048125 s_buffer_load_dword s10, s[0:3], 0x26 ; C2050126 v_mac_f32_e32 v13, s11, v3 ; 3E1A060B s_buffer_load_dword s11, s[0:3], 0x4 ; C2058104 v_mac_f32_e32 v1, s12, v3 ; 3E02060C v_mac_f32_e32 v0, s5, v4 ; 3E000805 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v8, s4, v4 ; 3E100804 v_mac_f32_e32 v9, s6, v4 ; 3E120806 v_mac_f32_e32 v10, s7, v4 ; 3E140807 v_mac_f32_e32 v11, s8, v4 ; 3E160808 v_mac_f32_e32 v12, s9, v4 ; 3E180809 v_mac_f32_e32 v13, s10, v4 ; 3E1A080A s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 v_sub_f32_e64 v2, 1.0, s11 ; D2080002 000016F2 v_mul_f32_e32 v3, v11, v2 ; 1006050B v_mul_f32_e32 v4, v12, v2 ; 1008050C v_mul_f32_e32 v2, v13, v2 ; 1004050D v_mad_f32 v3, s11, v5, -v3 ; D2820003 840E0A0B v_mad_f32 v4, s11, v6, -v4 ; D2820004 84120C0B v_mac_f32_e32 v2, s11, v7 ; 3E040E0B v_mul_f32_e32 v5, 0.5, v8 ; 100A10F0 v_mul_f32_e32 v6, 0.5, v10 ; 100C14F0 v_mad_f32 v7, 0.5, v0, v6 ; D2820007 041A00F0 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v6, s0, v5 ; 3E0C0A00 exp 15, 32, 0, 0, 0, v7, v6, v9, v10 ; F800020F 0A090607 exp 15, 33, 0, 0, 0, v3, v4, v2, v1 ; F800021F 01020403 exp 15, 12, 0, 1, 0, v0, v8, v9, v10 ; F80008CF 0A090800 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 408 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0..4] DCL CONST[6..12] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 1.0000, 2.0000, -1.0000, 0.0000} IMM[1] FLT32 { 128.0000, 0.0000, 0.0000, 0.0000} 0: RCP TEMP[0].x, IN[0].wwww 1: MUL TEMP[0].xy, IN[0].xyyy, TEMP[0].xxxx 2: RCP TEMP[1].x, IN[1].zzzz 3: MUL TEMP[2].x, CONST[1].zzzz, TEMP[1].xxxx 4: MUL TEMP[2].xyz, IN[1].xyzz, TEMP[2].xxxx 5: MOV TEMP[3].xy, TEMP[0].xyyy 6: TEX TEMP[3].x, TEMP[3], SAMP[0], 2D 7: MAD TEMP[3].x, CONST[2].xxxx, TEMP[3].xxxx, CONST[2].yyyy 8: RCP TEMP[3].x, TEMP[3].xxxx 9: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[3].xxxx 10: MUL TEMP[2], CONST[9], TEMP[1].xxxx 11: MAD TEMP[2], CONST[10], TEMP[1].yyyy, TEMP[2] 12: MAD TEMP[2], CONST[11], TEMP[1].zzzz, TEMP[2] 13: ADD TEMP[2].xyz, TEMP[2], CONST[12] 14: ADD TEMP[3].xyz, TEMP[2].xyzz, -CONST[3].xyzz 15: MOV TEMP[4].xyz, -CONST[6].xyzx 16: MOV TEMP[0].xy, TEMP[0].xyyy 17: TEX TEMP[0], TEMP[0], SAMP[1], 2D 18: MAD TEMP[5].xyz, TEMP[0].xyzz, IMM[0].yyyy, IMM[0].zzzz 19: DP3 TEMP[6].x, TEMP[5].xyzz, TEMP[5].xyzz 20: RSQ TEMP[6].x, TEMP[6].xxxx 21: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[6].xxxx 22: DP3 TEMP[6].x, TEMP[4].xyzz, TEMP[5].xyzz 23: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx 24: MUL TEMP[6].xyz, CONST[7].xyzz, TEMP[6].xxxx 25: MUL TEMP[7].xyz, CONST[7].xyzz, CONST[4].xyzz 26: ADD TEMP[2].xyz, TEMP[2].xyzz, -CONST[0].xyzz 27: DP3 TEMP[8].x, TEMP[2].xyzz, TEMP[2].xyzz 28: RSQ TEMP[8].x, TEMP[8].xxxx 29: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[8].xxxx 30: ADD TEMP[2].xyz, TEMP[4].xyzz, -TEMP[2].xyzz 31: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz 32: RSQ TEMP[4].x, TEMP[4].xxxx 33: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx 34: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[5].xyzz 35: MAX TEMP[2].x, IMM[0].wwww, TEMP[2].xxxx 36: MUL TEMP[0].x, TEMP[0].wwww, IMM[1].xxxx 37: POW TEMP[0].x, TEMP[2].xxxx, TEMP[0].xxxx 38: ADD TEMP[2].x, TEMP[7].xxxx, TEMP[7].zzzz 39: MUL TEMP[2].x, TEMP[7].yyyy, TEMP[2].xxxx 40: SQRT TEMP[2].x, TEMP[2].xxxx 41: MUL TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 42: ADD TEMP[4].x, TEMP[7].xxxx, TEMP[7].yyyy 43: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[7].zzzz 44: MAD TEMP[2].x, TEMP[2].xxxx, CONST[4].wwww, TEMP[4].xxxx 45: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx 46: MOV TEMP[6].w, TEMP[0].xxxx 47: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[3].xyzz 48: SQRT TEMP[0].x, TEMP[0].xxxx 49: LRP TEMP[0].x, CONST[3].wwww, TEMP[0].xxxx, TEMP[1].zzzz 50: MAD TEMP[0].x, TEMP[0].xxxx, CONST[8].zzzz, CONST[8].wwww 51: ADD TEMP[0].x, IMM[0].xxxx, -TEMP[0].xxxx 52: MOV_SAT TEMP[0].x, TEMP[0].xxxx 53: MUL TEMP[0], TEMP[6], TEMP[0].xxxx 54: MOV OUT[0], TEMP[0] 55: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %58 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %59 = load <32 x i8>, <32 x i8> addrspace(2)* %58, align 32, !tbaa !0 %60 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0 %62 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %63 = bitcast <8 x i32> addrspace(2)* %62 to <32 x i8> addrspace(2)* %64 = load <32 x i8>, <32 x i8> addrspace(2)* %63, align 32, !tbaa !0 %65 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %66 = bitcast <4 x i32> addrspace(2)* %65 to <16 x i8> addrspace(2)* %67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0 %68 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %69 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %70 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %71 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %74 = fdiv float 1.000000e+00, %70 %75 = fmul float %68, %74 %76 = fmul float %69, %74 %77 = fdiv float 1.000000e+00, %73 %78 = fmul float %27, %77 %79 = fmul float %71, %78 %80 = fmul float %72, %78 %81 = fmul float %73, %78 %82 = bitcast float %75 to i32 %83 = bitcast float %76 to i32 %84 = insertelement <2 x i32> undef, i32 %82, i32 0 %85 = insertelement <2 x i32> %84, i32 %83, i32 1 %86 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %85, <32 x i8> %59, <16 x i8> %61, i32 2) %87 = extractelement <4 x float> %86, i32 0 %88 = fmul float %28, %87 %89 = fadd float %88, %29 %90 = fdiv float 1.000000e+00, %89 %91 = fmul float %79, %90 %92 = fmul float %80, %90 %93 = fmul float %81, %90 %94 = fmul float %46, %91 %95 = fmul float %47, %91 %96 = fmul float %48, %91 %97 = fmul float %49, %92 %98 = fadd float %97, %94 %99 = fmul float %50, %92 %100 = fadd float %99, %95 %101 = fmul float %51, %92 %102 = fadd float %101, %96 %103 = fmul float %52, %93 %104 = fadd float %103, %98 %105 = fmul float %53, %93 %106 = fadd float %105, %100 %107 = fmul float %54, %93 %108 = fadd float %107, %102 %109 = fadd float %104, %55 %110 = fadd float %106, %56 %111 = fadd float %108, %57 %112 = fsub float %109, %30 %113 = fsub float %110, %31 %114 = fsub float %111, %32 %115 = bitcast float %75 to i32 %116 = bitcast float %76 to i32 %117 = insertelement <2 x i32> undef, i32 %115, i32 0 %118 = insertelement <2 x i32> %117, i32 %116, i32 1 %119 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %118, <32 x i8> %64, <16 x i8> %67, i32 2) %120 = extractelement <4 x float> %119, i32 0 %121 = extractelement <4 x float> %119, i32 1 %122 = extractelement <4 x float> %119, i32 2 %123 = extractelement <4 x float> %119, i32 3 %124 = fmul float %120, 2.000000e+00 %125 = fadd float %124, -1.000000e+00 %126 = fmul float %121, 2.000000e+00 %127 = fadd float %126, -1.000000e+00 %128 = fmul float %122, 2.000000e+00 %129 = fadd float %128, -1.000000e+00 %130 = fmul float %125, %125 %131 = fmul float %127, %127 %132 = fadd float %131, %130 %133 = fmul float %129, %129 %134 = fadd float %132, %133 %135 = call float @llvm.AMDGPU.rsq.clamped.f32(float %134) %136 = fmul float %125, %135 %137 = fmul float %127, %135 %138 = fmul float %129, %135 %139 = fmul float %38, %136 %140 = fsub float -0.000000e+00, %139 %141 = fmul float %39, %137 %142 = fsub float %140, %141 %143 = fmul float %40, %138 %144 = fsub float %142, %143 %145 = call float @llvm.maxnum.f32(float %144, float 0.000000e+00) %146 = fmul float %41, %145 %147 = fmul float %42, %145 %148 = fmul float %43, %145 %149 = fmul float %41, %34 %150 = fmul float %42, %35 %151 = fmul float %43, %36 %152 = fsub float %109, %24 %153 = fsub float %110, %25 %154 = fsub float %111, %26 %155 = fmul float %152, %152 %156 = fmul float %153, %153 %157 = fadd float %156, %155 %158 = fmul float %154, %154 %159 = fadd float %157, %158 %160 = call float @llvm.AMDGPU.rsq.clamped.f32(float %159) %161 = fmul float %152, %160 %162 = fmul float %153, %160 %163 = fmul float %154, %160 %164 = fsub float -0.000000e+00, %161 %165 = fsub float %164, %38 %166 = fsub float -0.000000e+00, %162 %167 = fsub float %166, %39 %168 = fsub float -0.000000e+00, %163 %169 = fsub float %168, %40 %170 = fmul float %165, %165 %171 = fmul float %167, %167 %172 = fadd float %171, %170 %173 = fmul float %169, %169 %174 = fadd float %172, %173 %175 = call float @llvm.AMDGPU.rsq.clamped.f32(float %174) %176 = fmul float %165, %175 %177 = fmul float %167, %175 %178 = fmul float %169, %175 %179 = fmul float %176, %136 %180 = fmul float %177, %137 %181 = fadd float %180, %179 %182 = fmul float %178, %138 %183 = fadd float %181, %182 %184 = call float @llvm.maxnum.f32(float %183, float 0.000000e+00) %185 = fmul float %123, 1.280000e+02 %186 = call float @llvm.pow.f32(float %184, float %185) %187 = fadd float %149, %151 %188 = fmul float %150, %187 %189 = call float @llvm.sqrt.f32(float %188) %190 = fmul float %189, 2.000000e+00 %191 = fadd float %149, %150 %192 = fadd float %191, %151 %193 = fmul float %190, %37 %194 = fadd float %193, %192 %195 = fmul float %186, %194 %196 = fmul float %112, %112 %197 = fmul float %113, %113 %198 = fadd float %197, %196 %199 = fmul float %114, %114 %200 = fadd float %198, %199 %201 = call float @llvm.sqrt.f32(float %200) %202 = call float @llvm.AMDGPU.lrp(float %33, float %201, float %93) %203 = fmul float %202, %44 %204 = fadd float %203, %45 %205 = fsub float 1.000000e+00, %204 %206 = call float @llvm.AMDIL.clamp.(float %205, float 0.000000e+00, float 1.000000e+00) %207 = fmul float %146, %206 %208 = fmul float %147, %206 %209 = fmul float %148, %206 %210 = fmul float %195, %206 %211 = call i32 @llvm.SI.packf16(float %207, float %208) %212 = bitcast i32 %211 to float %213 = call i32 @llvm.SI.packf16(float %209, float %210) %214 = bitcast i32 %213 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %212, float %214, float %212, float %214) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600 v_rcp_f32_e32 v4, v4 ; 7E085504 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108 v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601 v_mul_f32_e32 v1, v4, v2 ; 10020504 v_mul_f32_e32 v2, v4, v3 ; 10040704 image_sample v3, 1, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[16:23], s[8:11] ; F0800100 00440301 image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[24:31], s[12:15] ; F0800F00 00660701 s_buffer_load_dword s6, s[0:3], 0xc ; C203010C s_buffer_load_dword s7, s[0:3], 0xd ; C203810D s_buffer_load_dword s8, s[0:3], 0xe ; C204010E s_buffer_load_dword s9, s[0:3], 0xf ; C204810F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v1, s5, v3 ; 3E020605 v_rcp_f32_e32 v1, v1 ; 7E025501 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s11, s[0:3], 0x6 ; C2058106 s_buffer_load_dword s12, s[0:3], 0x10 ; C2060110 s_buffer_load_dword s13, s[0:3], 0x11 ; C2068111 s_buffer_load_dword s14, s[0:3], 0x12 ; C2070112 s_buffer_load_dword s15, s[0:3], 0x13 ; C2078113 s_buffer_load_dword s16, s[0:3], 0x18 ; C2080118 s_buffer_load_dword s17, s[0:3], 0x19 ; C2088119 s_buffer_load_dword s18, s[0:3], 0x1a ; C209011A s_buffer_load_dword s19, s[0:3], 0x1c ; C209811C s_buffer_load_dword s20, s[0:3], 0x1d ; C20A011D s_buffer_load_dword s21, s[0:3], 0x1e ; C20A811E s_buffer_load_dword s22, s[0:3], 0x22 ; C20B0122 s_buffer_load_dword s23, s[0:3], 0x23 ; C20B8123 s_buffer_load_dword s24, s[0:3], 0x24 ; C20C0124 s_buffer_load_dword s25, s[0:3], 0x25 ; C20C8125 s_buffer_load_dword s26, s[0:3], 0x26 ; C20D0126 s_buffer_load_dword s27, s[0:3], 0x28 ; C20D8128 s_buffer_load_dword s28, s[0:3], 0x29 ; C20E0129 s_buffer_load_dword s29, s[0:3], 0x2a ; C20E812A s_buffer_load_dword s30, s[0:3], 0x2c ; C20F012C s_buffer_load_dword s31, s[0:3], 0x2d ; C20F812D v_rcp_f32_e32 v2, v0 ; 7E045500 s_buffer_load_dword s32, s[0:3], 0x2e ; C210012E s_buffer_load_dword s33, s[0:3], 0x30 ; C2108130 s_buffer_load_dword s34, s[0:3], 0x31 ; C2110131 s_buffer_load_dword s0, s[0:3], 0x32 ; C2000132 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v2, s11, v2 ; 1004040B v_mul_f32_e32 v3, v2, v5 ; 10060B02 v_mul_f32_e32 v3, v1, v3 ; 10060701 v_mul_f32_e32 v4, s24, v3 ; 10080618 v_mul_f32_e32 v5, s25, v3 ; 100A0619 v_mul_f32_e32 v3, s26, v3 ; 1006061A v_mul_f32_e32 v6, v2, v6 ; 100C0D02 v_mul_f32_e32 v6, v1, v6 ; 100C0D01 v_mac_f32_e32 v4, s27, v6 ; 3E080C1B v_mac_f32_e32 v5, s28, v6 ; 3E0A0C1C v_mac_f32_e32 v3, s29, v6 ; 3E060C1D v_mov_b32_e32 v6, s14 ; 7E0C020E v_mul_f32_e32 v6, s21, v6 ; 100C0C15 v_mov_b32_e32 v11, s12 ; 7E16020C v_mac_f32_e32 v6, s19, v11 ; 3E0C1613 v_mov_b32_e32 v11, s12 ; 7E16020C v_mov_b32_e32 v12, s13 ; 7E18020D v_mul_f32_e32 v12, s20, v12 ; 10181814 v_mul_f32_e32 v6, v6, v12 ; 100C1906 v_mac_f32_e32 v12, s19, v11 ; 3E181613 v_mov_b32_e32 v11, s14 ; 7E16020E v_mac_f32_e32 v12, s21, v11 ; 3E181615 v_sqrt_f32_e32 v6, v6 ; 7E0C6706 v_add_f32_e32 v6, v6, v6 ; 060C0D06 v_mac_f32_e32 v12, s15, v6 ; 3E180C0F v_mul_f32_e32 v0, v2, v0 ; 10000102 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mac_f32_e32 v4, s30, v0 ; 3E08001E v_mac_f32_e32 v5, s31, v0 ; 3E0A001F v_mac_f32_e32 v3, s32, v0 ; 3E060020 v_add_f32_e32 v1, s33, v4 ; 06020821 v_add_f32_e32 v2, s34, v5 ; 06040A22 v_add_f32_e32 v3, s0, v3 ; 06060600 v_subrev_f32_e32 v4, s4, v1 ; 0A080204 v_subrev_f32_e32 v5, s5, v2 ; 0A0A0405 v_subrev_f32_e32 v6, s10, v3 ; 0A0C060A v_mad_f32 v7, 2.0, v7, -1.0 ; D2820007 03CE0EF4 v_mad_f32 v8, 2.0, v8, -1.0 ; D2820008 03CE10F4 v_mul_f32_e32 v11, v7, v7 ; 10160F07 v_mac_f32_e32 v11, v8, v8 ; 3E161108 v_mul_f32_e32 v13, v4, v4 ; 101A0904 v_mac_f32_e32 v13, v5, v5 ; 3E1A0B05 v_mac_f32_e32 v13, v6, v6 ; 3E1A0D06 v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D v_mad_f32 v9, 2.0, v9, -1.0 ; D2820009 03CE12F4 v_mac_f32_e32 v11, v9, v9 ; 3E161309 v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B v_mad_f32 v4, -v4, v13, -s16 ; D2820004 A0421B04 v_mad_f32 v5, -v5, v13, -s17 ; D2820005 A0461B05 v_mad_f32 v6, -v6, v13, -s18 ; D2820006 A04A1B06 v_mul_f32_e32 v7, v11, v7 ; 100E0F0B v_mul_f32_e32 v13, v4, v4 ; 101A0904 v_mac_f32_e32 v13, v5, v5 ; 3E1A0B05 v_mac_f32_e32 v13, v6, v6 ; 3E1A0D06 v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D v_mul_f32_e32 v14, s16, v7 ; 101C0E10 v_mul_f32_e32 v8, v11, v8 ; 1010110B v_mad_f32 v14, -s17, v8, -v14 ; D282000E A43A1011 v_mul_f32_e32 v4, v13, v4 ; 1008090D v_mul_f32_e32 v4, v7, v4 ; 10080907 v_mul_f32_e32 v5, v13, v5 ; 100A0B0D v_mac_f32_e32 v4, v8, v5 ; 3E080B08 v_subrev_f32_e32 v1, s6, v1 ; 0A020206 v_subrev_f32_e32 v2, s7, v2 ; 0A040407 v_subrev_f32_e32 v3, s8, v3 ; 0A060608 v_mul_f32_e32 v5, v11, v9 ; 100A130B v_mad_f32 v7, -s18, v5, v14 ; D2820007 243A0A12 v_max_f32_e32 v7, 0, v7 ; 200E0E80 v_mul_f32_e32 v8, s19, v7 ; 10100E13 v_mul_f32_e32 v9, s20, v7 ; 10120E14 v_mul_f32_e32 v7, s21, v7 ; 100E0E15 v_mul_f32_e32 v6, v13, v6 ; 100C0D0D v_mac_f32_e32 v4, v5, v6 ; 3E080D05 v_mul_f32_e32 v1, v1, v1 ; 10020301 v_mac_f32_e32 v1, v2, v2 ; 3E020502 v_mac_f32_e32 v1, v3, v3 ; 3E020703 v_sub_f32_e64 v2, 1.0, s9 ; D2080002 000012F2 v_mul_f32_e32 v0, v0, v2 ; 10000500 v_max_f32_e32 v2, 0, v4 ; 20040880 v_log_f32_e32 v2, v2 ; 7E044F02 v_sqrt_f32_e32 v1, v1 ; 7E026701 v_mac_f32_e32 v0, s9, v1 ; 3E000209 v_mov_b32_e32 v1, s23 ; 7E020217 v_mac_f32_e32 v1, s22, v0 ; 3E020016 v_mul_f32_e32 v0, 0x43000000, v10 ; 100014FF 43000000 v_mul_legacy_f32_e32 v0, v0, v2 ; 0E000500 v_exp_f32_e32 v0, v0 ; 7E004B00 v_mul_f32_e32 v0, v12, v0 ; 1000010C v_sub_f32_e32 v1, 1.0, v1 ; 080202F2 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mul_f32_e32 v2, v1, v8 ; 10041101 v_mul_f32_e32 v3, v1, v9 ; 10061301 v_mul_f32_e32 v4, v1, v7 ; 10080F01 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 712 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..17] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 0.5000, 1.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[14], IN[0].xxxx 1: MAD TEMP[0], CONST[15], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[16], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[17], IN[0].wwww, TEMP[0] 4: MUL TEMP[1].xyw, TEMP[0], IMM[0].xxxx 5: MOV TEMP[2].x, TEMP[1].xxxx 6: MUL TEMP[3].x, TEMP[1].yyyy, CONST[0].xxxx 7: MOV TEMP[2].y, TEMP[3].xxxx 8: ADD TEMP[1].xy, TEMP[2].xyyy, TEMP[1].wwww 9: MOV TEMP[1].zw, TEMP[0].wwzw 10: MOV TEMP[2].x, CONST[8].xxxx 11: MOV TEMP[2].y, CONST[9].xxxx 12: MOV TEMP[2].z, CONST[10].xxxx 13: MOV TEMP[3].x, CONST[8].yyyy 14: MOV TEMP[3].y, CONST[9].yyyy 15: MOV TEMP[3].z, CONST[10].yyyy 16: MOV TEMP[4].x, CONST[8].zzzz 17: MOV TEMP[4].y, CONST[9].zzzz 18: MOV TEMP[4].z, CONST[10].zzzz 19: MUL TEMP[2].xyz, TEMP[2].xyzz, IN[1].xxxx 20: MAD TEMP[2].xyz, TEMP[3].xyzz, IN[1].yyyy, TEMP[2].xyzz 21: MAD TEMP[2].xyz, TEMP[4].xyzz, IN[1].zzzz, TEMP[2].xyzz 22: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 23: RSQ TEMP[3].x, TEMP[3].xxxx 24: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 25: MOV TEMP[3].w, IMM[0].yyyy 26: MOV TEMP[3].xyz, TEMP[2].xyzx 27: DP4 TEMP[4].x, CONST[1], TEMP[3] 28: DP4 TEMP[5].x, CONST[2], TEMP[3] 29: MOV TEMP[4].y, TEMP[5].xxxx 30: DP4 TEMP[3].x, CONST[3], TEMP[3] 31: MOV TEMP[4].z, TEMP[3].xxxx 32: MUL TEMP[3], TEMP[2].xyzz, TEMP[2].yzzx 33: DP4 TEMP[5].x, CONST[4], TEMP[3] 34: DP4 TEMP[6].x, CONST[5], TEMP[3] 35: MOV TEMP[5].y, TEMP[6].xxxx 36: DP4 TEMP[3].x, CONST[6], TEMP[3] 37: MOV TEMP[5].z, TEMP[3].xxxx 38: MUL TEMP[3].x, TEMP[2].yyyy, TEMP[2].yyyy 39: MAD TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx, -TEMP[3].xxxx 40: MAD TEMP[2].xyz, CONST[7].xyzz, TEMP[2].xxxx, TEMP[5].xyzz 41: ADD TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xyzz 42: MAD TEMP[3].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww 43: MOV TEMP[3].w, TEMP[2].xxxx 44: MOV TEMP[2].xy, TEMP[2].yzyy 45: MAD TEMP[4].x, TEMP[0].zzzz, CONST[12].zzzz, CONST[12].wwww 46: MOV TEMP[3].z, TEMP[4].xxxx 47: MOV OUT[2], TEMP[3] 48: MOV OUT[1], TEMP[1] 49: MOV OUT[0], TEMP[0] 50: MOV OUT[3], TEMP[2] 51: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %72 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %73 = load <16 x i8>, <16 x i8> addrspace(2)* %72, align 16, !tbaa !0 %74 = add i32 %5, %7 %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %73, i32 0, i32 %74) %76 = extractelement <4 x float> %75, i32 0 %77 = extractelement <4 x float> %75, i32 1 %78 = extractelement <4 x float> %75, i32 2 %79 = extractelement <4 x float> %75, i32 3 %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %5, %7 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 %89 = add i32 %5, %7 %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %88, i32 0, i32 %89) %91 = extractelement <4 x float> %90, i32 0 %92 = extractelement <4 x float> %90, i32 1 %93 = fmul float %56, %76 %94 = fmul float %57, %76 %95 = fmul float %58, %76 %96 = fmul float %59, %76 %97 = fmul float %60, %77 %98 = fadd float %97, %93 %99 = fmul float %61, %77 %100 = fadd float %99, %94 %101 = fmul float %62, %77 %102 = fadd float %101, %95 %103 = fmul float %63, %77 %104 = fadd float %103, %96 %105 = fmul float %64, %78 %106 = fadd float %105, %98 %107 = fmul float %65, %78 %108 = fadd float %107, %100 %109 = fmul float %66, %78 %110 = fadd float %109, %102 %111 = fmul float %67, %78 %112 = fadd float %111, %104 %113 = fmul float %68, %79 %114 = fadd float %113, %106 %115 = fmul float %69, %79 %116 = fadd float %115, %108 %117 = fmul float %70, %79 %118 = fadd float %117, %110 %119 = fmul float %71, %79 %120 = fadd float %119, %112 %121 = fmul float %114, 5.000000e-01 %122 = fmul float %116, 5.000000e-01 %123 = fmul float %120, 5.000000e-01 %124 = fmul float %122, %13 %125 = fadd float %121, %123 %126 = fadd float %124, %123 %127 = fmul float %41, %84 %128 = fmul float %44, %84 %129 = fmul float %47, %84 %130 = fmul float %42, %85 %131 = fadd float %130, %127 %132 = fmul float %45, %85 %133 = fadd float %132, %128 %134 = fmul float %48, %85 %135 = fadd float %134, %129 %136 = fmul float %43, %86 %137 = fadd float %136, %131 %138 = fmul float %46, %86 %139 = fadd float %138, %133 %140 = fmul float %49, %86 %141 = fadd float %140, %135 %142 = fmul float %137, %137 %143 = fmul float %139, %139 %144 = fadd float %143, %142 %145 = fmul float %141, %141 %146 = fadd float %144, %145 %147 = call float @llvm.AMDGPU.rsq.clamped.f32(float %146) %148 = fmul float %137, %147 %149 = fmul float %139, %147 %150 = fmul float %141, %147 %151 = fmul float %14, %148 %152 = fmul float %15, %149 %153 = fadd float %151, %152 %154 = fmul float %16, %150 %155 = fadd float %153, %154 %156 = fadd float %155, %17 %157 = fmul float %18, %148 %158 = fmul float %19, %149 %159 = fadd float %157, %158 %160 = fmul float %20, %150 %161 = fadd float %159, %160 %162 = fadd float %161, %21 %163 = fmul float %22, %148 %164 = fmul float %23, %149 %165 = fadd float %163, %164 %166 = fmul float %24, %150 %167 = fadd float %165, %166 %168 = fadd float %167, %25 %169 = fmul float %148, %149 %170 = fmul float %149, %150 %171 = fmul float %150, %150 %172 = fmul float %150, %148 %173 = fmul float %26, %169 %174 = fmul float %27, %170 %175 = fadd float %173, %174 %176 = fmul float %28, %171 %177 = fadd float %175, %176 %178 = fmul float %29, %172 %179 = fadd float %177, %178 %180 = fmul float %30, %169 %181 = fmul float %31, %170 %182 = fadd float %180, %181 %183 = fmul float %32, %171 %184 = fadd float %182, %183 %185 = fmul float %33, %172 %186 = fadd float %184, %185 %187 = fmul float %34, %169 %188 = fmul float %35, %170 %189 = fadd float %187, %188 %190 = fmul float %36, %171 %191 = fadd float %189, %190 %192 = fmul float %37, %172 %193 = fadd float %191, %192 %194 = fmul float %149, %149 %195 = fmul float %148, %148 %196 = fsub float %195, %194 %197 = fmul float %38, %196 %198 = fadd float %197, %179 %199 = fmul float %39, %196 %200 = fadd float %199, %186 %201 = fmul float %40, %196 %202 = fadd float %201, %193 %203 = fadd float %198, %156 %204 = fadd float %200, %162 %205 = fadd float %202, %168 %206 = fmul float %91, %52 %207 = fadd float %206, %54 %208 = fmul float %92, %53 %209 = fadd float %208, %55 %210 = fmul float %118, %50 %211 = fadd float %210, %51 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %125, float %126, float %118, float %120) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %207, float %209, float %211, float %203) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %204, float %205, float %205, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %114, float %116, float %118, float %120) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[12:15], 0x1c ; C2000D1C buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[16:19], 0 idxen ; E00C2000 80040500 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[8:11], v0, s[8:11], 0 idxen ; E00C2000 80020800 s_buffer_load_dword s1, s[12:15], 0x1d ; C2008D1D s_buffer_load_dword s2, s[12:15], 0x1e ; C2010D1E s_buffer_load_dword s5, s[12:15], 0x20 ; C2028D20 s_buffer_load_dword s6, s[12:15], 0x21 ; C2030D21 s_buffer_load_dword s7, s[12:15], 0x22 ; C2038D22 s_buffer_load_dword s8, s[12:15], 0x24 ; C2040D24 s_buffer_load_dword s9, s[12:15], 0x25 ; C2048D25 s_buffer_load_dword s10, s[12:15], 0x26 ; C2050D26 s_buffer_load_dword s11, s[12:15], 0x28 ; C2058D28 s_buffer_load_dword s16, s[12:15], 0x29 ; C2080D29 s_buffer_load_dword s17, s[12:15], 0x2a ; C2088D2A s_buffer_load_dword s3, s[12:15], 0x32 ; C2018D32 s_buffer_load_dword s4, s[12:15], 0x33 ; C2020D33 s_buffer_load_dword s18, s[12:15], 0x34 ; C2090D34 s_buffer_load_dword s19, s[12:15], 0x35 ; C2098D35 s_buffer_load_dword s20, s[12:15], 0x36 ; C20A0D36 s_buffer_load_dword s21, s[12:15], 0x37 ; C20A8D37 s_buffer_load_dword s22, s[12:15], 0x38 ; C20B0D38 s_buffer_load_dword s23, s[12:15], 0x39 ; C20B8D39 s_buffer_load_dword s24, s[12:15], 0x3a ; C20C0D3A s_buffer_load_dword s25, s[12:15], 0x3b ; C20C8D3B s_buffer_load_dword s26, s[12:15], 0x3c ; C20D0D3C s_buffer_load_dword s27, s[12:15], 0x3d ; C20D8D3D s_buffer_load_dword s28, s[12:15], 0x3e ; C20E0D3E s_buffer_load_dword s29, s[12:15], 0x3f ; C20E8D3F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s20 ; 7E000214 s_buffer_load_dword s20, s[12:15], 0x40 ; C20A0D40 v_mov_b32_e32 v10, s21 ; 7E140215 s_buffer_load_dword s21, s[12:15], 0x41 ; C20A8D41 s_buffer_load_dword s30, s[12:15], 0x42 ; C20F0D42 s_buffer_load_dword s31, s[12:15], 0x43 ; C20F8D43 s_buffer_load_dword s32, s[12:15], 0x44 ; C2100D44 s_buffer_load_dword s33, s[12:15], 0x45 ; C2108D45 s_buffer_load_dword s34, s[12:15], 0x46 ; C2110D46 s_buffer_load_dword s35, s[12:15], 0x47 ; C2118D47 v_mul_f32_e32 v11, s22, v1 ; 10160216 v_mul_f32_e32 v12, s5, v5 ; 10180A05 v_mac_f32_e32 v0, s18, v8 ; 3E001012 v_mac_f32_e32 v10, s19, v9 ; 3E141213 v_mac_f32_e32 v12, s6, v6 ; 3E180C06 v_mul_f32_e32 v8, s8, v5 ; 10100A08 v_mac_f32_e32 v8, s9, v6 ; 3E100C09 v_mul_f32_e32 v5, s11, v5 ; 100A0A0B v_mac_f32_e32 v5, s16, v6 ; 3E0A0C10 v_mac_f32_e32 v12, s7, v7 ; 3E180E07 v_mac_f32_e32 v8, s10, v7 ; 3E100E0A v_mac_f32_e32 v5, s17, v7 ; 3E0A0E11 v_mac_f32_e32 v11, s26, v2 ; 3E16041A v_mul_f32_e32 v6, s23, v1 ; 100C0217 v_mac_f32_e32 v6, s27, v2 ; 3E0C041B v_mul_f32_e32 v7, s24, v1 ; 100E0218 v_mac_f32_e32 v7, s28, v2 ; 3E0E041C v_mul_f32_e32 v1, s25, v1 ; 10020219 v_mac_f32_e32 v1, s29, v2 ; 3E02041D s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v11, s20, v3 ; 3E160614 v_mac_f32_e32 v6, s21, v3 ; 3E0C0615 v_mac_f32_e32 v7, s30, v3 ; 3E0E061E v_mac_f32_e32 v1, s31, v3 ; 3E02061F v_mac_f32_e32 v11, s32, v4 ; 3E160820 v_mac_f32_e32 v6, s33, v4 ; 3E0C0821 v_mac_f32_e32 v7, s34, v4 ; 3E0E0822 v_mac_f32_e32 v1, s35, v4 ; 3E020823 s_buffer_load_dword s5, s[12:15], 0x0 ; C2028D00 s_buffer_load_dword s6, s[12:15], 0x4 ; C2030D04 s_buffer_load_dword s7, s[12:15], 0x5 ; C2038D05 s_buffer_load_dword s8, s[12:15], 0x6 ; C2040D06 s_buffer_load_dword s9, s[12:15], 0x7 ; C2048D07 s_buffer_load_dword s10, s[12:15], 0x8 ; C2050D08 s_buffer_load_dword s11, s[12:15], 0x9 ; C2058D09 s_buffer_load_dword s16, s[12:15], 0xa ; C2080D0A s_buffer_load_dword s17, s[12:15], 0xb ; C2088D0B s_buffer_load_dword s18, s[12:15], 0xc ; C2090D0C s_buffer_load_dword s19, s[12:15], 0xd ; C2098D0D s_buffer_load_dword s20, s[12:15], 0xe ; C20A0D0E s_buffer_load_dword s21, s[12:15], 0xf ; C20A8D0F s_buffer_load_dword s22, s[12:15], 0x10 ; C20B0D10 s_buffer_load_dword s23, s[12:15], 0x11 ; C20B8D11 s_buffer_load_dword s24, s[12:15], 0x12 ; C20C0D12 s_buffer_load_dword s25, s[12:15], 0x13 ; C20C8D13 s_buffer_load_dword s26, s[12:15], 0x14 ; C20D0D14 s_buffer_load_dword s27, s[12:15], 0x15 ; C20D8D15 s_buffer_load_dword s28, s[12:15], 0x16 ; C20E0D16 s_buffer_load_dword s29, s[12:15], 0x17 ; C20E8D17 s_buffer_load_dword s30, s[12:15], 0x18 ; C20F0D18 s_buffer_load_dword s31, s[12:15], 0x19 ; C20F8D19 s_buffer_load_dword s32, s[12:15], 0x1a ; C2100D1A s_buffer_load_dword s12, s[12:15], 0x1b ; C2060D1B v_mul_f32_e32 v2, v12, v12 ; 1004190C v_mac_f32_e32 v2, v8, v8 ; 3E041108 v_mac_f32_e32 v2, v5, v5 ; 3E040B05 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 v_mul_f32_e32 v3, 0.5, v6 ; 10060CF0 v_mul_f32_e32 v4, 0.5, v1 ; 100802F0 v_mad_f32 v9, 0.5, v11, v4 ; D2820009 041216F0 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v4, s5, v3 ; 3E080605 exp 15, 32, 0, 0, 0, v9, v4, v7, v1 ; F800020F 01070409 v_mul_f32_e32 v3, v2, v8 ; 10061102 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v4, v2, v5 ; 10080B02 v_mul_f32_e32 v5, v4, v3 ; 100A0704 v_mul_f32_e32 v8, s23, v5 ; 10100A17 v_mul_f32_e32 v9, s27, v5 ; 10120A1B v_mul_f32_e32 v5, s31, v5 ; 100A0A1F v_mul_f32_e32 v2, v2, v12 ; 10041902 v_mul_f32_e32 v12, v3, v2 ; 10180503 v_mac_f32_e32 v8, s22, v12 ; 3E101816 v_mac_f32_e32 v9, s26, v12 ; 3E12181A v_mac_f32_e32 v5, s30, v12 ; 3E0A181E v_mul_f32_e32 v12, v4, v4 ; 10180904 v_mac_f32_e32 v8, s24, v12 ; 3E101818 v_mac_f32_e32 v9, s28, v12 ; 3E12181C v_mac_f32_e32 v5, s32, v12 ; 3E0A1820 v_mul_f32_e32 v12, s7, v3 ; 10180607 v_mac_f32_e32 v12, s6, v2 ; 3E180406 v_mul_f32_e32 v13, s11, v3 ; 101A060B v_mac_f32_e32 v13, s10, v2 ; 3E1A040A v_mul_f32_e32 v14, s19, v3 ; 101C0613 v_mac_f32_e32 v14, s18, v2 ; 3E1C0412 v_mac_f32_e32 v12, s8, v4 ; 3E180808 v_mac_f32_e32 v13, s16, v4 ; 3E1A0810 v_mac_f32_e32 v14, s20, v4 ; 3E1C0814 v_mul_f32_e32 v4, v2, v4 ; 10080902 v_mac_f32_e32 v8, s25, v4 ; 3E100819 v_mac_f32_e32 v9, s29, v4 ; 3E12081D v_mac_f32_e32 v5, s12, v4 ; 3E0A080C v_mov_b32_e32 v4, s4 ; 7E080204 v_mac_f32_e32 v4, s3, v7 ; 3E080E03 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mad_f32 v2, v2, v2, -v3 ; D2820002 840E0502 v_add_f32_e32 v3, s9, v12 ; 06061809 v_mac_f32_e32 v8, s0, v2 ; 3E100400 v_mac_f32_e32 v9, s1, v2 ; 3E120401 v_mac_f32_e32 v5, s2, v2 ; 3E0A0402 v_add_f32_e32 v2, v3, v8 ; 06041103 exp 15, 33, 0, 0, 0, v0, v10, v4, v2 ; F800021F 02040A00 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v0, s17, v13 ; 06001A11 v_add_f32_e32 v2, s21, v14 ; 06041C15 v_add_f32_e32 v0, v0, v9 ; 06001300 v_add_f32_e32 v2, v2, v5 ; 06040B02 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 34, 0, 0, 0, v0, v2, v2, v3 ; F800022F 03020200 exp 15, 12, 0, 1, 0, v11, v6, v7, v1 ; F80008CF 0107060B s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 668 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0..1] DCL CONST[3] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].x, IN[1].wwww 1: MOV TEMP[0].yz, IN[2].yxyy 2: MOV TEMP[1].xy, IN[1].xyyy 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D 4: MOV TEMP[2].xy, IN[0].xyyy 5: MOV TEMP[2].w, IN[0].wwww 6: TXP TEMP[2], TEMP[2], SAMP[1], 2D 7: ADD TEMP[0].xyz, TEMP[2].xyzz, TEMP[0].xyzz 8: MUL TEMP[3].xyz, TEMP[0].xyzz, CONST[1].xyzz 9: MUL TEMP[2].x, TEMP[2].wwww, TEMP[1].wwww 10: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[3].xyzz 11: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[0].xyzz 12: MAD TEMP[0].xyz, TEMP[3].xyzz, TEMP[2].xxxx, TEMP[0].xyzz 13: MOV_SAT TEMP[1].x, IN[1].zzzz 14: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[0].xyzz 15: MOV TEMP[0].w, IMM[0].xxxx 16: MOV OUT[0], TEMP[0] 17: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %33 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %34 = load <32 x i8>, <32 x i8> addrspace(2)* %33, align 32, !tbaa !0 %35 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %38 = bitcast <8 x i32> addrspace(2)* %37 to <32 x i8> addrspace(2)* %39 = load <32 x i8>, <32 x i8> addrspace(2)* %38, align 32, !tbaa !0 %40 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %41 = bitcast <4 x i32> addrspace(2)* %40 to <16 x i8> addrspace(2)* %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %52 = bitcast float %46 to i32 %53 = bitcast float %47 to i32 %54 = insertelement <2 x i32> undef, i32 %52, i32 0 %55 = insertelement <2 x i32> %54, i32 %53, i32 1 %56 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %55, <32 x i8> %34, <16 x i8> %36, i32 2) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 1 %59 = extractelement <4 x float> %56, i32 2 %60 = extractelement <4 x float> %56, i32 3 %61 = fdiv float %43, %45 %62 = fdiv float %44, %45 %63 = bitcast float %61 to i32 %64 = bitcast float %62 to i32 %65 = insertelement <2 x i32> undef, i32 %63, i32 0 %66 = insertelement <2 x i32> %65, i32 %64, i32 1 %67 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %66, <32 x i8> %39, <16 x i8> %42, i32 2) %68 = extractelement <4 x float> %67, i32 0 %69 = extractelement <4 x float> %67, i32 1 %70 = extractelement <4 x float> %67, i32 2 %71 = extractelement <4 x float> %67, i32 3 %72 = fadd float %68, %49 %73 = fadd float %69, %50 %74 = fadd float %70, %51 %75 = fmul float %72, %27 %76 = fmul float %73, %28 %77 = fmul float %74, %29 %78 = fmul float %71, %60 %79 = fmul float %57, %30 %80 = fmul float %58, %31 %81 = fmul float %59, %32 %82 = fmul float %79, %72 %83 = fmul float %80, %73 %84 = fmul float %81, %74 %85 = fmul float %75, %78 %86 = fadd float %85, %82 %87 = fmul float %76, %78 %88 = fadd float %87, %83 %89 = fmul float %77, %78 %90 = fadd float %89, %84 %91 = call float @llvm.AMDIL.clamp.(float %48, float 0.000000e+00, float 1.000000e+00) %92 = call float @llvm.AMDGPU.lrp(float %91, float %86, float %24) %93 = call float @llvm.AMDGPU.lrp(float %91, float %88, float %25) %94 = call float @llvm.AMDGPU.lrp(float %91, float %90, float %26) %95 = call i32 @llvm.SI.packf16(float %92, float %93) %96 = bitcast i32 %95 to float %97 = call i32 @llvm.SI.packf16(float %94, float 1.000000e+00) %98 = bitcast i32 %97 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %96, float %98, float %96, float %98) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_mov_b32_e32 v5, 0x6f800000 ; 7E0A02FF 6F800000 v_cmp_gt_f32_e64 vcc, |v4|, v5 ; D008016A 00020B04 v_mov_b32_e32 v5, 0x2f800000 ; 7E0A02FF 2F800000 v_cndmask_b32_e32 v5, 1.0, v5 ; 000A0AF2 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_mul_f32_e32 v4, v5, v4 ; 10080905 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 v_rcp_f32_e32 v4, v4 ; 7E085504 v_interp_p1_f32 v0, v0, 1, 2, [m0] ; C8000900 v_interp_p2_f32 v0, [v0], v1, 1, 2, [m0] ; C8010901 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[16:23], s[12:15] ; F0800F00 00640B06 v_mul_f32_e32 v1, v4, v2 ; 10020504 v_mul_f32_e32 v2, v4, v3 ; 10040704 v_mul_f32_e32 v3, v1, v5 ; 10060B01 v_mul_f32_e32 v4, v2, v5 ; 10080B02 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C image_sample v[1:4], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[24:31], s[8:11] ; F0800F00 00460103 s_buffer_load_dword s5, s[0:3], 0xd ; C202810D s_buffer_load_dword s6, s[0:3], 0xe ; C203010E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_add_f32_e32 v1, v9, v1 ; 06020309 v_add_f32_e32 v2, v10, v2 ; 0604050A v_add_f32_e32 v0, v0, v3 ; 06000700 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 v_mul_f32_e32 v3, v14, v4 ; 1006090E s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 v_mul_f32_e32 v4, s4, v11 ; 10081604 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 v_mul_f32_e32 v5, s5, v12 ; 100A1805 v_mul_f32_e32 v6, s6, v13 ; 100C1A06 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s7, v1 ; 100E0207 v_mul_f32_e32 v1, v1, v4 ; 10020901 v_mul_f32_e32 v4, s8, v2 ; 10080408 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_mul_f32_e32 v5, s4, v0 ; 100A0004 v_mul_f32_e32 v0, v0, v6 ; 10000D00 v_mac_f32_e32 v1, v3, v7 ; 3E020F03 v_mac_f32_e32 v2, v3, v4 ; 3E040903 v_mac_f32_e32 v0, v3, v5 ; 3E000B03 v_add_f32_e64 v3, 0, v8 clamp ; D2060803 00021080 v_sub_f32_e32 v4, 1.0, v3 ; 080806F2 v_mul_f32_e32 v5, s5, v4 ; 100A0805 v_mac_f32_e32 v5, v1, v3 ; 3E0A0701 v_mul_f32_e32 v1, s6, v4 ; 10020806 v_mac_f32_e32 v1, v2, v3 ; 3E020702 v_mul_f32_e32 v2, s0, v4 ; 10040800 v_mac_f32_e32 v2, v0, v3 ; 3E040700 v_cvt_pkrtz_f16_f32_e32 v0, v5, v1 ; 5E000305 v_cvt_pkrtz_f16_f32_e64 v1, v2, 1.0 ; D25E0001 0001E502 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 340 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xyz, IN[1].xyzx 5: MOV OUT[1], TEMP[1] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = fmul float %13, %33 %45 = fmul float %14, %33 %46 = fmul float %15, %33 %47 = fmul float %16, %33 %48 = fmul float %17, %34 %49 = fadd float %48, %44 %50 = fmul float %18, %34 %51 = fadd float %50, %45 %52 = fmul float %19, %34 %53 = fadd float %52, %46 %54 = fmul float %20, %34 %55 = fadd float %54, %47 %56 = fmul float %21, %35 %57 = fadd float %56, %49 %58 = fmul float %22, %35 %59 = fadd float %58, %51 %60 = fmul float %23, %35 %61 = fadd float %60, %53 %62 = fmul float %24, %35 %63 = fadd float %62, %55 %64 = fmul float %25, %36 %65 = fadd float %64, %57 %66 = fmul float %26, %36 %67 = fadd float %66, %59 %68 = fmul float %27, %36 %69 = fadd float %68, %61 %70 = fmul float %28, %36 %71 = fadd float %70, %63 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %65, float %67, float %69, float %71) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v0, s12, v2 ; 1000040C s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, s6, v3 ; 3E000606 v_mul_f32_e32 v9, s13, v2 ; 1012040D v_mac_f32_e32 v9, s7, v3 ; 3E120607 v_mul_f32_e32 v10, s4, v2 ; 10140404 v_mac_f32_e32 v10, s8, v3 ; 3E140608 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mac_f32_e32 v2, s9, v3 ; 3E040609 v_mac_f32_e32 v0, s10, v4 ; 3E00080A v_mac_f32_e32 v9, s11, v4 ; 3E12080B v_mac_f32_e32 v10, s14, v4 ; 3E14080E v_mac_f32_e32 v2, s15, v4 ; 3E04080F v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mac_f32_e32 v9, s17, v5 ; 3E120A11 v_mac_f32_e32 v10, s18, v5 ; 3E140A12 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 exp 15, 32, 0, 0, 0, v6, v7, v8, v1 ; F800020F 01080706 exp 15, 12, 0, 1, 0, v0, v9, v10, v2 ; F80008CF 020A0900 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 196 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0] DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[0], TEMP[0], CONST[0] 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, align 32, !tbaa !0 %30 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %34 = bitcast float %32 to i32 %35 = bitcast float %33 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %29, <16 x i8> %31, i32 2) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = fmul float %39, %24 %44 = fmul float %40, %25 %45 = fmul float %41, %26 %46 = fmul float %42, %27 %47 = call i32 @llvm.SI.packf16(float %43, float %44) %48 = bitcast i32 %47 to float %49 = call i32 @llvm.SI.packf16(float %45, float %46) %50 = bitcast i32 %49 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %48, float %50, float %48, float %50) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[20:23] ; F0800F00 00A30002 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, s6, v0 ; 10000006 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mul_f32_e32 v3, s0, v3 ; 10060600 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 108 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xyz, IN[1].xyzx 5: MOV OUT[1], TEMP[1] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = fmul float %13, %33 %45 = fmul float %14, %33 %46 = fmul float %15, %33 %47 = fmul float %16, %33 %48 = fmul float %17, %34 %49 = fadd float %48, %44 %50 = fmul float %18, %34 %51 = fadd float %50, %45 %52 = fmul float %19, %34 %53 = fadd float %52, %46 %54 = fmul float %20, %34 %55 = fadd float %54, %47 %56 = fmul float %21, %35 %57 = fadd float %56, %49 %58 = fmul float %22, %35 %59 = fadd float %58, %51 %60 = fmul float %23, %35 %61 = fadd float %60, %53 %62 = fmul float %24, %35 %63 = fadd float %62, %55 %64 = fmul float %25, %36 %65 = fadd float %64, %57 %66 = fmul float %26, %36 %67 = fadd float %66, %59 %68 = fmul float %27, %36 %69 = fadd float %68, %61 %70 = fmul float %28, %36 %71 = fadd float %70, %63 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %65, float %67, float %69, float %71) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v0, s12, v2 ; 1000040C s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, s6, v3 ; 3E000606 v_mul_f32_e32 v9, s13, v2 ; 1012040D v_mac_f32_e32 v9, s7, v3 ; 3E120607 v_mul_f32_e32 v10, s4, v2 ; 10140404 v_mac_f32_e32 v10, s8, v3 ; 3E140608 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mac_f32_e32 v2, s9, v3 ; 3E040609 v_mac_f32_e32 v0, s10, v4 ; 3E00080A v_mac_f32_e32 v9, s11, v4 ; 3E12080B v_mac_f32_e32 v10, s14, v4 ; 3E14080E v_mac_f32_e32 v2, s15, v4 ; 3E04080F v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mac_f32_e32 v9, s17, v5 ; 3E120A11 v_mac_f32_e32 v10, s18, v5 ; 3E140A12 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 exp 15, 32, 0, 0, 0, v6, v7, v8, v1 ; F800020F 01080706 exp 15, 12, 0, 1, 0, v0, v9, v10, v2 ; F80008CF 020A0900 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 196 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0..1] DCL TEMP[0], LOCAL 0: MUL TEMP[0].xy, IN[0].xyyy, CONST[1].xxxx 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[0], 2D 3: MUL TEMP[0], TEMP[0], CONST[0] 4: MOV OUT[0], TEMP[0] 5: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %30 = load <32 x i8>, <32 x i8> addrspace(2)* %29, align 32, !tbaa !0 %31 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 %33 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %35 = fmul float %33, %28 %36 = fmul float %34, %28 %37 = bitcast float %35 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %30, <16 x i8> %32, i32 2) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %24 %47 = fmul float %43, %25 %48 = fmul float %44, %26 %49 = fmul float %45, %27 %50 = call i32 @llvm.SI.packf16(float %46, float %47) %51 = bitcast i32 %50 to float %52 = call i32 @llvm.SI.packf16(float %48, float %49) %53 = bitcast i32 %52 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %51, float %53, float %51, float %53) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_mov_b32 m0, s9 ; BEFC0309 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v0, v0, 1, 0, [m0] ; C8000100 v_interp_p2_f32 v0, [v0], v1, 1, 0, [m0] ; C8010101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v2 ; 10020404 v_mul_f32_e32 v2, s4, v0 ; 10040004 image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[16:23], s[12:15] ; F0800F00 00640001 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, s5, v0 ; 10000005 v_mul_f32_e32 v1, s6, v1 ; 10020206 v_mul_f32_e32 v2, s7, v2 ; 10040407 v_mul_f32_e32 v3, s0, v3 ; 10060600 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 120 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL CONST[0..19] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[16], IN[0].xxxx 1: MAD TEMP[0], CONST[17], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[18], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[19], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[7], IN[0].xxxx 5: MAD TEMP[1], CONST[8], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[9], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1].xyz, CONST[10], IN[0].wwww, TEMP[1] 8: MOV TEMP[2].x, CONST[11].xxxx 9: MOV TEMP[2].y, CONST[12].xxxx 10: MOV TEMP[2].z, CONST[13].xxxx 11: MOV TEMP[3].x, CONST[11].yyyy 12: MOV TEMP[3].y, CONST[12].yyyy 13: MOV TEMP[3].z, CONST[13].yyyy 14: MOV TEMP[4].x, CONST[11].zzzz 15: MOV TEMP[4].y, CONST[12].zzzz 16: MOV TEMP[4].z, CONST[13].zzzz 17: MUL TEMP[2].xyz, TEMP[2].xyzz, IN[1].xxxx 18: MAD TEMP[2].xyz, TEMP[3].xyzz, IN[1].yyyy, TEMP[2].xyzz 19: MAD TEMP[2].xyz, TEMP[4].xyzz, IN[1].zzzz, TEMP[2].xyzz 20: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 21: RSQ TEMP[3].x, TEMP[3].xxxx 22: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 23: MUL TEMP[3].xyz, CONST[7].xyzz, IN[3].xxxx 24: MAD TEMP[3].xyz, CONST[8].xyzz, IN[3].yyyy, TEMP[3].xyzz 25: MAD TEMP[3].xyz, CONST[9].xyzz, IN[3].zzzz, TEMP[3].xyzz 26: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 27: RSQ TEMP[4].x, TEMP[4].xxxx 28: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 29: MUL TEMP[4].xyz, TEMP[2].zxyy, TEMP[3].yzxx 30: MAD TEMP[4].xyz, TEMP[2].yzxx, TEMP[3].zxyy, -TEMP[4].xyzz 31: MUL TEMP[4].xyz, TEMP[4].xyzz, IN[3].wwww 32: MOV TEMP[5].x, TEMP[3].xxxx 33: MOV TEMP[5].y, TEMP[4].xxxx 34: MOV TEMP[5].z, TEMP[2].xxxx 35: MOV TEMP[5].w, TEMP[1].xxxx 36: MOV TEMP[6].x, TEMP[3].yyyy 37: MOV TEMP[6].y, TEMP[4].yyyy 38: MOV TEMP[6].z, TEMP[2].yyyy 39: MOV TEMP[6].w, TEMP[1].yyyy 40: MOV TEMP[3].x, TEMP[3].zzzz 41: MOV TEMP[3].y, TEMP[4].zzzz 42: MOV TEMP[3].z, TEMP[2].zzzz 43: MOV TEMP[3].w, TEMP[1].zzzz 44: MOV TEMP[1].w, IMM[0].xxxx 45: MOV TEMP[1].xyz, TEMP[2].xyzx 46: DP4 TEMP[4].x, CONST[0], TEMP[1] 47: DP4 TEMP[7].x, CONST[1], TEMP[1] 48: MOV TEMP[4].y, TEMP[7].xxxx 49: DP4 TEMP[1].x, CONST[2], TEMP[1] 50: MOV TEMP[4].z, TEMP[1].xxxx 51: MUL TEMP[1], TEMP[2].xyzz, TEMP[2].yzzx 52: DP4 TEMP[7].x, CONST[3], TEMP[1] 53: DP4 TEMP[8].x, CONST[4], TEMP[1] 54: MOV TEMP[7].y, TEMP[8].xxxx 55: DP4 TEMP[1].x, CONST[5], TEMP[1] 56: MOV TEMP[7].z, TEMP[1].xxxx 57: MUL TEMP[1].x, TEMP[2].yyyy, TEMP[2].yyyy 58: MAD TEMP[1].x, TEMP[2].xxxx, TEMP[2].xxxx, -TEMP[1].xxxx 59: MAD TEMP[1].xyz, CONST[6].xyzz, TEMP[1].xxxx, TEMP[7].xyzz 60: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xyzz 61: MAD TEMP[2].xy, IN[2].xyyy, CONST[15].xyyy, CONST[15].zwww 62: MOV TEMP[2].w, TEMP[1].xxxx 63: MOV TEMP[1].xy, TEMP[1].yzyy 64: MOV TEMP[2].z, TEMP[0].zzzz 65: MOV OUT[4], TEMP[2] 66: MOV OUT[1], TEMP[5] 67: MOV OUT[2], TEMP[6] 68: MOV OUT[3], TEMP[3] 69: MOV OUT[0], TEMP[0] 70: MOV OUT[5], TEMP[1] 71: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 %83 = add i32 %5, %7 %84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83) %85 = extractelement <4 x float> %84, i32 0 %86 = extractelement <4 x float> %84, i32 1 %87 = extractelement <4 x float> %84, i32 2 %88 = extractelement <4 x float> %84, i32 3 %89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0 %91 = add i32 %5, %7 %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91) %93 = extractelement <4 x float> %92, i32 0 %94 = extractelement <4 x float> %92, i32 1 %95 = extractelement <4 x float> %92, i32 2 %96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0 %98 = add i32 %5, %7 %99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !tbaa !0 %104 = add i32 %5, %7 %105 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %103, i32 0, i32 %104) %106 = extractelement <4 x float> %105, i32 0 %107 = extractelement <4 x float> %105, i32 1 %108 = extractelement <4 x float> %105, i32 2 %109 = extractelement <4 x float> %105, i32 3 %110 = fmul float %65, %85 %111 = fmul float %66, %85 %112 = fmul float %67, %85 %113 = fmul float %68, %85 %114 = fmul float %69, %86 %115 = fadd float %114, %110 %116 = fmul float %70, %86 %117 = fadd float %116, %111 %118 = fmul float %71, %86 %119 = fadd float %118, %112 %120 = fmul float %72, %86 %121 = fadd float %120, %113 %122 = fmul float %73, %87 %123 = fadd float %122, %115 %124 = fmul float %74, %87 %125 = fadd float %124, %117 %126 = fmul float %75, %87 %127 = fadd float %126, %119 %128 = fmul float %76, %87 %129 = fadd float %128, %121 %130 = fmul float %77, %88 %131 = fadd float %130, %123 %132 = fmul float %78, %88 %133 = fadd float %132, %125 %134 = fmul float %79, %88 %135 = fadd float %134, %127 %136 = fmul float %80, %88 %137 = fadd float %136, %129 %138 = fmul float %40, %85 %139 = fmul float %41, %85 %140 = fmul float %42, %85 %141 = fmul float %43, %86 %142 = fadd float %141, %138 %143 = fmul float %44, %86 %144 = fadd float %143, %139 %145 = fmul float %45, %86 %146 = fadd float %145, %140 %147 = fmul float %46, %87 %148 = fadd float %147, %142 %149 = fmul float %47, %87 %150 = fadd float %149, %144 %151 = fmul float %48, %87 %152 = fadd float %151, %146 %153 = fmul float %49, %88 %154 = fadd float %153, %148 %155 = fmul float %50, %88 %156 = fadd float %155, %150 %157 = fmul float %51, %88 %158 = fadd float %157, %152 %159 = fmul float %52, %93 %160 = fmul float %55, %93 %161 = fmul float %58, %93 %162 = fmul float %53, %94 %163 = fadd float %162, %159 %164 = fmul float %56, %94 %165 = fadd float %164, %160 %166 = fmul float %59, %94 %167 = fadd float %166, %161 %168 = fmul float %54, %95 %169 = fadd float %168, %163 %170 = fmul float %57, %95 %171 = fadd float %170, %165 %172 = fmul float %60, %95 %173 = fadd float %172, %167 %174 = fmul float %169, %169 %175 = fmul float %171, %171 %176 = fadd float %175, %174 %177 = fmul float %173, %173 %178 = fadd float %176, %177 %179 = call float @llvm.AMDGPU.rsq.clamped.f32(float %178) %180 = fmul float %169, %179 %181 = fmul float %171, %179 %182 = fmul float %173, %179 %183 = fmul float %40, %106 %184 = fmul float %41, %106 %185 = fmul float %42, %106 %186 = fmul float %43, %107 %187 = fadd float %186, %183 %188 = fmul float %44, %107 %189 = fadd float %188, %184 %190 = fmul float %45, %107 %191 = fadd float %190, %185 %192 = fmul float %46, %108 %193 = fadd float %192, %187 %194 = fmul float %47, %108 %195 = fadd float %194, %189 %196 = fmul float %48, %108 %197 = fadd float %196, %191 %198 = fmul float %193, %193 %199 = fmul float %195, %195 %200 = fadd float %199, %198 %201 = fmul float %197, %197 %202 = fadd float %200, %201 %203 = call float @llvm.AMDGPU.rsq.clamped.f32(float %202) %204 = fmul float %193, %203 %205 = fmul float %195, %203 %206 = fmul float %197, %203 %207 = fmul float %182, %205 %208 = fmul float %180, %206 %209 = fmul float %181, %204 %210 = fmul float %181, %206 %211 = fsub float %210, %207 %212 = fmul float %182, %204 %213 = fsub float %212, %208 %214 = fmul float %180, %205 %215 = fsub float %214, %209 %216 = fmul float %211, %109 %217 = fmul float %213, %109 %218 = fmul float %215, %109 %219 = fmul float %13, %180 %220 = fmul float %14, %181 %221 = fadd float %219, %220 %222 = fmul float %15, %182 %223 = fadd float %221, %222 %224 = fadd float %223, %16 %225 = fmul float %17, %180 %226 = fmul float %18, %181 %227 = fadd float %225, %226 %228 = fmul float %19, %182 %229 = fadd float %227, %228 %230 = fadd float %229, %20 %231 = fmul float %21, %180 %232 = fmul float %22, %181 %233 = fadd float %231, %232 %234 = fmul float %23, %182 %235 = fadd float %233, %234 %236 = fadd float %235, %24 %237 = fmul float %180, %181 %238 = fmul float %181, %182 %239 = fmul float %182, %182 %240 = fmul float %182, %180 %241 = fmul float %25, %237 %242 = fmul float %26, %238 %243 = fadd float %241, %242 %244 = fmul float %27, %239 %245 = fadd float %243, %244 %246 = fmul float %28, %240 %247 = fadd float %245, %246 %248 = fmul float %29, %237 %249 = fmul float %30, %238 %250 = fadd float %248, %249 %251 = fmul float %31, %239 %252 = fadd float %250, %251 %253 = fmul float %32, %240 %254 = fadd float %252, %253 %255 = fmul float %33, %237 %256 = fmul float %34, %238 %257 = fadd float %255, %256 %258 = fmul float %35, %239 %259 = fadd float %257, %258 %260 = fmul float %36, %240 %261 = fadd float %259, %260 %262 = fmul float %181, %181 %263 = fmul float %180, %180 %264 = fsub float %263, %262 %265 = fmul float %37, %264 %266 = fadd float %265, %247 %267 = fmul float %38, %264 %268 = fadd float %267, %254 %269 = fmul float %39, %264 %270 = fadd float %269, %261 %271 = fadd float %266, %224 %272 = fadd float %268, %230 %273 = fadd float %270, %236 %274 = fmul float %100, %61 %275 = fadd float %274, %63 %276 = fmul float %101, %62 %277 = fadd float %276, %64 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %204, float %216, float %180, float %154) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %205, float %217, float %181, float %156) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %206, float %218, float %182, float %158) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %275, float %277, float %135, float %271) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %272, float %273, float %273, float %240) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %131, float %133, float %135, float %137) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[10:13], v0, s[8:11], 0 idxen ; E00C2000 80020A00 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x3e ; C202013E s_buffer_load_dword s5, s[0:3], 0x3f ; C202813F s_buffer_load_dword s6, s[0:3], 0x3c ; C203013C s_buffer_load_dword s7, s[0:3], 0x3d ; C203813D s_buffer_load_dword s8, s[0:3], 0x2c ; C204012C s_buffer_load_dword s9, s[0:3], 0x40 ; C2048140 s_buffer_load_dword s10, s[0:3], 0x41 ; C2050141 s_buffer_load_dword s11, s[0:3], 0x42 ; C2058142 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s4 ; 7E000204 s_buffer_load_dword s4, s[0:3], 0x2d ; C202012D v_mac_f32_e32 v0, s6, v8 ; 3E001006 v_mov_b32_e32 v8, s5 ; 7E100205 s_buffer_load_dword s5, s[0:3], 0x30 ; C2028130 s_buffer_load_dword s6, s[0:3], 0x31 ; C2030131 v_mac_f32_e32 v8, s7, v9 ; 3E101207 s_buffer_load_dword s7, s[0:3], 0x34 ; C2038134 v_mul_f32_e32 v9, s8, v5 ; 10120A08 s_buffer_load_dword s8, s[0:3], 0x35 ; C2040135 s_buffer_load_dword s12, s[0:3], 0x2e ; C206012E s_buffer_load_dword s13, s[0:3], 0x32 ; C2068132 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v9, s4, v6 ; 3E120C04 s_buffer_load_dword s4, s[0:3], 0x36 ; C2020136 v_mul_f32_e32 v14, s5, v5 ; 101C0A05 v_mac_f32_e32 v14, s6, v6 ; 3E1C0C06 s_buffer_load_dword s5, s[0:3], 0x44 ; C2028144 v_mul_f32_e32 v5, s7, v5 ; 100A0A07 s_buffer_load_dword s6, s[0:3], 0x45 ; C2030145 v_mac_f32_e32 v5, s8, v6 ; 3E0A0C08 v_mac_f32_e32 v9, s12, v7 ; 3E120E0C s_buffer_load_dword s7, s[0:3], 0x46 ; C2038146 v_mac_f32_e32 v14, s13, v7 ; 3E1C0E0D s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v5, s4, v7 ; 3E0A0E04 v_mul_f32_e32 v6, s9, v1 ; 100C0209 s_buffer_load_dword s4, s[0:3], 0x47 ; C2020147 v_mac_f32_e32 v6, s5, v2 ; 3E0C0405 v_mul_f32_e32 v7, s10, v1 ; 100E020A v_mac_f32_e32 v7, s6, v2 ; 3E0E0406 v_mul_f32_e32 v15, s11, v1 ; 101E020B s_buffer_load_dword s5, s[0:3], 0x43 ; C2028143 v_mac_f32_e32 v15, s7, v2 ; 3E1E0407 s_buffer_load_dword s6, s[0:3], 0x48 ; C2030148 s_buffer_load_dword s7, s[0:3], 0x49 ; C2038149 s_buffer_load_dword s8, s[0:3], 0x4a ; C204014A s_buffer_load_dword s9, s[0:3], 0x4b ; C204814B s_buffer_load_dword s10, s[0:3], 0x4c ; C205014C s_buffer_load_dword s11, s[0:3], 0x4d ; C205814D s_buffer_load_dword s12, s[0:3], 0x4e ; C206014E s_buffer_load_dword s13, s[0:3], 0x4f ; C206814F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v16, s5, v1 ; 10200205 v_mac_f32_e32 v16, s4, v2 ; 3E200404 v_mac_f32_e32 v6, s6, v3 ; 3E0C0606 v_mac_f32_e32 v7, s7, v3 ; 3E0E0607 v_mac_f32_e32 v15, s8, v3 ; 3E1E0608 v_mac_f32_e32 v16, s9, v3 ; 3E200609 v_mac_f32_e32 v6, s10, v4 ; 3E0C080A v_mac_f32_e32 v7, s11, v4 ; 3E0E080B v_mac_f32_e32 v15, s12, v4 ; 3E1E080C v_mac_f32_e32 v16, s13, v4 ; 3E20080D s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_buffer_load_dword s5, s[0:3], 0x20 ; C2028120 s_buffer_load_dword s6, s[0:3], 0x21 ; C2030121 s_buffer_load_dword s7, s[0:3], 0x22 ; C2038122 s_buffer_load_dword s8, s[0:3], 0x24 ; C2040124 s_buffer_load_dword s9, s[0:3], 0x28 ; C2048128 s_buffer_load_dword s10, s[0:3], 0x1d ; C205011D s_buffer_load_dword s11, s[0:3], 0x1e ; C205811E s_buffer_load_dword s12, s[0:3], 0x25 ; C2060125 s_buffer_load_dword s13, s[0:3], 0x29 ; C2068129 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v17, s4, v1 ; 10220204 v_mac_f32_e32 v17, s5, v2 ; 3E220405 s_buffer_load_dword s14, s[0:3], 0x26 ; C2070126 s_buffer_load_dword s15, s[0:3], 0x2a ; C207812A v_mac_f32_e32 v17, s8, v3 ; 3E220608 v_mac_f32_e32 v17, s9, v4 ; 3E220809 v_mul_f32_e32 v18, s10, v1 ; 1024020A v_mac_f32_e32 v18, s6, v2 ; 3E240406 v_mac_f32_e32 v18, s12, v3 ; 3E24060C v_mac_f32_e32 v18, s13, v4 ; 3E24080D v_mul_f32_e32 v1, s11, v1 ; 1002020B v_mac_f32_e32 v1, s7, v2 ; 3E020407 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v1, s14, v3 ; 3E02060E v_mac_f32_e32 v1, s15, v4 ; 3E02080F v_mul_f32_e32 v2, s4, v10 ; 10041404 v_mac_f32_e32 v2, s5, v11 ; 3E041605 v_mul_f32_e32 v3, s10, v10 ; 1006140A v_mac_f32_e32 v3, s6, v11 ; 3E061606 v_mul_f32_e32 v4, s11, v10 ; 1008140B v_mac_f32_e32 v4, s7, v11 ; 3E081607 v_mac_f32_e32 v2, s8, v12 ; 3E041808 v_mac_f32_e32 v3, s12, v12 ; 3E06180C v_mac_f32_e32 v4, s14, v12 ; 3E08180E v_mul_f32_e32 v10, v9, v9 ; 10141309 v_mac_f32_e32 v10, v14, v14 ; 3E141D0E v_mac_f32_e32 v10, v5, v5 ; 3E140B05 v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A v_mul_f32_e32 v11, v2, v2 ; 10160502 v_mac_f32_e32 v11, v3, v3 ; 3E160703 v_mac_f32_e32 v11, v4, v4 ; 3E160904 v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B v_mul_f32_e32 v9, v10, v9 ; 1012130A v_mul_f32_e32 v12, v10, v14 ; 10181D0A v_mul_f32_e32 v5, v10, v5 ; 100A0B0A v_mul_f32_e32 v2, v11, v2 ; 1004050B v_mul_f32_e32 v3, v11, v3 ; 1006070B v_mul_f32_e32 v4, v11, v4 ; 1008090B v_mul_f32_e32 v10, v3, v5 ; 10140B03 v_mad_f32 v10, v12, v4, -v10 ; D282000A 842A090C v_mul_f32_e32 v11, v4, v9 ; 10161304 v_mad_f32 v11, v5, v2, -v11 ; D282000B 842E0505 v_mul_f32_e32 v14, v2, v12 ; 101C1902 v_mad_f32 v14, v9, v3, -v14 ; D282000E 843A0709 v_mul_f32_e32 v10, v13, v10 ; 1014150D v_mul_f32_e32 v11, v13, v11 ; 1016170D v_mul_f32_e32 v13, v13, v14 ; 101A1D0D s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_buffer_load_dword s5, s[0:3], 0x1a ; C202811A s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101 s_buffer_load_dword s8, s[0:3], 0x2 ; C2040102 s_buffer_load_dword s9, s[0:3], 0x3 ; C2048103 s_buffer_load_dword s10, s[0:3], 0x4 ; C2050104 s_buffer_load_dword s11, s[0:3], 0x5 ; C2058105 s_buffer_load_dword s12, s[0:3], 0x6 ; C2060106 s_buffer_load_dword s13, s[0:3], 0x7 ; C2068107 s_buffer_load_dword s14, s[0:3], 0x8 ; C2070108 s_buffer_load_dword s15, s[0:3], 0x9 ; C2078109 s_buffer_load_dword s16, s[0:3], 0xa ; C208010A s_buffer_load_dword s17, s[0:3], 0xb ; C208810B s_buffer_load_dword s18, s[0:3], 0xc ; C209010C s_buffer_load_dword s19, s[0:3], 0xd ; C209810D s_buffer_load_dword s20, s[0:3], 0xe ; C20A010E s_buffer_load_dword s21, s[0:3], 0x14 ; C20A8114 s_buffer_load_dword s22, s[0:3], 0x15 ; C20B0115 s_buffer_load_dword s23, s[0:3], 0xf ; C20B810F s_buffer_load_dword s24, s[0:3], 0x10 ; C20C0110 s_buffer_load_dword s25, s[0:3], 0x11 ; C20C8111 s_buffer_load_dword s26, s[0:3], 0x12 ; C20D0112 s_buffer_load_dword s27, s[0:3], 0x13 ; C20D8113 s_buffer_load_dword s28, s[0:3], 0x16 ; C20E0116 s_buffer_load_dword s29, s[0:3], 0x17 ; C20E8117 s_buffer_load_dword s0, s[0:3], 0x18 ; C2000118 exp 15, 32, 0, 0, 0, v2, v10, v9, v17 ; F800020F 11090A02 exp 15, 33, 0, 0, 0, v3, v11, v12, v18 ; F800021F 120C0B03 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v2, v5, v12 ; 10041905 v_mul_f32_e32 v3, s19, v2 ; 10060413 v_mul_f32_e32 v10, s25, v2 ; 10140419 v_mul_f32_e32 v2, s22, v2 ; 10040416 v_mul_f32_e32 v11, v12, v9 ; 1016130C v_mac_f32_e32 v3, s18, v11 ; 3E061612 v_mac_f32_e32 v10, s24, v11 ; 3E141618 v_mac_f32_e32 v2, s21, v11 ; 3E041615 v_mul_f32_e32 v11, v5, v5 ; 10160B05 v_mac_f32_e32 v3, s20, v11 ; 3E061614 v_mac_f32_e32 v10, s26, v11 ; 3E14161A v_mac_f32_e32 v2, s28, v11 ; 3E04161C v_mul_f32_e32 v11, s7, v12 ; 10161807 v_mac_f32_e32 v11, s6, v9 ; 3E161206 v_mul_f32_e32 v14, s11, v12 ; 101C180B v_mac_f32_e32 v14, s10, v9 ; 3E1C120A v_mul_f32_e32 v17, s15, v12 ; 1022180F v_mac_f32_e32 v17, s14, v9 ; 3E22120E v_mul_f32_e32 v18, v9, v5 ; 10240B09 v_mac_f32_e32 v3, s23, v18 ; 3E062417 v_mac_f32_e32 v10, s27, v18 ; 3E14241B v_mac_f32_e32 v2, s29, v18 ; 3E04241D v_mul_f32_e32 v12, v12, v12 ; 1018190C v_mad_f32 v9, v9, v9, -v12 ; D2820009 84321309 v_mac_f32_e32 v3, s0, v9 ; 3E061200 v_mac_f32_e32 v10, s4, v9 ; 3E141204 v_mac_f32_e32 v2, s5, v9 ; 3E041205 v_mac_f32_e32 v11, s8, v5 ; 3E160A08 v_mac_f32_e32 v14, s12, v5 ; 3E1C0A0C v_mac_f32_e32 v17, s16, v5 ; 3E220A10 v_add_f32_e32 v9, s9, v11 ; 06121609 v_add_f32_e32 v11, s13, v14 ; 06161C0D v_add_f32_e32 v12, s17, v17 ; 06182211 exp 15, 34, 0, 0, 0, v4, v13, v5, v1 ; F800022F 01050D04 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v1, v9, v3 ; 06020709 exp 15, 35, 0, 0, 0, v0, v8, v15, v1 ; F800023F 010F0800 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v0, v11, v10 ; 0600150B v_add_f32_e32 v1, v12, v2 ; 0602050C exp 15, 36, 0, 0, 0, v0, v1, v1, v18 ; F800024F 12010100 exp 15, 12, 0, 1, 0, v6, v7, v15, v16 ; F80008CF 100F0706 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 20 Code Size: 884 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL CONST[0..4] DCL TEMP[0..12], LOCAL IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 1.5000} IMM[1] FLT32 { 4.0000, 8.0000, 16.0000, 32.0000} IMM[2] FLT32 { 1.2000, 0.6000, 0.0000, 1.0000} IMM[3] FLT32 { 50.0000, 3.0000, 0.3500, -0.9200} 0: MOV TEMP[0].x, IN[3].wwww 1: MOV TEMP[0].yz, IN[4].yxyy 2: MOV TEMP[1].x, IN[0].wwww 3: MOV TEMP[1].y, IN[1].wwww 4: MOV TEMP[1].z, IN[2].wwww 5: MOV TEMP[2].xy, IN[3].xyyy 6: TEX TEMP[2].yw, TEMP[2], SAMP[1], 2D 7: MAD TEMP[2].xy, TEMP[2].wyyy, IMM[0].xxxx, IMM[0].yyyy 8: DP2 TEMP[3].x, TEMP[2].xyyy, TEMP[2].xyyy 9: MOV_SAT TEMP[3].x, TEMP[3].xxxx 10: ADD TEMP[3].x, IMM[0].zzzz, -TEMP[3].xxxx 11: SQRT TEMP[3].x, TEMP[3].xxxx 12: MOV TEMP[2].z, TEMP[3].xxxx 13: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 14: RSQ TEMP[3].x, TEMP[3].xxxx 15: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 16: MOV TEMP[3].xz, TEMP[2].xxzx 17: MOV TEMP[3].y, -TEMP[2].yyyy 18: MUL TEMP[3].xy, TEMP[3].xyyy, IMM[0].xxxx 19: MOV TEMP[2].xy, IN[3].xyyy 20: TEX TEMP[2].x, TEMP[2], SAMP[2], 2D 21: MUL TEMP[4].x, TEMP[2].xxxx, IMM[0].wwww 22: MOV_SAT TEMP[4].x, TEMP[4].xxxx 23: MUL TEMP[5].xy, IN[3].xyyy, IMM[1].xxxx 24: MOV TEMP[6].xy, TEMP[5].xyyy 25: TEX TEMP[6].yw, TEMP[6], SAMP[4], 2D 26: MAD TEMP[6].xy, TEMP[6].wyyy, IMM[0].xxxx, IMM[0].yyyy 27: DP2 TEMP[7].x, TEMP[6].xyyy, TEMP[6].xyyy 28: MOV_SAT TEMP[7].x, TEMP[7].xxxx 29: ADD TEMP[7].x, IMM[0].zzzz, -TEMP[7].xxxx 30: SQRT TEMP[7].x, TEMP[7].xxxx 31: MOV TEMP[6].z, TEMP[7].xxxx 32: MUL TEMP[7].xy, IN[3].xyyy, IMM[1].yyyy 33: MOV TEMP[8].xy, TEMP[7].xyyy 34: TEX TEMP[8].yw, TEMP[8], SAMP[4], 2D 35: MAD TEMP[8].xy, TEMP[8].wyyy, IMM[0].xxxx, IMM[0].yyyy 36: DP2 TEMP[9].x, TEMP[8].xyyy, TEMP[8].xyyy 37: MOV_SAT TEMP[9].x, TEMP[9].xxxx 38: ADD TEMP[9].x, IMM[0].zzzz, -TEMP[9].xxxx 39: SQRT TEMP[9].x, TEMP[9].xxxx 40: MOV TEMP[8].z, TEMP[9].xxxx 41: MUL TEMP[9].xy, IN[3].xyyy, IMM[1].zzzz 42: MOV TEMP[10].xy, TEMP[9].xyyy 43: TEX TEMP[10].yw, TEMP[10], SAMP[4], 2D 44: MAD TEMP[10].xy, TEMP[10].wyyy, IMM[0].xxxx, IMM[0].yyyy 45: DP2 TEMP[11].x, TEMP[10].xyyy, TEMP[10].xyyy 46: MOV_SAT TEMP[11].x, TEMP[11].xxxx 47: ADD TEMP[11].x, IMM[0].zzzz, -TEMP[11].xxxx 48: SQRT TEMP[11].x, TEMP[11].xxxx 49: MOV TEMP[10].z, TEMP[11].xxxx 50: MUL TEMP[11].xy, IN[3].xyyy, IMM[1].wwww 51: MOV TEMP[11].xy, TEMP[11].xyyy 52: TEX TEMP[11].yw, TEMP[11], SAMP[4], 2D 53: MAD TEMP[11].xy, TEMP[11].wyyy, IMM[0].xxxx, IMM[0].yyyy 54: DP2 TEMP[12].x, TEMP[11].xyyy, TEMP[11].xyyy 55: MOV_SAT TEMP[12].x, TEMP[12].xxxx 56: ADD TEMP[12].x, IMM[0].zzzz, -TEMP[12].xxxx 57: SQRT TEMP[12].x, TEMP[12].xxxx 58: MOV TEMP[11].z, TEMP[12].xxxx 59: MOV TEMP[12].xy, IN[3].xyyy 60: TEX TEMP[12].xyz, TEMP[12], SAMP[0], 2D 61: MOV TEMP[5].xy, TEMP[5].xyyy 62: TEX TEMP[5].x, TEMP[5], SAMP[3], 2D 63: MUL TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx 64: MOV_SAT TEMP[5].x, TEMP[5].xxxx 65: LRP TEMP[5].x, TEMP[4].xxxx, IMM[0].zzzz, TEMP[5].xxxx 66: MUL TEMP[5].xyz, TEMP[12].xyzz, TEMP[5].xxxx 67: MOV TEMP[7].xy, TEMP[7].xyyy 68: TEX TEMP[7].x, TEMP[7], SAMP[3], 2D 69: MUL TEMP[7].x, TEMP[7].xxxx, IMM[2].xxxx 70: MOV_SAT TEMP[7].x, TEMP[7].xxxx 71: LRP TEMP[7].x, TEMP[4].xxxx, IMM[0].zzzz, TEMP[7].xxxx 72: MOV TEMP[9].xy, TEMP[9].xyyy 73: TEX TEMP[9].x, TEMP[9], SAMP[3], 2D 74: MUL TEMP[9].x, TEMP[9].xxxx, IMM[2].xxxx 75: MOV_SAT TEMP[9].x, TEMP[9].xxxx 76: LRP TEMP[9].x, TEMP[4].xxxx, IMM[0].zzzz, TEMP[9].xxxx 77: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[9].xxxx 78: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[7].xxxx 79: ADD TEMP[6].xyz, TEMP[6].xyzz, TEMP[8].xyzz 80: ADD TEMP[7].xyz, TEMP[10].xyzz, TEMP[11].xyzz 81: ADD TEMP[6].xyz, TEMP[6].xyzz, TEMP[7].xyzz 82: MAD TEMP[3].xyz, TEMP[6].xyzz, IMM[2].yyyy, TEMP[3].xyzz 83: LRP TEMP[3].xyz, TEMP[4].xxxx, IMM[2].zzww, TEMP[3].xyzz 84: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 85: RSQ TEMP[4].x, TEMP[4].xxxx 86: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 87: MOV TEMP[4].w, IMM[2].zzzz 88: DP3 TEMP[6].x, IN[0].xyzz, TEMP[3].xyzz 89: DP3 TEMP[7].x, IN[1].xyzz, TEMP[3].xyzz 90: MOV TEMP[6].y, TEMP[7].xxxx 91: DP3 TEMP[3].x, IN[2].xyzz, TEMP[3].xyzz 92: MOV TEMP[6].z, TEMP[3].xxxx 93: MUL TEMP[4].xyz, TEMP[5].xyzz, TEMP[0].xyzz 94: DP3 TEMP[0].x, TEMP[6].xyzz, CONST[1].xyzz 95: MOV_SAT TEMP[0].x, TEMP[0].xxxx 96: ADD TEMP[1].xyz, CONST[0].xyzz, -TEMP[1].xyzz 97: DP3 TEMP[3].x, TEMP[1].xyzz, TEMP[1].xyzz 98: RSQ TEMP[3].x, TEMP[3].xxxx 99: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx, CONST[1].xyzz 100: DP3 TEMP[3].x, TEMP[1].xyzz, TEMP[1].xyzz 101: RSQ TEMP[3].x, TEMP[3].xxxx 102: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx 103: DP3 TEMP[1].x, TEMP[6].xyzz, TEMP[1].xyzz 104: MAX TEMP[1].x, IMM[2].zzzz, TEMP[1].xxxx 105: MUL TEMP[3].x, IMM[3].xxxx, TEMP[2].xxxx 106: POW TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx 107: MOV_SAT TEMP[1].x, TEMP[1].xxxx 108: MUL TEMP[3].x, IMM[0].xxxx, TEMP[1].xxxx 109: ADD TEMP[3].x, IMM[3].yyyy, -TEMP[3].xxxx 110: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[3].xxxx 111: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx 112: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 113: MUL TEMP[1].x, TEMP[1].xxxx, IMM[3].zzzz 114: ADD TEMP[2].x, TEMP[2].xxxx, IMM[3].wwww 115: MOV_SAT TEMP[2].x, TEMP[2].xxxx 116: MAD TEMP[1].x, TEMP[1].xxxx, TEMP[0].xxxx, TEMP[2].xxxx 117: MUL TEMP[2].xyz, TEMP[5].xyzz, CONST[4].xyzz 118: MUL TEMP[0].xyz, TEMP[2].xyzz, TEMP[0].xxxx 119: MAD TEMP[0].xyz, CONST[4].xyzz, TEMP[1].xxxx, TEMP[0].xyzz 120: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[0].xxxx 121: MOV TEMP[0].w, IMM[0].zzzz 122: ADD TEMP[0].xyz, TEMP[4], TEMP[0] 123: MAD TEMP[1].x, IN[3].zzzz, CONST[3].zzzz, CONST[3].wwww 124: MOV_SAT TEMP[1].x, TEMP[1].xxxx 125: LRP TEMP[4].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[2].xyzz 126: MOV TEMP[4].w, IMM[0].zzzz 127: MOV OUT[0], TEMP[4] 128: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %38 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %39 = load <32 x i8>, <32 x i8> addrspace(2)* %38, align 32, !tbaa !0 %40 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 %42 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %43 = bitcast <8 x i32> addrspace(2)* %42 to <32 x i8> addrspace(2)* %44 = load <32 x i8>, <32 x i8> addrspace(2)* %43, align 32, !tbaa !0 %45 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %46 = bitcast <4 x i32> addrspace(2)* %45 to <16 x i8> addrspace(2)* %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %49 = bitcast <8 x i32> addrspace(2)* %48 to <32 x i8> addrspace(2)* %50 = load <32 x i8>, <32 x i8> addrspace(2)* %49, align 32, !tbaa !0 %51 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %52 = bitcast <4 x i32> addrspace(2)* %51 to <16 x i8> addrspace(2)* %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 %54 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %55 = load <8 x i32>, <8 x i32> addrspace(2)* %54, align 32, !tbaa !0 %56 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %57 = load <4 x i32>, <4 x i32> addrspace(2)* %56, align 16, !tbaa !0 %58 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %59 = load <8 x i32>, <8 x i32> addrspace(2)* %58, align 32, !tbaa !0 %60 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %61 = load <4 x i32>, <4 x i32> addrspace(2)* %60, align 16, !tbaa !0 %62 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %63 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %64 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %65 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %66 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %67 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %68 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %69 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %70 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %71 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %80 = bitcast float %74 to i32 %81 = bitcast float %75 to i32 %82 = insertelement <2 x i32> undef, i32 %80, i32 0 %83 = insertelement <2 x i32> %82, i32 %81, i32 1 %84 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %83, <32 x i8> %44, <16 x i8> %47, i32 2) %85 = extractelement <4 x float> %84, i32 1 %86 = extractelement <4 x float> %84, i32 3 %87 = fmul float %86, 2.000000e+00 %88 = fadd float %87, -1.000000e+00 %89 = fmul float %85, 2.000000e+00 %90 = fadd float %89, -1.000000e+00 %91 = fmul float %88, %88 %92 = fmul float %90, %90 %93 = fadd float %91, %92 %94 = call float @llvm.AMDIL.clamp.(float %93, float 0.000000e+00, float 1.000000e+00) %95 = fsub float 1.000000e+00, %94 %96 = call float @llvm.sqrt.f32(float %95) %97 = fmul float %88, %88 %98 = fmul float %90, %90 %99 = fadd float %98, %97 %100 = fmul float %96, %96 %101 = fadd float %99, %100 %102 = call float @llvm.AMDGPU.rsq.clamped.f32(float %101) %103 = fmul float %88, %102 %104 = fmul float %90, %102 %105 = fmul float %96, %102 %106 = fmul float %103, 2.000000e+00 %107 = fmul float %104, -2.000000e+00 %108 = bitcast float %74 to i32 %109 = bitcast float %75 to i32 %110 = insertelement <2 x i32> undef, i32 %108, i32 0 %111 = insertelement <2 x i32> %110, i32 %109, i32 1 %112 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %111, <32 x i8> %50, <16 x i8> %53, i32 2) %113 = extractelement <4 x float> %112, i32 0 %114 = fmul float %113, 1.500000e+00 %115 = call float @llvm.AMDIL.clamp.(float %114, float 0.000000e+00, float 1.000000e+00) %116 = fmul float %74, 4.000000e+00 %117 = fmul float %75, 4.000000e+00 %118 = bitcast float %116 to i32 %119 = bitcast float %117 to i32 %120 = insertelement <2 x i32> undef, i32 %118, i32 0 %121 = insertelement <2 x i32> %120, i32 %119, i32 1 %122 = bitcast <8 x i32> %59 to <32 x i8> %123 = bitcast <4 x i32> %61 to <16 x i8> %124 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %121, <32 x i8> %122, <16 x i8> %123, i32 2) %125 = extractelement <4 x float> %124, i32 1 %126 = extractelement <4 x float> %124, i32 3 %127 = fmul float %126, 2.000000e+00 %128 = fadd float %127, -1.000000e+00 %129 = fmul float %125, 2.000000e+00 %130 = fadd float %129, -1.000000e+00 %131 = fmul float %128, %128 %132 = fmul float %130, %130 %133 = fadd float %131, %132 %134 = call float @llvm.AMDIL.clamp.(float %133, float 0.000000e+00, float 1.000000e+00) %135 = fsub float 1.000000e+00, %134 %136 = call float @llvm.sqrt.f32(float %135) %137 = fmul float %74, 8.000000e+00 %138 = fmul float %75, 8.000000e+00 %139 = bitcast float %137 to i32 %140 = bitcast float %138 to i32 %141 = insertelement <2 x i32> undef, i32 %139, i32 0 %142 = insertelement <2 x i32> %141, i32 %140, i32 1 %143 = bitcast <8 x i32> %59 to <32 x i8> %144 = bitcast <4 x i32> %61 to <16 x i8> %145 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %142, <32 x i8> %143, <16 x i8> %144, i32 2) %146 = extractelement <4 x float> %145, i32 1 %147 = extractelement <4 x float> %145, i32 3 %148 = fmul float %147, 2.000000e+00 %149 = fadd float %148, -1.000000e+00 %150 = fmul float %146, 2.000000e+00 %151 = fadd float %150, -1.000000e+00 %152 = fmul float %149, %149 %153 = fmul float %151, %151 %154 = fadd float %152, %153 %155 = call float @llvm.AMDIL.clamp.(float %154, float 0.000000e+00, float 1.000000e+00) %156 = fsub float 1.000000e+00, %155 %157 = call float @llvm.sqrt.f32(float %156) %158 = fmul float %74, 1.600000e+01 %159 = fmul float %75, 1.600000e+01 %160 = bitcast float %158 to i32 %161 = bitcast float %159 to i32 %162 = insertelement <2 x i32> undef, i32 %160, i32 0 %163 = insertelement <2 x i32> %162, i32 %161, i32 1 %164 = bitcast <8 x i32> %59 to <32 x i8> %165 = bitcast <4 x i32> %61 to <16 x i8> %166 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %163, <32 x i8> %164, <16 x i8> %165, i32 2) %167 = extractelement <4 x float> %166, i32 1 %168 = extractelement <4 x float> %166, i32 3 %169 = fmul float %168, 2.000000e+00 %170 = fadd float %169, -1.000000e+00 %171 = fmul float %167, 2.000000e+00 %172 = fadd float %171, -1.000000e+00 %173 = fmul float %170, %170 %174 = fmul float %172, %172 %175 = fadd float %173, %174 %176 = call float @llvm.AMDIL.clamp.(float %175, float 0.000000e+00, float 1.000000e+00) %177 = fsub float 1.000000e+00, %176 %178 = call float @llvm.sqrt.f32(float %177) %179 = fmul float %74, 3.200000e+01 %180 = fmul float %75, 3.200000e+01 %181 = bitcast float %179 to i32 %182 = bitcast float %180 to i32 %183 = insertelement <2 x i32> undef, i32 %181, i32 0 %184 = insertelement <2 x i32> %183, i32 %182, i32 1 %185 = bitcast <8 x i32> %59 to <32 x i8> %186 = bitcast <4 x i32> %61 to <16 x i8> %187 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %184, <32 x i8> %185, <16 x i8> %186, i32 2) %188 = extractelement <4 x float> %187, i32 1 %189 = extractelement <4 x float> %187, i32 3 %190 = fmul float %189, 2.000000e+00 %191 = fadd float %190, -1.000000e+00 %192 = fmul float %188, 2.000000e+00 %193 = fadd float %192, -1.000000e+00 %194 = fmul float %191, %191 %195 = fmul float %193, %193 %196 = fadd float %194, %195 %197 = call float @llvm.AMDIL.clamp.(float %196, float 0.000000e+00, float 1.000000e+00) %198 = fsub float 1.000000e+00, %197 %199 = call float @llvm.sqrt.f32(float %198) %200 = bitcast float %74 to i32 %201 = bitcast float %75 to i32 %202 = insertelement <2 x i32> undef, i32 %200, i32 0 %203 = insertelement <2 x i32> %202, i32 %201, i32 1 %204 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %203, <32 x i8> %39, <16 x i8> %41, i32 2) %205 = extractelement <4 x float> %204, i32 0 %206 = extractelement <4 x float> %204, i32 1 %207 = extractelement <4 x float> %204, i32 2 %208 = bitcast float %116 to i32 %209 = bitcast float %117 to i32 %210 = insertelement <2 x i32> undef, i32 %208, i32 0 %211 = insertelement <2 x i32> %210, i32 %209, i32 1 %212 = bitcast <8 x i32> %55 to <32 x i8> %213 = bitcast <4 x i32> %57 to <16 x i8> %214 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %211, <32 x i8> %212, <16 x i8> %213, i32 2) %215 = extractelement <4 x float> %214, i32 0 %216 = fmul float %215, 0x3FF3333340000000 %217 = call float @llvm.AMDIL.clamp.(float %216, float 0.000000e+00, float 1.000000e+00) %218 = call float @llvm.AMDGPU.lrp(float %115, float 1.000000e+00, float %217) %219 = fmul float %205, %218 %220 = fmul float %206, %218 %221 = fmul float %207, %218 %222 = bitcast float %137 to i32 %223 = bitcast float %138 to i32 %224 = insertelement <2 x i32> undef, i32 %222, i32 0 %225 = insertelement <2 x i32> %224, i32 %223, i32 1 %226 = bitcast <8 x i32> %55 to <32 x i8> %227 = bitcast <4 x i32> %57 to <16 x i8> %228 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %225, <32 x i8> %226, <16 x i8> %227, i32 2) %229 = extractelement <4 x float> %228, i32 0 %230 = fmul float %229, 0x3FF3333340000000 %231 = call float @llvm.AMDIL.clamp.(float %230, float 0.000000e+00, float 1.000000e+00) %232 = call float @llvm.AMDGPU.lrp(float %115, float 1.000000e+00, float %231) %233 = bitcast float %158 to i32 %234 = bitcast float %159 to i32 %235 = insertelement <2 x i32> undef, i32 %233, i32 0 %236 = insertelement <2 x i32> %235, i32 %234, i32 1 %237 = bitcast <8 x i32> %55 to <32 x i8> %238 = bitcast <4 x i32> %57 to <16 x i8> %239 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %236, <32 x i8> %237, <16 x i8> %238, i32 2) %240 = extractelement <4 x float> %239, i32 0 %241 = fmul float %240, 0x3FF3333340000000 %242 = call float @llvm.AMDIL.clamp.(float %241, float 0.000000e+00, float 1.000000e+00) %243 = call float @llvm.AMDGPU.lrp(float %115, float 1.000000e+00, float %242) %244 = fmul float %232, %243 %245 = fmul float %219, %244 %246 = fmul float %220, %244 %247 = fmul float %221, %244 %248 = fadd float %128, %149 %249 = fadd float %130, %151 %250 = fadd float %136, %157 %251 = fadd float %170, %191 %252 = fadd float %172, %193 %253 = fadd float %178, %199 %254 = fadd float %248, %251 %255 = fadd float %249, %252 %256 = fadd float %250, %253 %257 = fmul float %254, 0x3FE3333340000000 %258 = fadd float %257, %106 %259 = fmul float %255, 0x3FE3333340000000 %260 = fadd float %259, %107 %261 = fmul float %256, 0x3FE3333340000000 %262 = fadd float %261, %105 %263 = call float @llvm.AMDGPU.lrp(float %115, float 0.000000e+00, float %258) %264 = call float @llvm.AMDGPU.lrp(float %115, float 0.000000e+00, float %260) %265 = call float @llvm.AMDGPU.lrp(float %115, float 1.000000e+00, float %262) %266 = fmul float %263, %263 %267 = fmul float %264, %264 %268 = fadd float %267, %266 %269 = fmul float %265, %265 %270 = fadd float %268, %269 %271 = call float @llvm.AMDGPU.rsq.clamped.f32(float %270) %272 = fmul float %263, %271 %273 = fmul float %264, %271 %274 = fmul float %265, %271 %275 = fmul float %62, %272 %276 = fmul float %63, %273 %277 = fadd float %276, %275 %278 = fmul float %64, %274 %279 = fadd float %277, %278 %280 = fmul float %66, %272 %281 = fmul float %67, %273 %282 = fadd float %281, %280 %283 = fmul float %68, %274 %284 = fadd float %282, %283 %285 = fmul float %70, %272 %286 = fmul float %71, %273 %287 = fadd float %286, %285 %288 = fmul float %72, %274 %289 = fadd float %287, %288 %290 = fmul float %245, %77 %291 = fmul float %246, %78 %292 = fmul float %247, %79 %293 = fmul float %279, %27 %294 = fmul float %284, %28 %295 = fadd float %294, %293 %296 = fmul float %289, %29 %297 = fadd float %295, %296 %298 = call float @llvm.AMDIL.clamp.(float %297, float 0.000000e+00, float 1.000000e+00) %299 = fsub float %24, %65 %300 = fsub float %25, %69 %301 = fsub float %26, %73 %302 = fmul float %299, %299 %303 = fmul float %300, %300 %304 = fadd float %303, %302 %305 = fmul float %301, %301 %306 = fadd float %304, %305 %307 = call float @llvm.AMDGPU.rsq.clamped.f32(float %306) %308 = fmul float %299, %307 %309 = fadd float %308, %27 %310 = fmul float %300, %307 %311 = fadd float %310, %28 %312 = fmul float %301, %307 %313 = fadd float %312, %29 %314 = fmul float %309, %309 %315 = fmul float %311, %311 %316 = fadd float %315, %314 %317 = fmul float %313, %313 %318 = fadd float %316, %317 %319 = call float @llvm.AMDGPU.rsq.clamped.f32(float %318) %320 = fmul float %309, %319 %321 = fmul float %311, %319 %322 = fmul float %313, %319 %323 = fmul float %279, %320 %324 = fmul float %284, %321 %325 = fadd float %324, %323 %326 = fmul float %289, %322 %327 = fadd float %325, %326 %328 = call float @llvm.maxnum.f32(float %327, float 0.000000e+00) %329 = fmul float %113, 5.000000e+01 %330 = call float @llvm.pow.f32(float %328, float %329) %331 = call float @llvm.AMDIL.clamp.(float %330, float 0.000000e+00, float 1.000000e+00) %332 = fmul float %331, 2.000000e+00 %333 = fsub float 3.000000e+00, %332 %334 = fmul float %331, %333 %335 = fmul float %331, %334 %336 = fmul float %335, %113 %337 = fmul float %336, 0x3FD6666660000000 %338 = fadd float %113, 0xBFED70A3E0000000 %339 = call float @llvm.AMDIL.clamp.(float %338, float 0.000000e+00, float 1.000000e+00) %340 = fmul float %337, %298 %341 = fadd float %340, %339 %342 = fmul float %245, %35 %343 = fmul float %246, %36 %344 = fmul float %247, %37 %345 = fmul float %342, %298 %346 = fmul float %343, %298 %347 = fmul float %344, %298 %348 = fmul float %35, %341 %349 = fadd float %348, %345 %350 = fmul float %36, %341 %351 = fadd float %350, %346 %352 = fmul float %37, %341 %353 = fadd float %352, %347 %354 = fmul float %349, 2.000000e+00 %355 = fmul float %351, 2.000000e+00 %356 = fmul float %353, 2.000000e+00 %357 = fadd float %290, %354 %358 = fadd float %291, %355 %359 = fadd float %292, %356 %360 = fmul float %76, %33 %361 = fadd float %360, %34 %362 = call float @llvm.AMDIL.clamp.(float %361, float 0.000000e+00, float 1.000000e+00) %363 = call float @llvm.AMDGPU.lrp(float %362, float %357, float %30) %364 = call float @llvm.AMDGPU.lrp(float %362, float %358, float %31) %365 = call float @llvm.AMDGPU.lrp(float %362, float %359, float %32) %366 = call i32 @llvm.SI.packf16(float %363, float %364) %367 = bitcast i32 %366 to float %368 = call i32 @llvm.SI.packf16(float %365, float 1.000000e+00) %369 = bitcast i32 %368 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %367, float %369, float %367, float %369) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 v_interp_p1_f32 v12, v0, 2, 2, [m0] ; C8300A00 v_interp_p2_f32 v12, [v12], v1, 2, 2, [m0] ; C8310A01 v_interp_p1_f32 v13, v0, 3, 2, [m0] ; C8340B00 v_interp_p2_f32 v13, [v13], v1, 3, 2, [m0] ; C8350B01 v_interp_p1_f32 v14, v0, 0, 3, [m0] ; C8380C00 v_interp_p2_f32 v14, [v14], v1, 0, 3, [m0] ; C8390C01 v_interp_p1_f32 v15, v0, 1, 3, [m0] ; C83C0D00 v_interp_p2_f32 v15, [v15], v1, 1, 3, [m0] ; C83D0D01 v_interp_p1_f32 v16, v0, 2, 3, [m0] ; C8400E00 v_interp_p2_f32 v16, [v16], v1, 2, 3, [m0] ; C8410E01 v_interp_p1_f32 v17, v0, 3, 3, [m0] ; C8440F00 v_interp_p2_f32 v17, [v17], v1, 3, 3, [m0] ; C8450F01 v_interp_p1_f32 v18, v0, 0, 4, [m0] ; C8481000 v_interp_p2_f32 v18, [v18], v1, 0, 4, [m0] ; C8491001 v_interp_p1_f32 v0, v0, 1, 4, [m0] ; C8001100 v_interp_p2_f32 v0, [v0], v1, 1, 4, [m0] ; C8011101 v_mul_f32_e32 v19, 4.0, v14 ; 10261CF6 v_mul_f32_e32 v20, 4.0, v15 ; 10281EF6 s_load_dwordx4 s[48:51], s[4:5], 0x4 ; C0980504 s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508 s_load_dwordx4 s[8:11], s[4:5], 0xc ; C084050C s_load_dwordx4 s[24:27], s[4:5], 0x10 ; C08C0510 s_load_dwordx8 s[52:59], s[6:7], 0x8 ; C0DA0708 s_load_dwordx8 s[40:47], s[6:7], 0x10 ; C0D40710 s_load_dwordx8 s[12:19], s[6:7], 0x18 ; C0C60718 s_load_dwordx8 s[28:35], s[6:7], 0x20 ; C0CE0720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[21:22], 10, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[52:59], s[48:51] ; F0800A00 018D150E s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, 2.0, v22, -1.0 ; D2820001 03CE2CF4 v_mad_f32 v21, 2.0, v21, -1.0 ; D2820015 03CE2AF4 v_mul_f32_e32 v22, v21, v21 ; 102C2B15 v_mac_f32_e32 v22, v1, v1 ; 3E2C0301 v_add_f32_e64 v23, 0, v22 clamp ; D2060817 00022C80 v_sub_f32_e32 v23, 1.0, v23 ; 082E2EF2 v_sqrt_f32_e32 v23, v23 ; 7E2E6717 v_mac_f32_e32 v22, v23, v23 ; 3E2C2F17 v_rsq_clamp_f32_e32 v22, v22 ; 7E2C5916 image_sample v24, 1, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[40:47], s[36:39] ; F0800100 012A180E v_mul_f32_e32 v25, v22, v1 ; 10320316 v_mac_f32_e32 v25, v22, v1 ; 3E320316 v_mul_f32_e32 v1, v22, v21 ; 10022B16 v_mul_f32_e32 v21, v22, v23 ; 102A2F16 image_sample v[22:23], 10, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[28:35], s[24:27] ; F0800A00 00C71613 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v23, 2.0, v23, -1.0 ; D2820017 03CE2EF4 v_mad_f32 v22, 2.0, v22, -1.0 ; D2820016 03CE2CF4 v_mov_b32_e32 v26, 0x41000000 ; 7E3402FF 41000000 v_mul_f32_e32 v27, v26, v14 ; 10361D1A v_mul_f32_e32 v28, v26, v15 ; 10381F1A image_sample v[29:30], 10, 0, 0, 0, 0, 0, 0, 0, v[27:28], s[28:35], s[24:27] ; F0800A00 00C71D1B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v26, 2.0, v30, -1.0 ; D282001A 03CE3CF4 v_mad_f32 v29, 2.0, v29, -1.0 ; D282001D 03CE3AF4 v_mov_b32_e32 v30, 0x41800000 ; 7E3C02FF 41800000 v_mul_f32_e32 v31, v30, v14 ; 103E1D1E v_mul_f32_e32 v32, v30, v15 ; 10401F1E image_sample v[33:34], 10, 0, 0, 0, 0, 0, 0, 0, v[31:32], s[28:35], s[24:27] ; F0800A00 00C7211F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v30, 2.0, v34, -1.0 ; D282001E 03CE44F4 v_mad_f32 v33, 2.0, v33, -1.0 ; D2820021 03CE42F4 v_mov_b32_e32 v34, 0x42000000 ; 7E4402FF 42000000 v_mul_f32_e32 v35, v34, v14 ; 10461D22 v_mul_f32_e32 v36, v34, v15 ; 10481F22 image_sample v[34:35], 10, 0, 0, 0, 0, 0, 0, 0, v[35:36], s[28:35], s[24:27] ; F0800A00 00C72223 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v35, 2.0, v35, -1.0 ; D2820023 03CE46F4 v_mad_f32 v34, 2.0, v34, -1.0 ; D2820022 03CE44F4 v_mul_f32_e32 v36, v22, v22 ; 10482D16 v_mac_f32_e32 v36, v23, v23 ; 3E482F17 v_mul_f32_e32 v37, v29, v29 ; 104A3B1D v_mac_f32_e32 v37, v26, v26 ; 3E4A351A v_add_f32_e32 v23, v26, v23 ; 062E2F1A v_add_f32_e32 v22, v29, v22 ; 062C2D1D v_add_f32_e64 v26, 0, v36 clamp ; D206081A 00024880 v_sub_f32_e32 v26, 1.0, v26 ; 083434F2 v_sqrt_f32_e32 v26, v26 ; 7E34671A v_add_f32_e64 v29, 0, v37 clamp ; D206081D 00024A80 v_sub_f32_e32 v29, 1.0, v29 ; 083A3AF2 v_sqrt_f32_e32 v29, v29 ; 7E3A671D v_add_f32_e32 v26, v29, v26 ; 0634351D v_mul_f32_e32 v29, v33, v33 ; 103A4321 v_mac_f32_e32 v29, v30, v30 ; 3E3A3D1E v_mul_f32_e32 v36, v34, v34 ; 10484522 v_mac_f32_e32 v36, v35, v35 ; 3E484723 v_add_f32_e32 v30, v35, v30 ; 063C3D23 v_add_f32_e32 v33, v34, v33 ; 06424322 v_add_f32_e64 v29, 0, v29 clamp ; D206081D 00023A80 v_sub_f32_e32 v29, 1.0, v29 ; 083A3AF2 v_sqrt_f32_e32 v29, v29 ; 7E3A671D v_add_f32_e64 v34, 0, v36 clamp ; D2060822 00024880 v_sub_f32_e32 v34, 1.0, v34 ; 084444F2 v_sqrt_f32_e32 v34, v34 ; 7E446722 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 v_add_f32_e32 v29, v34, v29 ; 063A3B22 v_add_f32_e32 v23, v30, v23 ; 062E2F1E v_add_f32_e32 v22, v33, v22 ; 062C2D21 v_add_f32_e32 v26, v29, v26 ; 0634351D v_mov_b32_e32 v29, 0x3f19999a ; 7E3A02FF 3F19999A v_mac_f32_e32 v25, v29, v23 ; 3E322F1D v_mul_f32_e32 v1, -2.0, v1 ; 100202F5 v_mac_f32_e32 v1, v29, v22 ; 3E022D1D v_mac_f32_e32 v21, v29, v26 ; 3E2A351D s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[33:35], 7, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[24:31], s[20:23] ; F0800700 00A6210E image_sample v14, 1, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[12:19], s[8:11] ; F0800100 00430E13 v_mul_f32_e32 v15, 0x3fc00000, v24 ; 101E30FF 3FC00000 v_add_f32_e64 v15, 0, v15 clamp ; D206080F 00021E80 v_mov_b32_e32 v19, 0x3f99999a ; 7E2602FF 3F99999A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v14, v19, v14 ; 101C1D13 v_add_f32_e64 v14, 0, v14 clamp ; D206080E 00021C80 v_sub_f32_e32 v20, 1.0, v15 ; 08281EF2 v_mul_f32_e32 v14, v14, v20 ; 101C290E v_mac_f32_e32 v14, 1.0, v15 ; 3E1C1EF2 v_mul_f32_e32 v22, v25, v20 ; 102C2919 v_mac_f32_e32 v22, 0, v15 ; 3E2C1E80 v_mul_f32_e32 v1, v1, v20 ; 10022901 v_mac_f32_e32 v1, 0, v15 ; 3E021E80 v_mul_f32_e32 v21, v21, v20 ; 102A2915 v_mac_f32_e32 v21, 1.0, v15 ; 3E2A1EF2 v_mul_f32_e32 v23, v22, v22 ; 102E2D16 v_mac_f32_e32 v23, v1, v1 ; 3E2E0301 v_mac_f32_e32 v23, v21, v21 ; 3E2E2B15 v_rsq_clamp_f32_e32 v23, v23 ; 7E2E5917 v_mul_f32_e32 v25, v14, v33 ; 1032430E v_mul_f32_e32 v26, v14, v34 ; 1034450E v_mul_f32_e32 v14, v14, v35 ; 101C470E v_mul_f32_e32 v22, v23, v22 ; 102C2D17 v_mul_f32_e32 v1, v23, v1 ; 10020317 v_mul_f32_e32 v2, v22, v2 ; 10040516 v_mac_f32_e32 v2, v1, v3 ; 3E040701 v_mul_f32_e32 v3, v22, v6 ; 10060D16 v_mac_f32_e32 v3, v1, v7 ; 3E060F01 v_mul_f32_e32 v6, v22, v10 ; 100C1516 v_mac_f32_e32 v6, v1, v11 ; 3E0C1701 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[27:28], s[12:19], s[8:11] ; F0800100 0043011B image_sample v7, 1, 0, 0, 0, 0, 0, 0, 0, v[31:32], s[12:19], s[8:11] ; F0800100 0043071F v_mul_f32_e32 v10, v23, v21 ; 10142B17 v_mac_f32_e32 v2, v10, v4 ; 3E04090A v_mac_f32_e32 v3, v10, v8 ; 3E06110A s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v4, s4, v5 ; 08080A04 v_sub_f32_e32 v5, s5, v9 ; 080A1205 v_sub_f32_e32 v8, s6, v13 ; 08101A06 v_mul_f32_e32 v9, v4, v4 ; 10120904 v_mac_f32_e32 v9, v5, v5 ; 3E120B05 v_mac_f32_e32 v9, v8, v8 ; 3E121108 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108 s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109 s_buffer_load_dword s9, s[0:3], 0xa ; C204810A s_buffer_load_dword s10, s[0:3], 0xe ; C205010E s_buffer_load_dword s11, s[0:3], 0xf ; C205810F s_buffer_load_dword s12, s[0:3], 0x10 ; C2060110 s_buffer_load_dword s13, s[0:3], 0x11 ; C2068111 s_buffer_load_dword s0, s[0:3], 0x12 ; C2000112 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, v4, v9, s7 ; D2820004 001E1304 v_mad_f32 v5, v5, v9, s8 ; D2820005 00221305 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v8, v9, s4 ; D2820008 00121308 v_mul_f32_e32 v9, v4, v4 ; 10120904 v_mac_f32_e32 v9, v5, v5 ; 3E120B05 v_mac_f32_e32 v9, v8, v8 ; 3E121108 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mac_f32_e32 v6, v10, v12 ; 3E0C190A v_mul_f32_e32 v10, s7, v2 ; 10140407 v_mac_f32_e32 v10, s8, v3 ; 3E140608 v_mul_f32_e32 v4, v9, v4 ; 10080909 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_mul_f32_e32 v4, v9, v5 ; 10080B09 v_mac_f32_e32 v2, v4, v3 ; 3E040704 v_mac_f32_e32 v10, s4, v6 ; 3E140C04 v_mul_f32_e32 v3, v9, v8 ; 10061109 v_mac_f32_e32 v2, v3, v6 ; 3E040D03 v_mul_f32_e32 v1, v19, v1 ; 10020313 v_mul_f32_e32 v3, v19, v7 ; 10060F13 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mul_f32_e32 v1, v1, v20 ; 10022901 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_mul_f32_e32 v3, v3, v20 ; 10062903 v_mac_f32_e32 v1, 1.0, v15 ; 3E021EF2 v_mac_f32_e32 v3, 1.0, v15 ; 3E061EF2 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_mul_f32_e32 v3, v1, v25 ; 10063301 v_max_f32_e32 v2, 0, v2 ; 20040480 v_log_f32_e32 v2, v2 ; 7E044F02 v_mul_f32_e32 v4, v1, v26 ; 10083501 v_mul_f32_e32 v1, v1, v14 ; 10021D01 v_mul_f32_e32 v5, 0x42480000, v24 ; 100A30FF 42480000 v_mul_legacy_f32_e32 v2, v5, v2 ; 0E040505 v_exp_f32_e32 v2, v2 ; 7E044B02 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_madak_f32_e32 v5, -2.0, v2, 0x40400000 ; 420A04F5 40400000 v_mul_f32_e32 v5, v5, v2 ; 100A0505 v_mul_f32_e32 v2, v5, v2 ; 10040505 v_mul_f32_e32 v2, v24, v2 ; 10040518 v_mov_b32_e32 v5, 0xbf6b851f ; 7E0A02FF BF6B851F v_add_f32_e32 v5, v24, v5 ; 060A0B18 v_add_f32_e64 v6, 0, v10 clamp ; D2060806 00021480 v_mul_f32_e32 v2, 0x3eb33333, v2 ; 100404FF 3EB33333 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_mac_f32_e32 v5, v6, v2 ; 3E0A0506 v_mul_f32_e32 v2, s12, v3 ; 1004060C v_mul_f32_e32 v7, v6, v2 ; 100E0506 v_mac_f32_e32 v7, s12, v5 ; 3E0E0A0C v_mac_f32_e32 v7, v6, v2 ; 3E0E0506 v_mul_f32_e32 v2, s13, v4 ; 1004080D v_mul_f32_e32 v8, v6, v2 ; 10100506 v_mac_f32_e32 v8, s13, v5 ; 3E100A0D v_mac_f32_e32 v8, v6, v2 ; 3E100506 v_mul_f32_e32 v2, s0, v1 ; 10040200 v_mul_f32_e32 v9, v6, v2 ; 10120506 v_mac_f32_e32 v9, s0, v5 ; 3E120A00 v_mac_f32_e32 v9, v6, v2 ; 3E120506 v_mac_f32_e32 v7, s12, v5 ; 3E0E0A0C v_mac_f32_e32 v7, v17, v3 ; 3E0E0711 v_mac_f32_e32 v8, s13, v5 ; 3E100A0D v_mac_f32_e32 v8, v18, v4 ; 3E100912 v_mac_f32_e32 v9, s0, v5 ; 3E120A00 v_mac_f32_e32 v9, v0, v1 ; 3E120300 v_mov_b32_e32 v0, s11 ; 7E00020B v_mac_f32_e32 v0, s10, v16 ; 3E00200A v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 v_mul_f32_e32 v2, s5, v1 ; 10040205 v_mac_f32_e32 v2, v7, v0 ; 3E040107 v_mul_f32_e32 v3, s6, v1 ; 10060206 v_mac_f32_e32 v3, v8, v0 ; 3E060108 v_mul_f32_e32 v1, s9, v1 ; 10020209 v_mac_f32_e32 v1, v9, v0 ; 3E020109 v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702 v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 40 Code Size: 1256 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..36] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, 0.0000} 0: MUL TEMP[0].xyz, IN[0].xyzz, CONST[14].xyzz 1: MUL TEMP[1], CONST[15], TEMP[0].xxxx 2: MAD TEMP[1], CONST[16], TEMP[0].yyyy, TEMP[1] 3: MAD TEMP[1].xyz, CONST[17], TEMP[0].zzzz, TEMP[1] 4: LRP TEMP[0].xyz, IN[2].wwww, TEMP[1].xyzz, TEMP[0].xyzz 5: DP3 TEMP[1].x, CONST[19].xyzz, TEMP[0].xyzz 6: ADD TEMP[1].x, TEMP[1].xxxx, CONST[19].wwww 7: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[19].xyzz 8: ADD TEMP[1].xyz, TEMP[0].xyzz, -TEMP[1].xyzz 9: LRP TEMP[0].xyz, CONST[20].xxxx, TEMP[0].xyzz, TEMP[1].xyzz 10: MUL TEMP[1], CONST[33], TEMP[0].xxxx 11: MAD TEMP[1], CONST[34], TEMP[0].yyyy, TEMP[1] 12: MAD TEMP[1], CONST[35], TEMP[0].zzzz, TEMP[1] 13: ADD TEMP[1].xyz, TEMP[1], CONST[36] 14: MUL TEMP[2], CONST[29], TEMP[0].xxxx 15: MAD TEMP[2], CONST[30], TEMP[0].yyyy, TEMP[2] 16: MAD TEMP[0], CONST[31], TEMP[0].zzzz, TEMP[2] 17: ADD TEMP[0], TEMP[0], CONST[32] 18: MOV TEMP[2].w, IMM[0].yyyy 19: MUL TEMP[3], CONST[28], IMM[0].zzzz 20: MUL TEMP[4].xyz, TEMP[1].xyzz, CONST[4].wwww 21: ADD TEMP[4].xyz, CONST[4].xyzz, -TEMP[4].xyzz 22: MOV TEMP[5].xy, TEMP[4].xyxx 23: MOV TEMP[5].z, -TEMP[4].zzzz 24: DP3 TEMP[4].x, TEMP[5].xyzz, TEMP[5].xyzz 25: RSQ TEMP[4].x, TEMP[4].xxxx 26: MUL TEMP[4].xyz, TEMP[5].xyzz, TEMP[4].xxxx 27: MOV TEMP[2].xyz, CONST[0].xyzx 28: MUL TEMP[6].x, CONST[21].xxxx, IN[4].wwww 29: MUL TEMP[7].xyz, CONST[24].xyzz, TEMP[4].xxxx 30: MAD TEMP[7].xyz, CONST[25].xyzz, TEMP[4].yyyy, TEMP[7].xyzz 31: MAD TEMP[4].xyz, CONST[26].xyzz, TEMP[4].zzzz, TEMP[7].xyzz 32: DP3 TEMP[4].x, IN[1].xyzz, TEMP[4].xyzz 33: MAX TEMP[4].x, IMM[0].yyyy, TEMP[4].xxxx 34: ADD TEMP[7].x, TEMP[6].xxxx, CONST[22].xxxx 35: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[7].xxxx 36: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[5].xyzz 37: MAD TEMP[5].x, TEMP[5].xxxx, CONST[8].zzzz, IMM[0].xxxx 38: RCP TEMP[5].x, TEMP[5].xxxx 39: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 40: MAD TEMP[3], TEMP[2], TEMP[4].xxxx, TEMP[3] 41: MUL TEMP[4].xyz, TEMP[1].xyzz, CONST[5].wwww 42: ADD TEMP[4].xyz, CONST[5].xyzz, -TEMP[4].xyzz 43: MOV TEMP[5].xy, TEMP[4].xyxx 44: MOV TEMP[5].z, -TEMP[4].zzzz 45: DP3 TEMP[4].x, TEMP[5].xyzz, TEMP[5].xyzz 46: RSQ TEMP[4].x, TEMP[4].xxxx 47: MUL TEMP[4].xyz, TEMP[5].xyzz, TEMP[4].xxxx 48: MOV TEMP[2].xyz, CONST[1].xyzx 49: MUL TEMP[7].xyz, CONST[24].xyzz, TEMP[4].xxxx 50: MAD TEMP[7].xyz, CONST[25].xyzz, TEMP[4].yyyy, TEMP[7].xyzz 51: MAD TEMP[4].xyz, CONST[26].xyzz, TEMP[4].zzzz, TEMP[7].xyzz 52: DP3 TEMP[4].x, IN[1].xyzz, TEMP[4].xyzz 53: MAX TEMP[4].x, IMM[0].yyyy, TEMP[4].xxxx 54: ADD TEMP[7].x, TEMP[6].xxxx, CONST[22].xxxx 55: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[7].xxxx 56: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[5].xyzz 57: MAD TEMP[5].x, TEMP[5].xxxx, CONST[9].zzzz, IMM[0].xxxx 58: RCP TEMP[5].x, TEMP[5].xxxx 59: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 60: MAD TEMP[3], TEMP[2], TEMP[4].xxxx, TEMP[3] 61: MUL TEMP[4].xyz, TEMP[1].xyzz, CONST[6].wwww 62: ADD TEMP[4].xyz, CONST[6].xyzz, -TEMP[4].xyzz 63: MOV TEMP[5].xy, TEMP[4].xyxx 64: MOV TEMP[5].z, -TEMP[4].zzzz 65: DP3 TEMP[4].x, TEMP[5].xyzz, TEMP[5].xyzz 66: RSQ TEMP[4].x, TEMP[4].xxxx 67: MUL TEMP[4].xyz, TEMP[5].xyzz, TEMP[4].xxxx 68: MOV TEMP[2].xyz, CONST[2].xyzx 69: MUL TEMP[7].xyz, CONST[24].xyzz, TEMP[4].xxxx 70: MAD TEMP[7].xyz, CONST[25].xyzz, TEMP[4].yyyy, TEMP[7].xyzz 71: MAD TEMP[4].xyz, CONST[26].xyzz, TEMP[4].zzzz, TEMP[7].xyzz 72: DP3 TEMP[4].x, IN[1].xyzz, TEMP[4].xyzz 73: MAX TEMP[4].x, IMM[0].yyyy, TEMP[4].xxxx 74: ADD TEMP[7].x, TEMP[6].xxxx, CONST[22].xxxx 75: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[7].xxxx 76: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[5].xyzz 77: MAD TEMP[5].x, TEMP[5].xxxx, CONST[10].zzzz, IMM[0].xxxx 78: RCP TEMP[5].x, TEMP[5].xxxx 79: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 80: MAD TEMP[3], TEMP[2], TEMP[4].xxxx, TEMP[3] 81: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[7].wwww 82: ADD TEMP[1].xyz, CONST[7].xyzz, -TEMP[1].xyzz 83: MOV TEMP[4].xy, TEMP[1].xyxx 84: MOV TEMP[4].z, -TEMP[1].zzzz 85: DP3 TEMP[1].x, TEMP[4].xyzz, TEMP[4].xyzz 86: RSQ TEMP[1].x, TEMP[1].xxxx 87: MUL TEMP[1].xyz, TEMP[4].xyzz, TEMP[1].xxxx 88: MOV TEMP[2].xyz, CONST[3].xyzx 89: MUL TEMP[5].xyz, CONST[24].xyzz, TEMP[1].xxxx 90: MAD TEMP[5].xyz, CONST[25].xyzz, TEMP[1].yyyy, TEMP[5].xyzz 91: MAD TEMP[1].xyz, CONST[26].xyzz, TEMP[1].zzzz, TEMP[5].xyzz 92: DP3 TEMP[1].x, IN[1].xyzz, TEMP[1].xyzz 93: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx 94: ADD TEMP[5].x, TEMP[6].xxxx, CONST[22].xxxx 95: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx 96: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[4].xyzz 97: MAD TEMP[4].x, TEMP[4].xxxx, CONST[11].zzzz, IMM[0].xxxx 98: RCP TEMP[4].x, TEMP[4].xxxx 99: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx 100: MAD TEMP[1].xyz, TEMP[2], TEMP[1].xxxx, TEMP[3] 101: MOV TEMP[3].xyz, TEMP[1].xyzx 102: MOV TEMP[3].w, IMM[0].xxxx 103: MUL TEMP[1], TEMP[3], CONST[23] 104: MUL TEMP[1], TEMP[1], CONST[13] 105: MAD TEMP[2].x, TEMP[0].zzzz, CONST[12].zzzz, CONST[12].wwww 106: MOV OUT[1], IN[3] 107: MOV OUT[2], TEMP[1] 108: MOV OUT[3], TEMP[2] 109: MOV OUT[0], TEMP[0] 110: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 392) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 400) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 404) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 408) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 416) %81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 420) %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 424) %83 = call float @llvm.SI.load.const(<16 x i8> %12, i32 448) %84 = call float @llvm.SI.load.const(<16 x i8> %12, i32 452) %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 456) %86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 464) %87 = call float @llvm.SI.load.const(<16 x i8> %12, i32 468) %88 = call float @llvm.SI.load.const(<16 x i8> %12, i32 472) %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 476) %90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 480) %91 = call float @llvm.SI.load.const(<16 x i8> %12, i32 484) %92 = call float @llvm.SI.load.const(<16 x i8> %12, i32 488) %93 = call float @llvm.SI.load.const(<16 x i8> %12, i32 492) %94 = call float @llvm.SI.load.const(<16 x i8> %12, i32 496) %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 500) %96 = call float @llvm.SI.load.const(<16 x i8> %12, i32 504) %97 = call float @llvm.SI.load.const(<16 x i8> %12, i32 508) %98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 512) %99 = call float @llvm.SI.load.const(<16 x i8> %12, i32 516) %100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 520) %101 = call float @llvm.SI.load.const(<16 x i8> %12, i32 524) %102 = call float @llvm.SI.load.const(<16 x i8> %12, i32 528) %103 = call float @llvm.SI.load.const(<16 x i8> %12, i32 532) %104 = call float @llvm.SI.load.const(<16 x i8> %12, i32 536) %105 = call float @llvm.SI.load.const(<16 x i8> %12, i32 544) %106 = call float @llvm.SI.load.const(<16 x i8> %12, i32 548) %107 = call float @llvm.SI.load.const(<16 x i8> %12, i32 552) %108 = call float @llvm.SI.load.const(<16 x i8> %12, i32 560) %109 = call float @llvm.SI.load.const(<16 x i8> %12, i32 564) %110 = call float @llvm.SI.load.const(<16 x i8> %12, i32 568) %111 = call float @llvm.SI.load.const(<16 x i8> %12, i32 576) %112 = call float @llvm.SI.load.const(<16 x i8> %12, i32 580) %113 = call float @llvm.SI.load.const(<16 x i8> %12, i32 584) %114 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %115 = load <16 x i8>, <16 x i8> addrspace(2)* %114, align 16, !tbaa !0 %116 = add i32 %5, %7 %117 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %115, i32 0, i32 %116) %118 = extractelement <4 x float> %117, i32 0 %119 = extractelement <4 x float> %117, i32 1 %120 = extractelement <4 x float> %117, i32 2 %121 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %122 = load <16 x i8>, <16 x i8> addrspace(2)* %121, align 16, !tbaa !0 %123 = add i32 %5, %7 %124 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %122, i32 0, i32 %123) %125 = extractelement <4 x float> %124, i32 0 %126 = extractelement <4 x float> %124, i32 1 %127 = extractelement <4 x float> %124, i32 2 %128 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %129 = load <16 x i8>, <16 x i8> addrspace(2)* %128, align 16, !tbaa !0 %130 = add i32 %5, %7 %131 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %129, i32 0, i32 %130) %132 = extractelement <4 x float> %131, i32 3 %133 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %134 = load <16 x i8>, <16 x i8> addrspace(2)* %133, align 16, !tbaa !0 %135 = add i32 %5, %7 %136 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %134, i32 0, i32 %135) %137 = extractelement <4 x float> %136, i32 0 %138 = extractelement <4 x float> %136, i32 1 %139 = extractelement <4 x float> %136, i32 2 %140 = extractelement <4 x float> %136, i32 3 %141 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %142 = load <16 x i8>, <16 x i8> addrspace(2)* %141, align 16, !tbaa !0 %143 = add i32 %5, %7 %144 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %142, i32 0, i32 %143) %145 = extractelement <4 x float> %144, i32 3 %146 = fmul float %118, %51 %147 = fmul float %119, %52 %148 = fmul float %120, %53 %149 = fmul float %54, %146 %150 = fmul float %55, %146 %151 = fmul float %56, %146 %152 = fmul float %57, %147 %153 = fadd float %152, %149 %154 = fmul float %58, %147 %155 = fadd float %154, %150 %156 = fmul float %59, %147 %157 = fadd float %156, %151 %158 = fmul float %60, %148 %159 = fadd float %158, %153 %160 = fmul float %61, %148 %161 = fadd float %160, %155 %162 = fmul float %62, %148 %163 = fadd float %162, %157 %164 = call float @llvm.AMDGPU.lrp(float %132, float %159, float %146) %165 = call float @llvm.AMDGPU.lrp(float %132, float %161, float %147) %166 = call float @llvm.AMDGPU.lrp(float %132, float %163, float %148) %167 = fmul float %63, %164 %168 = fmul float %64, %165 %169 = fadd float %168, %167 %170 = fmul float %65, %166 %171 = fadd float %169, %170 %172 = fadd float %171, %66 %173 = fmul float %172, %63 %174 = fmul float %172, %64 %175 = fmul float %172, %65 %176 = fsub float %164, %173 %177 = fsub float %165, %174 %178 = fsub float %166, %175 %179 = call float @llvm.AMDGPU.lrp(float %67, float %164, float %176) %180 = call float @llvm.AMDGPU.lrp(float %67, float %165, float %177) %181 = call float @llvm.AMDGPU.lrp(float %67, float %166, float %178) %182 = fmul float %102, %179 %183 = fmul float %103, %179 %184 = fmul float %104, %179 %185 = fmul float %105, %180 %186 = fadd float %185, %182 %187 = fmul float %106, %180 %188 = fadd float %187, %183 %189 = fmul float %107, %180 %190 = fadd float %189, %184 %191 = fmul float %108, %181 %192 = fadd float %191, %186 %193 = fmul float %109, %181 %194 = fadd float %193, %188 %195 = fmul float %110, %181 %196 = fadd float %195, %190 %197 = fadd float %192, %111 %198 = fadd float %194, %112 %199 = fadd float %196, %113 %200 = fmul float %86, %179 %201 = fmul float %87, %179 %202 = fmul float %88, %179 %203 = fmul float %89, %179 %204 = fmul float %90, %180 %205 = fadd float %204, %200 %206 = fmul float %91, %180 %207 = fadd float %206, %201 %208 = fmul float %92, %180 %209 = fadd float %208, %202 %210 = fmul float %93, %180 %211 = fadd float %210, %203 %212 = fmul float %94, %181 %213 = fadd float %212, %205 %214 = fmul float %95, %181 %215 = fadd float %214, %207 %216 = fmul float %96, %181 %217 = fadd float %216, %209 %218 = fmul float %97, %181 %219 = fadd float %218, %211 %220 = fadd float %213, %98 %221 = fadd float %215, %99 %222 = fadd float %217, %100 %223 = fadd float %219, %101 %224 = fmul float %83, 2.000000e+00 %225 = fmul float %84, 2.000000e+00 %226 = fmul float %85, 2.000000e+00 %227 = fmul float %197, %28 %228 = fmul float %198, %28 %229 = fmul float %199, %28 %230 = fsub float %25, %227 %231 = fsub float %26, %228 %232 = fsub float %27, %229 %233 = fmul float %230, %230 %234 = fmul float %231, %231 %235 = fadd float %234, %233 %236 = fmul float %232, %232 %237 = fadd float %235, %236 %238 = call float @llvm.AMDGPU.rsq.clamped.f32(float %237) %239 = fmul float %230, %238 %240 = fmul float %231, %238 %241 = fmul float %232, %238 %242 = fsub float -0.000000e+00, %241 %243 = fmul float %68, %145 %244 = fmul float %74, %239 %245 = fmul float %75, %239 %246 = fmul float %76, %239 %247 = fmul float %77, %240 %248 = fadd float %247, %244 %249 = fmul float %78, %240 %250 = fadd float %249, %245 %251 = fmul float %79, %240 %252 = fadd float %251, %246 %253 = fmul float %80, %242 %254 = fadd float %253, %248 %255 = fmul float %81, %242 %256 = fadd float %255, %250 %257 = fmul float %82, %242 %258 = fadd float %257, %252 %259 = fmul float %125, %254 %260 = fmul float %126, %256 %261 = fadd float %260, %259 %262 = fmul float %127, %258 %263 = fadd float %261, %262 %264 = call float @llvm.maxnum.f32(float %263, float 0.000000e+00) %265 = fadd float %243, %69 %266 = fmul float %264, %265 %267 = fmul float %230, %230 %268 = fmul float %231, %231 %269 = fadd float %268, %267 %270 = fmul float %232, %232 %271 = fadd float %269, %270 %272 = fmul float %271, %41 %273 = fadd float %272, 1.000000e+00 %274 = fdiv float 1.000000e+00, %273 %275 = fmul float %266, %274 %276 = fmul float %13, %275 %277 = fadd float %276, %224 %278 = fmul float %14, %275 %279 = fadd float %278, %225 %280 = fmul float %15, %275 %281 = fadd float %280, %226 %282 = fmul float %197, %32 %283 = fmul float %198, %32 %284 = fmul float %199, %32 %285 = fsub float %29, %282 %286 = fsub float %30, %283 %287 = fsub float %31, %284 %288 = fmul float %285, %285 %289 = fmul float %286, %286 %290 = fadd float %289, %288 %291 = fmul float %287, %287 %292 = fadd float %290, %291 %293 = call float @llvm.AMDGPU.rsq.clamped.f32(float %292) %294 = fmul float %285, %293 %295 = fmul float %286, %293 %296 = fmul float %287, %293 %297 = fsub float -0.000000e+00, %296 %298 = fmul float %74, %294 %299 = fmul float %75, %294 %300 = fmul float %76, %294 %301 = fmul float %77, %295 %302 = fadd float %301, %298 %303 = fmul float %78, %295 %304 = fadd float %303, %299 %305 = fmul float %79, %295 %306 = fadd float %305, %300 %307 = fmul float %80, %297 %308 = fadd float %307, %302 %309 = fmul float %81, %297 %310 = fadd float %309, %304 %311 = fmul float %82, %297 %312 = fadd float %311, %306 %313 = fmul float %125, %308 %314 = fmul float %126, %310 %315 = fadd float %314, %313 %316 = fmul float %127, %312 %317 = fadd float %315, %316 %318 = call float @llvm.maxnum.f32(float %317, float 0.000000e+00) %319 = fadd float %243, %69 %320 = fmul float %318, %319 %321 = fmul float %285, %285 %322 = fmul float %286, %286 %323 = fadd float %322, %321 %324 = fmul float %287, %287 %325 = fadd float %323, %324 %326 = fmul float %325, %42 %327 = fadd float %326, 1.000000e+00 %328 = fdiv float 1.000000e+00, %327 %329 = fmul float %320, %328 %330 = fmul float %16, %329 %331 = fadd float %330, %277 %332 = fmul float %17, %329 %333 = fadd float %332, %279 %334 = fmul float %18, %329 %335 = fadd float %334, %281 %336 = fmul float %197, %36 %337 = fmul float %198, %36 %338 = fmul float %199, %36 %339 = fsub float %33, %336 %340 = fsub float %34, %337 %341 = fsub float %35, %338 %342 = fmul float %339, %339 %343 = fmul float %340, %340 %344 = fadd float %343, %342 %345 = fmul float %341, %341 %346 = fadd float %344, %345 %347 = call float @llvm.AMDGPU.rsq.clamped.f32(float %346) %348 = fmul float %339, %347 %349 = fmul float %340, %347 %350 = fmul float %341, %347 %351 = fsub float -0.000000e+00, %350 %352 = fmul float %74, %348 %353 = fmul float %75, %348 %354 = fmul float %76, %348 %355 = fmul float %77, %349 %356 = fadd float %355, %352 %357 = fmul float %78, %349 %358 = fadd float %357, %353 %359 = fmul float %79, %349 %360 = fadd float %359, %354 %361 = fmul float %80, %351 %362 = fadd float %361, %356 %363 = fmul float %81, %351 %364 = fadd float %363, %358 %365 = fmul float %82, %351 %366 = fadd float %365, %360 %367 = fmul float %125, %362 %368 = fmul float %126, %364 %369 = fadd float %368, %367 %370 = fmul float %127, %366 %371 = fadd float %369, %370 %372 = call float @llvm.maxnum.f32(float %371, float 0.000000e+00) %373 = fadd float %243, %69 %374 = fmul float %372, %373 %375 = fmul float %339, %339 %376 = fmul float %340, %340 %377 = fadd float %376, %375 %378 = fmul float %341, %341 %379 = fadd float %377, %378 %380 = fmul float %379, %43 %381 = fadd float %380, 1.000000e+00 %382 = fdiv float 1.000000e+00, %381 %383 = fmul float %374, %382 %384 = fmul float %19, %383 %385 = fadd float %384, %331 %386 = fmul float %20, %383 %387 = fadd float %386, %333 %388 = fmul float %21, %383 %389 = fadd float %388, %335 %390 = fmul float %197, %40 %391 = fmul float %198, %40 %392 = fmul float %199, %40 %393 = fsub float %37, %390 %394 = fsub float %38, %391 %395 = fsub float %39, %392 %396 = fmul float %393, %393 %397 = fmul float %394, %394 %398 = fadd float %397, %396 %399 = fmul float %395, %395 %400 = fadd float %398, %399 %401 = call float @llvm.AMDGPU.rsq.clamped.f32(float %400) %402 = fmul float %393, %401 %403 = fmul float %394, %401 %404 = fmul float %395, %401 %405 = fsub float -0.000000e+00, %404 %406 = fmul float %74, %402 %407 = fmul float %75, %402 %408 = fmul float %76, %402 %409 = fmul float %77, %403 %410 = fadd float %409, %406 %411 = fmul float %78, %403 %412 = fadd float %411, %407 %413 = fmul float %79, %403 %414 = fadd float %413, %408 %415 = fmul float %80, %405 %416 = fadd float %415, %410 %417 = fmul float %81, %405 %418 = fadd float %417, %412 %419 = fmul float %82, %405 %420 = fadd float %419, %414 %421 = fmul float %125, %416 %422 = fmul float %126, %418 %423 = fadd float %422, %421 %424 = fmul float %127, %420 %425 = fadd float %423, %424 %426 = call float @llvm.maxnum.f32(float %425, float 0.000000e+00) %427 = fadd float %243, %69 %428 = fmul float %426, %427 %429 = fmul float %393, %393 %430 = fmul float %394, %394 %431 = fadd float %430, %429 %432 = fmul float %395, %395 %433 = fadd float %431, %432 %434 = fmul float %433, %44 %435 = fadd float %434, 1.000000e+00 %436 = fdiv float 1.000000e+00, %435 %437 = fmul float %428, %436 %438 = fmul float %22, %437 %439 = fadd float %438, %385 %440 = fmul float %23, %437 %441 = fadd float %440, %387 %442 = fmul float %24, %437 %443 = fadd float %442, %389 %444 = fmul float %439, %70 %445 = fmul float %441, %71 %446 = fmul float %443, %72 %447 = fmul float %444, %47 %448 = fmul float %445, %48 %449 = fmul float %446, %49 %450 = fmul float %73, %50 %451 = fmul float %222, %45 %452 = fadd float %451, %46 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %137, float %138, float %139, float %140) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %447, float %448, float %449, float %450) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %452, float %23, float %24, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %220, float %221, float %222, float %223) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[52:55], s[2:3], 0x0 ; C09A0300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s1, s[52:55], 0x35 ; C200B535 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_buffer_load_dword s0, s[52:55], 0x36 ; C2003536 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[4:7], v0, s[12:15], 0 idxen ; E00C2000 80030400 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[7:10], v0, s[16:19], 0 idxen ; E00C2000 80040700 buffer_load_format_xyzw v[11:14], v0, s[20:23], 0 idxen ; E00C2000 80050B00 buffer_load_format_xyzw v[15:18], v0, s[8:11], 0 idxen ; E00C2000 80020F00 s_buffer_load_dword s2, s[52:55], 0x37 ; C2013537 s_buffer_load_dword s56, s[52:55], 0x38 ; C21C3538 s_buffer_load_dword s57, s[52:55], 0x39 ; C21CB539 s_buffer_load_dword s58, s[52:55], 0x3a ; C21D353A s_buffer_load_dword s59, s[52:55], 0x3c ; C21DB53C s_buffer_load_dword s60, s[52:55], 0x3d ; C21E353D s_buffer_load_dword s61, s[52:55], 0x3e ; C21EB53E s_buffer_load_dword s62, s[52:55], 0x40 ; C21F3540 s_buffer_load_dword s63, s[52:55], 0x41 ; C21FB541 s_buffer_load_dword s64, s[52:55], 0x42 ; C2203542 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s2 ; 7E000202 s_buffer_load_dword s65, s[52:55], 0x44 ; C220B544 s_buffer_load_dword s66, s[52:55], 0x45 ; C2213545 s_buffer_load_dword s67, s[52:55], 0x46 ; C221B546 s_buffer_load_dword s68, s[52:55], 0x4c ; C222354C s_buffer_load_dword s69, s[52:55], 0x4d ; C222B54D s_buffer_load_dword s70, s[52:55], 0x4e ; C223354E s_buffer_load_dword s71, s[52:55], 0x4f ; C223B54F s_buffer_load_dword s72, s[52:55], 0x50 ; C2243550 s_buffer_load_dword s3, s[52:55], 0x80 ; C201B580 s_buffer_load_dword s2, s[52:55], 0x81 ; C2013581 s_buffer_load_dword s7, s[52:55], 0x82 ; C203B582 s_buffer_load_dword s4, s[52:55], 0x83 ; C2023583 s_buffer_load_dword s73, s[52:55], 0x84 ; C224B584 s_buffer_load_dword s74, s[52:55], 0x85 ; C2253585 s_buffer_load_dword s75, s[52:55], 0x86 ; C225B586 s_buffer_load_dword s76, s[52:55], 0x88 ; C2263588 s_buffer_load_dword s77, s[52:55], 0x89 ; C226B589 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v7, 1.0, s72 ; D2080007 000090F2 s_buffer_load_dword s78, s[52:55], 0x8a ; C227358A s_buffer_load_dword s79, s[52:55], 0x8c ; C227B58C s_buffer_load_dword s80, s[52:55], 0x8d ; C228358D s_buffer_load_dword s81, s[52:55], 0x8e ; C228B58E s_buffer_load_dword s82, s[52:55], 0x90 ; C2293590 s_buffer_load_dword s83, s[52:55], 0x91 ; C229B591 s_buffer_load_dword s6, s[52:55], 0xd ; C203350D s_buffer_load_dword s5, s[52:55], 0xe ; C202B50E s_buffer_load_dword s8, s[52:55], 0x10 ; C2043510 s_buffer_load_dword s9, s[52:55], 0x11 ; C204B511 s_buffer_load_dword s10, s[52:55], 0x12 ; C2053512 s_buffer_load_dword s84, s[52:55], 0x13 ; C22A3513 s_buffer_load_dword s11, s[52:55], 0x14 ; C205B514 s_buffer_load_dword s12, s[52:55], 0x15 ; C2063515 s_buffer_load_dword s13, s[52:55], 0x16 ; C206B516 s_buffer_load_dword s51, s[52:55], 0x17 ; C219B517 s_buffer_load_dword s85, s[52:55], 0x92 ; C22AB592 s_buffer_load_dword s16, s[52:55], 0x18 ; C2083518 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s8 ; 7E100208 s_buffer_load_dword s8, s[52:55], 0x19 ; C2043519 v_mov_b32_e32 v9, s9 ; 7E120209 s_buffer_load_dword s9, s[52:55], 0x1a ; C204B51A v_mov_b32_e32 v15, s10 ; 7E1E020A s_buffer_load_dword s50, s[52:55], 0x1b ; C219351B s_buffer_load_dword s10, s[52:55], 0x1c ; C205351C v_mov_b32_e32 v16, s11 ; 7E20020B s_buffer_load_dword s11, s[52:55], 0x1d ; C205B51D v_mov_b32_e32 v17, s12 ; 7E22020C s_buffer_load_dword s12, s[52:55], 0x1e ; C206351E v_mov_b32_e32 v19, s13 ; 7E26020D s_buffer_load_dword s49, s[52:55], 0x1f ; C218B51F s_buffer_load_dword s15, s[52:55], 0x22 ; C207B522 s_buffer_load_dword s14, s[52:55], 0x26 ; C2073526 v_mov_b32_e32 v20, s16 ; 7E280210 s_buffer_load_dword s13, s[52:55], 0x5f ; C206B55F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v21, s8 ; 7E2A0208 s_buffer_load_dword s42, s[52:55], 0x60 ; C2153560 v_mov_b32_e32 v22, s9 ; 7E2C0209 s_buffer_load_dword s41, s[52:55], 0x61 ; C214B561 s_buffer_load_dword s40, s[52:55], 0x62 ; C2143562 v_mov_b32_e32 v23, s10 ; 7E2E020A s_buffer_load_dword s37, s[52:55], 0x64 ; C212B564 v_mov_b32_e32 v24, s11 ; 7E30020B s_buffer_load_dword s36, s[52:55], 0x65 ; C2123565 v_mov_b32_e32 v25, s12 ; 7E32020C s_buffer_load_dword s35, s[52:55], 0x66 ; C211B566 s_buffer_load_dword s32, s[52:55], 0x68 ; C2103568 s_buffer_load_dword s30, s[52:55], 0x69 ; C20F3569 s_buffer_load_dword s31, s[52:55], 0x6a ; C20FB56A v_mul_f32_e32 v0, s13, v0 ; 1000000D s_buffer_load_dword s17, s[52:55], 0x2a ; C208B52A s_buffer_load_dword s13, s[52:55], 0x2e ; C206B52E s_buffer_load_dword s12, s[52:55], 0x32 ; C2063532 s_buffer_load_dword s16, s[52:55], 0x33 ; C2083533 s_buffer_load_dword s8, s[52:55], 0x34 ; C2043534 s_buffer_load_dword s29, s[52:55], 0x54 ; C20EB554 s_buffer_load_dword s18, s[52:55], 0x58 ; C2093558 s_buffer_load_dword s11, s[52:55], 0x5c ; C205B55C s_buffer_load_dword s10, s[52:55], 0x5d ; C205355D s_buffer_load_dword s9, s[52:55], 0x5e ; C204B55E s_buffer_load_dword s19, s[52:55], 0x70 ; C209B570 s_buffer_load_dword s20, s[52:55], 0x71 ; C20A3571 s_buffer_load_dword s23, s[52:55], 0x72 ; C20BB572 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v26, s16 ; 7E340210 s_buffer_load_dword s28, s[52:55], 0x74 ; C20E3574 s_buffer_load_dword s27, s[52:55], 0x75 ; C20DB575 s_buffer_load_dword s24, s[52:55], 0x0 ; C20C3500 v_mov_b32_e32 v27, s18 ; 7E360212 s_buffer_load_dword s25, s[52:55], 0x1 ; C20CB501 s_buffer_load_dword s26, s[52:55], 0x2 ; C20D3502 s_buffer_load_dword s21, s[52:55], 0x4 ; C20AB504 s_buffer_load_dword s22, s[52:55], 0x5 ; C20B3505 v_add_f32_e64 v28, s19, s19 ; D206001C 00002613 v_add_f32_e64 v29, s20, s20 ; D206001D 00002814 v_add_f32_e64 v30, s23, s23 ; D206001E 00002E17 s_buffer_load_dword s23, s[52:55], 0x6 ; C20BB506 s_buffer_load_dword s18, s[52:55], 0x8 ; C2093508 s_buffer_load_dword s19, s[52:55], 0x9 ; C209B509 s_buffer_load_dword s20, s[52:55], 0xa ; C20A350A s_buffer_load_dword s16, s[52:55], 0xc ; C208350C s_buffer_load_dword s44, s[52:55], 0x76 ; C2163576 s_buffer_load_dword s45, s[52:55], 0x77 ; C216B577 s_buffer_load_dword s46, s[52:55], 0x78 ; C2173578 s_buffer_load_dword s47, s[52:55], 0x79 ; C217B579 s_buffer_load_dword s48, s[52:55], 0x7a ; C218357A s_buffer_load_dword s43, s[52:55], 0x7b ; C215B57B s_buffer_load_dword s38, s[52:55], 0x7c ; C213357C s_buffer_load_dword s33, s[52:55], 0x7d ; C210B57D s_buffer_load_dword s39, s[52:55], 0x7e ; C213B57E s_buffer_load_dword s34, s[52:55], 0x7f ; C211357F v_mul_f32_e32 v1, s56, v1 ; 10020238 v_mul_f32_e32 v2, s57, v2 ; 10040439 v_mul_f32_e32 v3, s58, v3 ; 1006063A v_mul_f32_e32 v31, s59, v1 ; 103E023B v_mul_f32_e32 v32, s60, v1 ; 1040023C v_mul_f32_e32 v33, s61, v1 ; 1042023D v_mac_f32_e32 v31, s62, v2 ; 3E3E043E v_mac_f32_e32 v32, s63, v2 ; 3E40043F v_mac_f32_e32 v33, s64, v2 ; 3E420440 v_mac_f32_e32 v31, s65, v3 ; 3E3E0641 v_mac_f32_e32 v32, s66, v3 ; 3E400642 v_mac_f32_e32 v33, s67, v3 ; 3E420643 v_sub_f32_e32 v34, 1.0, v10 ; 084414F2 v_mul_f32_e32 v1, v1, v34 ; 10024501 v_mul_f32_e32 v2, v2, v34 ; 10044502 v_mul_f32_e32 v3, v3, v34 ; 10064503 v_mac_f32_e32 v1, v31, v10 ; 3E02151F v_mac_f32_e32 v2, v32, v10 ; 3E041520 v_mac_f32_e32 v3, v33, v10 ; 3E061521 v_mul_f32_e32 v10, s68, v1 ; 10140244 v_mac_f32_e32 v10, s69, v2 ; 3E140445 v_mac_f32_e32 v10, s70, v3 ; 3E140646 v_add_f32_e32 v10, s71, v10 ; 06141447 v_mad_f32 v31, -v10, s68, v1 ; D282001F 2404890A v_mad_f32 v32, -v10, s69, v2 ; D2820020 24088B0A v_mad_f32 v10, -v10, s70, v3 ; D282000A 240C8D0A v_mul_f32_e32 v31, v31, v7 ; 103E0F1F v_mul_f32_e32 v32, v32, v7 ; 10400F20 v_mul_f32_e32 v7, v10, v7 ; 100E0F0A v_mac_f32_e32 v31, s72, v1 ; 3E3E0248 v_mac_f32_e32 v32, s72, v2 ; 3E400448 v_mac_f32_e32 v7, s72, v3 ; 3E0E0648 v_mul_f32_e32 v1, s73, v31 ; 10023E49 v_mul_f32_e32 v2, s74, v31 ; 10043E4A v_mul_f32_e32 v3, s75, v31 ; 10063E4B v_mac_f32_e32 v1, s76, v32 ; 3E02404C v_mac_f32_e32 v2, s77, v32 ; 3E04404D v_mac_f32_e32 v3, s78, v32 ; 3E06404E v_mac_f32_e32 v1, s79, v7 ; 3E020E4F v_mac_f32_e32 v2, s80, v7 ; 3E040E50 v_mac_f32_e32 v3, s81, v7 ; 3E060E51 v_add_f32_e32 v1, s82, v1 ; 06020252 v_add_f32_e32 v2, s83, v2 ; 06040453 v_add_f32_e32 v3, s85, v3 ; 06060655 v_mad_f32 v8, -v1, s84, v8 ; D2820008 2420A901 v_mad_f32 v9, -v2, s84, v9 ; D2820009 2424A902 v_mad_f32 v10, -v3, s84, v15 ; D282000A 243CA903 v_mad_f32 v15, -v1, s51, v16 ; D282000F 24406701 v_mad_f32 v16, -v2, s51, v17 ; D2820010 24446702 v_mad_f32 v17, -v3, s51, v19 ; D2820011 244C6703 v_mad_f32 v19, -v1, s50, v20 ; D2820013 24506501 v_mad_f32 v20, -v2, s50, v21 ; D2820014 24546502 v_mad_f32 v21, -v3, s50, v22 ; D2820015 24586503 v_mad_f32 v1, -v1, s49, v23 ; D2820001 245C6301 v_mad_f32 v2, -v2, s49, v24 ; D2820002 24606302 v_mad_f32 v3, -v3, s49, v25 ; D2820003 24646303 v_mul_f32_e32 v22, v8, v8 ; 102C1108 v_mac_f32_e32 v22, v9, v9 ; 3E2C1309 v_mul_f32_e32 v23, v15, v15 ; 102E1F0F v_mac_f32_e32 v23, v16, v16 ; 3E2E2110 v_mul_f32_e32 v24, v19, v19 ; 10302713 v_mac_f32_e32 v24, v20, v20 ; 3E302914 v_mul_f32_e32 v25, v1, v1 ; 10320301 v_mac_f32_e32 v25, v2, v2 ; 3E320502 v_mac_f32_e32 v22, v10, v10 ; 3E2C150A v_mac_f32_e32 v23, v17, v17 ; 3E2E2311 v_mac_f32_e32 v24, v21, v21 ; 3E302B15 v_mac_f32_e32 v25, v3, v3 ; 3E320703 v_rsq_clamp_f32_e32 v33, v22 ; 7E425916 v_rsq_clamp_f32_e32 v34, v23 ; 7E445917 v_rsq_clamp_f32_e32 v35, v24 ; 7E465918 v_rsq_clamp_f32_e32 v36, v25 ; 7E485919 v_mul_f32_e32 v8, v33, v8 ; 10101121 v_mul_f32_e32 v15, v34, v15 ; 101E1F22 v_mul_f32_e32 v19, v35, v19 ; 10262723 v_mul_f32_e32 v1, v36, v1 ; 10020324 v_mul_f32_e32 v37, s42, v8 ; 104A102A v_mul_f32_e32 v38, s42, v15 ; 104C1E2A v_mul_f32_e32 v39, s42, v19 ; 104E262A v_mul_f32_e32 v40, s42, v1 ; 1050022A v_mul_f32_e32 v41, s41, v8 ; 10521029 v_mul_f32_e32 v42, s41, v15 ; 10541E29 v_mul_f32_e32 v43, s41, v19 ; 10562629 v_mul_f32_e32 v44, s41, v1 ; 10580229 v_mul_f32_e32 v8, s40, v8 ; 10101028 v_mul_f32_e32 v15, s40, v15 ; 101E1E28 v_mul_f32_e32 v19, s40, v19 ; 10262628 v_mul_f32_e32 v1, s40, v1 ; 10020228 v_mul_f32_e32 v9, v33, v9 ; 10121321 v_mul_f32_e32 v16, v34, v16 ; 10202122 v_mul_f32_e32 v20, v35, v20 ; 10282923 v_mul_f32_e32 v2, v36, v2 ; 10040524 v_mac_f32_e32 v37, s37, v9 ; 3E4A1225 v_mac_f32_e32 v38, s37, v16 ; 3E4C2025 v_mac_f32_e32 v39, s37, v20 ; 3E4E2825 v_mac_f32_e32 v40, s37, v2 ; 3E500425 v_mac_f32_e32 v41, s36, v9 ; 3E521224 v_mac_f32_e32 v42, s36, v16 ; 3E542024 v_mac_f32_e32 v43, s36, v20 ; 3E562824 v_mac_f32_e32 v44, s36, v2 ; 3E580424 v_mul_f32_e32 v10, v33, v10 ; 10141521 v_mul_f32_e32 v17, v34, v17 ; 10222322 v_mul_f32_e32 v21, v35, v21 ; 102A2B23 v_mul_f32_e32 v3, v36, v3 ; 10060724 v_mac_f32_e32 v8, s35, v9 ; 3E101223 v_mac_f32_e32 v15, s35, v16 ; 3E1E2023 v_mac_f32_e32 v19, s35, v20 ; 3E262823 v_mac_f32_e32 v1, s35, v2 ; 3E020423 v_mad_f32 v2, -s32, v10, v37 ; D2820002 24961420 v_mad_f32 v9, -s30, v10, v41 ; D2820009 24A6141E v_mad_f32 v8, -s31, v10, v8 ; D2820008 2422141F v_mad_f32 v10, -s32, v17, v38 ; D282000A 249A2220 v_mad_f32 v16, -s30, v17, v42 ; D2820010 24AA221E v_mad_f32 v15, -s31, v17, v15 ; D282000F 243E221F v_mad_f32 v17, -s32, v21, v39 ; D2820011 249E2A20 v_mad_f32 v20, -s30, v21, v43 ; D2820014 24AE2A1E v_mad_f32 v19, -s31, v21, v19 ; D2820013 244E2A1F v_mad_f32 v21, -s32, v3, v40 ; D2820015 24A20620 v_mad_f32 v33, -s30, v3, v44 ; D2820021 24B2061E v_mad_f32 v1, -s31, v3, v1 ; D2820001 2406061F v_mul_f32_e32 v2, v2, v4 ; 10040902 v_mac_f32_e32 v2, v9, v5 ; 3E040B09 v_mul_f32_e32 v3, v10, v4 ; 1006090A v_mac_f32_e32 v3, v16, v5 ; 3E060B10 v_mul_f32_e32 v9, v17, v4 ; 10120911 v_mac_f32_e32 v9, v20, v5 ; 3E120B14 v_mul_f32_e32 v4, v21, v4 ; 10080915 v_mac_f32_e32 v4, v33, v5 ; 3E080B21 v_mac_f32_e32 v2, v8, v6 ; 3E040D08 v_mac_f32_e32 v3, v15, v6 ; 3E060D0F v_mac_f32_e32 v9, v19, v6 ; 3E120D13 v_mac_f32_e32 v4, v1, v6 ; 3E080D01 v_mac_f32_e32 v27, s29, v18 ; 3E36241D exp 15, 32, 0, 0, 0, v11, v12, v13, v14 ; F800020F 0E0D0C0B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s28, v31 ; 10023E1C v_mul_f32_e32 v5, s27, v31 ; 100A3E1B v_mul_f32_e32 v6, s44, v31 ; 100C3E2C v_mul_f32_e32 v8, s45, v31 ; 10103E2D v_mac_f32_e32 v1, s46, v32 ; 3E02402E v_mac_f32_e32 v5, s47, v32 ; 3E0A402F v_mac_f32_e32 v6, s48, v32 ; 3E0C4030 v_mad_f32 v10, v22, s15, 1.0 ; D282000A 03C81F16 v_rcp_f32_e32 v10, v10 ; 7E14550A v_mac_f32_e32 v8, s43, v32 ; 3E10402B v_max_f32_e32 v2, 0, v2 ; 20040480 v_mul_f32_e32 v2, v27, v2 ; 1004051B v_mul_f32_e32 v2, v10, v2 ; 1004050A v_mac_f32_e32 v28, s24, v2 ; 3E380418 v_mac_f32_e32 v29, s25, v2 ; 3E3A0419 v_mac_f32_e32 v30, s26, v2 ; 3E3C041A v_mad_f32 v2, v23, s14, 1.0 ; D2820002 03C81D17 v_rcp_f32_e32 v2, v2 ; 7E045502 v_mad_f32 v10, v24, s17, 1.0 ; D282000A 03C82318 v_max_f32_e32 v3, 0, v3 ; 20060680 v_mul_f32_e32 v3, v27, v3 ; 1006071B v_mul_f32_e32 v2, v2, v3 ; 10040702 v_mac_f32_e32 v28, s21, v2 ; 3E380415 v_mac_f32_e32 v29, s22, v2 ; 3E3A0416 v_rcp_f32_e32 v3, v10 ; 7E06550A v_mac_f32_e32 v30, s23, v2 ; 3E3C0417 v_max_f32_e32 v2, 0, v9 ; 20041280 v_mul_f32_e32 v2, v27, v2 ; 1004051B v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mac_f32_e32 v28, s18, v2 ; 3E380412 v_mac_f32_e32 v29, s19, v2 ; 3E3A0413 v_mac_f32_e32 v30, s20, v2 ; 3E3C0414 v_mac_f32_e32 v1, s38, v7 ; 3E020E26 v_mac_f32_e32 v6, s39, v7 ; 3E0C0E27 v_add_f32_e32 v2, s7, v6 ; 06040C07 v_mad_f32 v3, v25, s13, 1.0 ; D2820003 03C81B19 v_rcp_f32_e32 v3, v3 ; 7E065503 v_mac_f32_e32 v26, s12, v2 ; 3E34040C v_max_f32_e32 v4, 0, v4 ; 20080880 v_mul_f32_e32 v4, v27, v4 ; 1008091B v_mul_f32_e32 v3, v3, v4 ; 10060903 v_mac_f32_e32 v28, s16, v3 ; 3E380610 v_mul_f32_e32 v4, s11, v28 ; 1008380B v_mac_f32_e32 v29, s6, v3 ; 3E3A0606 v_mul_f32_e32 v6, s10, v29 ; 100C3A0A v_mac_f32_e32 v30, s5, v3 ; 3E3C0605 v_mul_f32_e32 v3, s9, v30 ; 10063C09 v_mul_f32_e32 v4, s8, v4 ; 10080808 v_mul_f32_e32 v6, s1, v6 ; 100C0C01 v_mov_b32_e32 v9, s6 ; 7E120206 v_mov_b32_e32 v10, s5 ; 7E140205 v_mul_f32_e32 v3, s0, v3 ; 10060600 exp 15, 33, 0, 0, 0, v4, v6, v3, v0 ; F800021F 00030604 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 34, 0, 0, 0, v26, v9, v10, v0 ; F800022F 000A091A v_mac_f32_e32 v5, s33, v7 ; 3E0A0E21 v_mac_f32_e32 v8, s34, v7 ; 3E100E22 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v0, s3, v1 ; 06000203 v_add_f32_e32 v1, s2, v5 ; 06020A02 v_add_f32_e32 v3, s4, v8 ; 06061004 exp 15, 12, 0, 1, 0, v0, v1, v2, v3 ; F80008CF 03020100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 88 VGPRS: 48 Code Size: 1536 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[1].xyz, TEMP[0], SAMP[0], 2D 2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[1].xyzz 3: MOV_SAT TEMP[1].x, IN[2].xxxx 4: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[0].xyzz 5: MOV TEMP[0].w, IMM[0].xxxx 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %28 = load <32 x i8>, <32 x i8> addrspace(2)* %27, align 32, !tbaa !0 %29 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %37 = bitcast float %31 to i32 %38 = bitcast float %32 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %28, <16 x i8> %30, i32 2) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = fmul float %33, %42 %46 = fmul float %34, %43 %47 = fmul float %35, %44 %48 = call float @llvm.AMDIL.clamp.(float %36, float 0.000000e+00, float 1.000000e+00) %49 = call float @llvm.AMDGPU.lrp(float %48, float %45, float %24) %50 = call float @llvm.AMDGPU.lrp(float %48, float %46, float %25) %51 = call float @llvm.AMDGPU.lrp(float %48, float %47, float %26) %52 = call i32 @llvm.SI.packf16(float %49, float %50) %53 = bitcast i32 %52 to float %54 = call i32 @llvm.SI.packf16(float %51, float 1.000000e+00) %55 = bitcast i32 %54 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %53, float %55, float %53, float %55) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v0, v0, 0, 2, [m0] ; C8000800 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 v_interp_p2_f32 v0, [v0], v1, 0, 2, [m0] ; C8010801 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800700 00430102 s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v1, v1, v4 ; 10020901 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_mul_f32_e32 v3, v3, v6 ; 10060D03 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v4, 1.0, v0 ; 080800F2 v_mul_f32_e32 v5, s4, v4 ; 100A0804 v_mac_f32_e32 v5, v1, v0 ; 3E0A0101 v_mul_f32_e32 v1, s5, v4 ; 10020805 v_mac_f32_e32 v1, v2, v0 ; 3E020102 v_mul_f32_e32 v2, s0, v4 ; 10040800 v_mac_f32_e32 v2, v3, v0 ; 3E040103 v_cvt_pkrtz_f16_f32_e32 v0, v5, v1 ; 5E000305 v_cvt_pkrtz_f16_f32_e64 v1, v2, 1.0 ; D25E0001 0001E502 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 168 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..7] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[0].xyzz, IN[0].xyzz 1: RSQ TEMP[1].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[0].xyzz, TEMP[1].xxxx 3: MUL TEMP[1], CONST[0], TEMP[0].xxxx 4: MAD TEMP[1], CONST[1], TEMP[0].yyyy, TEMP[1] 5: MAD TEMP[0].xyz, CONST[2], TEMP[0].zzzz, TEMP[1] 6: MUL TEMP[1], CONST[4], IN[0].xxxx 7: MAD TEMP[1], CONST[5], IN[0].yyyy, TEMP[1] 8: MAD TEMP[1], CONST[6], IN[0].zzzz, TEMP[1] 9: MAD TEMP[1], CONST[7], IN[0].wwww, TEMP[1] 10: MUL TEMP[2], CONST[0], IN[0].xxxx 11: MAD TEMP[2], CONST[1], IN[0].yyyy, TEMP[2] 12: MAD TEMP[2], CONST[2], IN[0].zzzz, TEMP[2] 13: MAD TEMP[2].xyz, CONST[3], IN[0].wwww, TEMP[2] 14: MOV TEMP[2].xyz, TEMP[2].xyzx 15: MOV TEMP[2].w, TEMP[0].xxxx 16: MOV TEMP[0].xy, TEMP[0].yzyy 17: MOV OUT[2], TEMP[0] 18: MOV OUT[1], TEMP[2] 19: MOV OUT[0], TEMP[1] 20: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = extractelement <4 x float> %44, i32 3 %49 = fmul float %45, %45 %50 = fmul float %46, %46 %51 = fadd float %50, %49 %52 = fmul float %47, %47 %53 = fadd float %51, %52 %54 = call float @llvm.AMDGPU.rsq.clamped.f32(float %53) %55 = fmul float %45, %54 %56 = fmul float %46, %54 %57 = fmul float %47, %54 %58 = fmul float %13, %55 %59 = fmul float %14, %55 %60 = fmul float %15, %55 %61 = fmul float %16, %56 %62 = fadd float %61, %58 %63 = fmul float %17, %56 %64 = fadd float %63, %59 %65 = fmul float %18, %56 %66 = fadd float %65, %60 %67 = fmul float %19, %57 %68 = fadd float %67, %62 %69 = fmul float %20, %57 %70 = fadd float %69, %64 %71 = fmul float %21, %57 %72 = fadd float %71, %66 %73 = fmul float %25, %45 %74 = fmul float %26, %45 %75 = fmul float %27, %45 %76 = fmul float %28, %45 %77 = fmul float %29, %46 %78 = fadd float %77, %73 %79 = fmul float %30, %46 %80 = fadd float %79, %74 %81 = fmul float %31, %46 %82 = fadd float %81, %75 %83 = fmul float %32, %46 %84 = fadd float %83, %76 %85 = fmul float %33, %47 %86 = fadd float %85, %78 %87 = fmul float %34, %47 %88 = fadd float %87, %80 %89 = fmul float %35, %47 %90 = fadd float %89, %82 %91 = fmul float %36, %47 %92 = fadd float %91, %84 %93 = fmul float %37, %48 %94 = fadd float %93, %86 %95 = fmul float %38, %48 %96 = fadd float %95, %88 %97 = fmul float %39, %48 %98 = fadd float %97, %90 %99 = fmul float %40, %48 %100 = fadd float %99, %92 %101 = fmul float %13, %45 %102 = fmul float %14, %45 %103 = fmul float %15, %45 %104 = fmul float %16, %46 %105 = fadd float %104, %101 %106 = fmul float %17, %46 %107 = fadd float %106, %102 %108 = fmul float %18, %46 %109 = fadd float %108, %103 %110 = fmul float %19, %47 %111 = fadd float %110, %105 %112 = fmul float %20, %47 %113 = fadd float %112, %107 %114 = fmul float %21, %47 %115 = fadd float %114, %109 %116 = fmul float %22, %48 %117 = fadd float %116, %111 %118 = fmul float %23, %48 %119 = fadd float %118, %113 %120 = fmul float %24, %48 %121 = fadd float %120, %115 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %117, float %119, float %121, float %68) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %70, float %72, float %72, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %94, float %96, float %98, float %100) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s12, s[0:3], 0xa ; C206010A s_buffer_load_dword s13, s[0:3], 0xc ; C206810C s_buffer_load_dword s14, s[0:3], 0xd ; C207010D s_buffer_load_dword s15, s[0:3], 0xe ; C207810E s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110 s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111 s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112 s_buffer_load_dword s19, s[0:3], 0x13 ; C2098113 s_buffer_load_dword s20, s[0:3], 0x14 ; C20A0114 s_buffer_load_dword s21, s[0:3], 0x15 ; C20A8115 s_buffer_load_dword s22, s[0:3], 0x16 ; C20B0116 s_buffer_load_dword s23, s[0:3], 0x17 ; C20B8117 s_buffer_load_dword s24, s[0:3], 0x18 ; C20C0118 s_buffer_load_dword s25, s[0:3], 0x19 ; C20C8119 s_buffer_load_dword s26, s[0:3], 0x1a ; C20D011A s_buffer_load_dword s27, s[0:3], 0x1b ; C20D811B s_buffer_load_dword s28, s[0:3], 0x1c ; C20E011C s_buffer_load_dword s29, s[0:3], 0x1d ; C20E811D s_buffer_load_dword s30, s[0:3], 0x1e ; C20F011E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s16, v0 ; 10080010 v_mac_f32_e32 v4, s20, v1 ; 3E080214 v_mul_f32_e32 v5, s17, v0 ; 100A0011 v_mac_f32_e32 v5, s21, v1 ; 3E0A0215 v_mul_f32_e32 v6, s18, v0 ; 100C0012 v_mac_f32_e32 v6, s22, v1 ; 3E0C0216 v_mul_f32_e32 v7, s19, v0 ; 100E0013 v_mac_f32_e32 v7, s23, v1 ; 3E0E0217 v_mac_f32_e32 v4, s24, v2 ; 3E080418 v_mac_f32_e32 v5, s25, v2 ; 3E0A0419 v_mac_f32_e32 v6, s26, v2 ; 3E0C041A v_mac_f32_e32 v7, s27, v2 ; 3E0E041B v_mac_f32_e32 v4, s28, v3 ; 3E08061C v_mac_f32_e32 v5, s29, v3 ; 3E0A061D v_mac_f32_e32 v6, s30, v3 ; 3E0C061E v_mac_f32_e32 v7, s0, v3 ; 3E0E0600 v_mul_f32_e32 v8, s4, v0 ; 10100004 v_mac_f32_e32 v8, s7, v1 ; 3E100207 v_mac_f32_e32 v8, s10, v2 ; 3E10040A v_mac_f32_e32 v8, s13, v3 ; 3E10060D v_mul_f32_e32 v9, s5, v0 ; 10120005 v_mac_f32_e32 v9, s8, v1 ; 3E120208 v_mac_f32_e32 v9, s11, v2 ; 3E12040B v_mac_f32_e32 v9, s14, v3 ; 3E12060E v_mul_f32_e32 v10, v0, v0 ; 10140100 v_mac_f32_e32 v10, v1, v1 ; 3E140301 v_mac_f32_e32 v10, v2, v2 ; 3E140502 v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A v_mul_f32_e32 v11, s6, v0 ; 10160006 v_mac_f32_e32 v11, s9, v1 ; 3E160209 v_mac_f32_e32 v11, s12, v2 ; 3E16040C v_mac_f32_e32 v11, s15, v3 ; 3E16060F v_mul_f32_e32 v0, v10, v0 ; 1000010A v_mul_f32_e32 v1, v10, v1 ; 1002030A v_mul_f32_e32 v2, v10, v2 ; 1004050A v_mul_f32_e32 v3, s4, v0 ; 10060004 v_mac_f32_e32 v3, s7, v1 ; 3E060207 v_mac_f32_e32 v3, s10, v2 ; 3E06040A exp 15, 32, 0, 0, 0, v8, v9, v11, v3 ; F800020F 030B0908 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, s5, v0 ; 10060005 v_mul_f32_e32 v0, s6, v0 ; 10000006 v_mac_f32_e32 v3, s8, v1 ; 3E060208 v_mac_f32_e32 v0, s9, v1 ; 3E000209 v_mac_f32_e32 v3, s11, v2 ; 3E06040B v_mac_f32_e32 v0, s12, v2 ; 3E00040C v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 33, 0, 0, 0, v3, v0, v0, v1 ; F800021F 01000003 exp 15, 12, 0, 1, 0, v4, v5, v6, v7 ; F80008CF 07060504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 12 Code Size: 356 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL CONST[0..10] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 2.0000, 1.3000} 0: MOV TEMP[0].x, IN[0].wwww 1: MOV TEMP[0].yz, IN[1].yxyy 2: MOV TEMP[1].xyz, -CONST[0].xyzx 3: ADD TEMP[2].xyz, CONST[1].xyzz, TEMP[1].xyzz 4: ADD TEMP[1].xyz, IN[0].xyzz, TEMP[1].xyzz 5: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 6: RSQ TEMP[3].x, TEMP[3].xxxx 7: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xxxx 8: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[1].xyzz 9: RSQ TEMP[4].x, TEMP[4].xxxx 10: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xxxx 11: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[1].xyzz 12: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx 13: ADD TEMP[1].x, IMM[0].xxxx, -TEMP[1].xxxx 14: SQRT TEMP[1].x, TEMP[1].xxxx 15: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[2].xyzz 16: SQRT TEMP[2].x, TEMP[2].xxxx 17: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 18: RCP TEMP[2].x, CONST[1].wwww 19: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 20: ADD TEMP[2].x, TEMP[1].xxxx, IMM[0].yyyy 21: ABS TEMP[2].x, TEMP[2].xxxx 22: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[0].xyzz 23: RSQ TEMP[3].x, TEMP[3].xxxx 24: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xxxx 25: DP3 TEMP[3].x, CONST[10].xyzz, CONST[10].xyzz 26: RSQ TEMP[3].x, TEMP[3].xxxx 27: MUL TEMP[3].xyz, CONST[10].xyzz, TEMP[3].xxxx 28: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[3].xyzz 29: ADD TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz 30: POW TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww 31: FSLT TEMP[1].x, IMM[0].xxxx, TEMP[1].xxxx 32: UIF TEMP[1].xxxx :0 33: ADD TEMP[1].x, CONST[2].zzzz, -TEMP[2].xxxx 34: RCP TEMP[3].x, CONST[2].zzzz 35: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx 36: MOV_SAT TEMP[1].x, TEMP[1].xxxx 37: POW TEMP[1].x, TEMP[1].xxxx, CONST[2].wwww 38: ADD TEMP[3].x, CONST[2].xxxx, -TEMP[2].xxxx 39: RCP TEMP[4].x, CONST[2].xxxx 40: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx 41: MOV_SAT TEMP[3].x, TEMP[3].xxxx 42: POW TEMP[3].x, TEMP[3].xxxx, CONST[2].yyyy 43: MUL TEMP[3], CONST[3], TEMP[3].xxxx 44: MAD TEMP[1], CONST[4], TEMP[1].xxxx, TEMP[3] 45: MUL TEMP[1], TEMP[1], CONST[9].xxxx 46: MUL TEMP[1], TEMP[1], TEMP[0].xxxx 47: ELSE :0 48: ADD TEMP[3].x, CONST[5].zzzz, -TEMP[2].xxxx 49: RCP TEMP[4].x, CONST[5].zzzz 50: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx 51: MOV_SAT TEMP[3].x, TEMP[3].xxxx 52: POW TEMP[3].x, TEMP[3].xxxx, CONST[5].wwww 53: ADD TEMP[2].x, CONST[5].xxxx, -TEMP[2].xxxx 54: RCP TEMP[4].x, CONST[5].xxxx 55: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx 56: MOV_SAT TEMP[2].x, TEMP[2].xxxx 57: POW TEMP[2].x, TEMP[2].xxxx, CONST[5].yyyy 58: MUL TEMP[2], CONST[6], TEMP[2].xxxx 59: MAD TEMP[2], CONST[7], TEMP[3].xxxx, TEMP[2] 60: ADD TEMP[2], TEMP[2], CONST[8] 61: MUL TEMP[2], TEMP[2], CONST[9].xxxx 62: MUL TEMP[1], TEMP[2], TEMP[0].xxxx 63: ENDIF 64: MOV OUT[0], TEMP[1] 65: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %39 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %45 = fsub float %27, %24 %46 = fsub float %28, %25 %47 = fsub float %29, %26 %48 = fsub float %39, %24 %49 = fsub float %40, %25 %50 = fsub float %41, %26 %51 = fmul float %45, %45 %52 = fmul float %46, %46 %53 = fadd float %52, %51 %54 = fmul float %47, %47 %55 = fadd float %53, %54 %56 = call float @llvm.AMDGPU.rsq.clamped.f32(float %55) %57 = fmul float %45, %56 %58 = fmul float %46, %56 %59 = fmul float %47, %56 %60 = fmul float %48, %48 %61 = fmul float %49, %49 %62 = fadd float %61, %60 %63 = fmul float %50, %50 %64 = fadd float %62, %63 %65 = call float @llvm.AMDGPU.rsq.clamped.f32(float %64) %66 = fmul float %48, %65 %67 = fmul float %49, %65 %68 = fmul float %50, %65 %69 = fmul float %57, %66 %70 = fmul float %58, %67 %71 = fadd float %70, %69 %72 = fmul float %59, %68 %73 = fadd float %71, %72 %74 = fmul float %73, %73 %75 = fsub float 1.000000e+00, %74 %76 = call float @llvm.sqrt.f32(float %75) %77 = fmul float %45, %45 %78 = fmul float %46, %46 %79 = fadd float %78, %77 %80 = fmul float %47, %47 %81 = fadd float %79, %80 %82 = call float @llvm.sqrt.f32(float %81) %83 = fmul float %76, %82 %84 = fdiv float 1.000000e+00, %30 %85 = fmul float %83, %84 %86 = fadd float %85, -1.000000e+00 %87 = call float @llvm.fabs.f32(float %86) %88 = fmul float %42, %42 %89 = fmul float %43, %43 %90 = fadd float %89, %88 %91 = fmul float %44, %44 %92 = fadd float %90, %91 %93 = call float @llvm.AMDGPU.rsq.clamped.f32(float %92) %94 = fmul float %42, %93 %95 = fmul float %43, %93 %96 = fmul float %44, %93 %97 = fmul float %36, %36 %98 = fmul float %37, %37 %99 = fadd float %98, %97 %100 = fmul float %38, %38 %101 = fadd float %99, %100 %102 = call float @llvm.AMDGPU.rsq.clamped.f32(float %101) %103 = fmul float %36, %102 %104 = fmul float %37, %102 %105 = fmul float %38, %102 %106 = fmul float %94, %103 %107 = fmul float %95, %104 %108 = fadd float %107, %106 %109 = fmul float %96, %105 %110 = fadd float %108, %109 %111 = fadd float %110, 2.000000e+00 %112 = call float @llvm.pow.f32(float %111, float 0x3FF4CCCCC0000000) %113 = fcmp ogt float %85, 1.000000e+00 br i1 %113, label %IF, label %ELSE IF: ; preds = %main_body %114 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %115 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %116 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %117 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %118 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %119 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %120 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %121 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %122 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %123 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %124 = fsub float %32, %87 %125 = fdiv float 1.000000e+00, %32 %126 = fmul float %124, %125 %127 = call float @llvm.AMDIL.clamp.(float %126, float 0.000000e+00, float 1.000000e+00) %128 = call float @llvm.pow.f32(float %127, float %122) %129 = fsub float %31, %87 %130 = fdiv float 1.000000e+00, %31 %131 = fmul float %129, %130 %132 = call float @llvm.AMDIL.clamp.(float %131, float 0.000000e+00, float 1.000000e+00) %133 = call float @llvm.pow.f32(float %132, float %123) %134 = fmul float %121, %133 %135 = fmul float %120, %133 %136 = fmul float %119, %133 %137 = fmul float %118, %133 %138 = fmul float %117, %128 %139 = fadd float %138, %134 %140 = fmul float %116, %128 %141 = fadd float %140, %135 %142 = fmul float %115, %128 %143 = fadd float %142, %136 %144 = fmul float %114, %128 %145 = fadd float %144, %137 br label %ENDIF ELSE: ; preds = %main_body %146 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %147 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %148 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %149 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %150 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %151 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %152 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %153 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %154 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %155 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %156 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %157 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %158 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %159 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %160 = fsub float %34, %87 %161 = fdiv float 1.000000e+00, %34 %162 = fmul float %160, %161 %163 = call float @llvm.AMDIL.clamp.(float %162, float 0.000000e+00, float 1.000000e+00) %164 = call float @llvm.pow.f32(float %163, float %158) %165 = fsub float %33, %87 %166 = fdiv float 1.000000e+00, %33 %167 = fmul float %165, %166 %168 = call float @llvm.AMDIL.clamp.(float %167, float 0.000000e+00, float 1.000000e+00) %169 = call float @llvm.pow.f32(float %168, float %159) %170 = fmul float %157, %169 %171 = fmul float %156, %169 %172 = fmul float %155, %169 %173 = fmul float %154, %169 %174 = fmul float %153, %164 %175 = fadd float %174, %170 %176 = fmul float %152, %164 %177 = fadd float %176, %171 %178 = fmul float %151, %164 %179 = fadd float %178, %172 %180 = fmul float %150, %164 %181 = fadd float %180, %173 %182 = fadd float %175, %149 %183 = fadd float %177, %148 %184 = fadd float %179, %147 %185 = fadd float %181, %146 br label %ENDIF ENDIF: ; preds = %ELSE, %IF %.sink25 = phi float [ %139, %IF ], [ %182, %ELSE ] %.sink24 = phi float [ %141, %IF ], [ %183, %ELSE ] %.sink = phi float [ %143, %IF ], [ %184, %ELSE ] %.sink23 = phi float [ %145, %IF ], [ %185, %ELSE ] %186 = fmul float %.sink25, %35 %187 = fmul float %.sink24, %35 %188 = fmul float %.sink, %35 %189 = fmul float %.sink23, %35 %190 = fmul float %186, %112 %191 = fmul float %187, %112 %192 = fmul float %188, %112 %193 = fmul float %189, %112 %194 = call i32 @llvm.SI.packf16(float %190, float %191) %195 = bitcast i32 %194 to float %196 = call i32 @llvm.SI.packf16(float %192, float %193) %197 = bitcast i32 %196 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %195, float %197, float %195, float %197) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v2, s4, v2 ; 0A040404 v_mov_b32_e32 v7, s4 ; 7E0E0204 v_sub_f32_e32 v7, s7, v7 ; 080E0E07 v_subrev_f32_e32 v3, s5, v3 ; 0A060605 v_mov_b32_e32 v8, s5 ; 7E100205 v_sub_f32_e32 v8, s8, v8 ; 08101008 v_subrev_f32_e32 v4, s6, v4 ; 0A080806 v_mov_b32_e32 v9, s6 ; 7E120206 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_buffer_load_dword s5, s[0:3], 0x28 ; C2028128 s_buffer_load_dword s6, s[0:3], 0x29 ; C2030129 s_buffer_load_dword s7, s[0:3], 0x2a ; C203812A v_sub_f32_e32 v9, s9, v9 ; 08121209 v_mul_f32_e32 v10, v7, v7 ; 10140F07 v_mac_f32_e32 v10, v8, v8 ; 3E141108 v_mac_f32_e32 v10, v9, v9 ; 3E141309 v_mul_f32_e32 v11, v2, v2 ; 10160502 v_mac_f32_e32 v11, v3, v3 ; 3E160703 v_mac_f32_e32 v11, v4, v4 ; 3E160904 v_rsq_clamp_f32_e32 v12, v10 ; 7E18590A v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B v_interp_p1_f32 v0, v0, 1, 1, [m0] ; C8000500 v_interp_p2_f32 v0, [v0], v1, 1, 1, [m0] ; C8010501 v_mul_f32_e32 v1, v12, v7 ; 10020F0C v_mul_f32_e32 v2, v11, v2 ; 1004050B v_mul_f32_e32 v1, v2, v1 ; 10020302 v_mul_f32_e32 v2, v12, v8 ; 1004110C v_mul_f32_e32 v3, v11, v3 ; 1006070B v_mac_f32_e32 v1, v3, v2 ; 3E020503 v_mul_f32_e32 v2, v12, v9 ; 1004130C v_mul_f32_e32 v3, v11, v4 ; 1006090B v_mac_f32_e32 v1, v3, v2 ; 3E020503 v_mad_f32 v1, -v1, v1, 1.0 ; D2820001 23CA0301 v_sqrt_f32_e32 v1, v1 ; 7E026701 v_sqrt_f32_e32 v2, v10 ; 7E04670A v_mul_f32_e32 v1, v2, v1 ; 10020302 v_mul_f32_e32 v2, v5, v5 ; 10040B05 v_mac_f32_e32 v2, v6, v6 ; 3E040D06 v_mac_f32_e32 v2, v0, v0 ; 3E040100 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e64 v3, s5, s5 ; D2100003 00000A05 v_mac_f32_e64 v3, s6, s6 ; D23E0003 00000C06 v_mac_f32_e64 v3, s7, s7 ; D23E0003 00000E07 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_mul_f32_e32 v4, v2, v5 ; 10080B02 v_mul_f32_e32 v5, v2, v6 ; 100A0D02 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_mul_f32_e32 v2, s5, v3 ; 10040605 v_mul_f32_e32 v6, s6, v3 ; 100C0606 v_mul_f32_e32 v3, s7, v3 ; 10060607 v_rcp_f32_e32 v7, s4 ; 7E0E5404 v_mul_f32_e32 v2, v2, v4 ; 10040902 v_mac_f32_e32 v2, v6, v5 ; 3E040B06 v_mac_f32_e32 v2, v3, v0 ; 3E040103 v_add_f32_e32 v0, 2.0, v2 ; 060004F4 v_log_f32_e32 v0, v0 ; 7E004F00 v_mul_f32_e32 v2, v7, v1 ; 10040307 v_mad_f32 v1, v1, v7, -1.0 ; D2820001 03CE0F01 v_and_b32_e32 v1, 0x7fffffff, v1 ; 360202FF 7FFFFFFF v_mul_legacy_f32_e32 v0, 0x3fa66666, v0 ; 0E0000FF 3FA66666 v_exp_f32_e32 v0, v0 ; 7E004B00 v_cmp_nlt_f32_e32 vcc, 1.0, v2 ; 7C1C04F2 s_and_saveexec_b64 s[4:5], vcc ; BE84246A s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E s_cbranch_execz BB0_1 ; BF880000 s_buffer_load_dword s6, s[0:3], 0x14 ; C2030114 s_buffer_load_dword s7, s[0:3], 0x15 ; C2038115 s_buffer_load_dword s8, s[0:3], 0x16 ; C2040116 s_buffer_load_dword s9, s[0:3], 0x17 ; C2048117 s_buffer_load_dword s10, s[0:3], 0x18 ; C2050118 s_buffer_load_dword s11, s[0:3], 0x23 ; C2058123 s_buffer_load_dword s12, s[0:3], 0x1e ; C206011E s_buffer_load_dword s13, s[0:3], 0x1f ; C206811F s_buffer_load_dword s14, s[0:3], 0x20 ; C2070120 s_buffer_load_dword s15, s[0:3], 0x21 ; C2078121 s_buffer_load_dword s16, s[0:3], 0x22 ; C2080122 s_buffer_load_dword s17, s[0:3], 0x19 ; C2088119 s_buffer_load_dword s18, s[0:3], 0x1a ; C209011A s_buffer_load_dword s19, s[0:3], 0x1b ; C209811B s_buffer_load_dword s20, s[0:3], 0x1c ; C20A011C s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s11 ; 7E04020B s_buffer_load_dword s11, s[0:3], 0x1d ; C205811D v_rcp_f32_e32 v3, s8 ; 7E065408 v_rcp_f32_e32 v4, s6 ; 7E085406 v_sub_f32_e32 v5, s8, v1 ; 080A0208 v_mul_f32_e32 v3, v3, v5 ; 10060B03 v_sub_f32_e32 v5, s6, v1 ; 080A0206 v_mul_f32_e32 v4, v4, v5 ; 10080B04 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_log_f32_e32 v3, v3 ; 7E064F03 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_log_f32_e32 v4, v4 ; 7E084F04 v_mul_legacy_f32_e32 v3, s9, v3 ; 0E060609 v_exp_f32_e32 v6, v3 ; 7E0C4B03 v_mul_legacy_f32_e32 v3, s7, v4 ; 0E060807 v_exp_f32_e32 v7, v3 ; 7E0E4B03 v_mov_b32_e32 v3, s14 ; 7E06020E v_mac_f32_e32 v3, s10, v7 ; 3E060E0A v_mac_f32_e32 v3, s20, v6 ; 3E060C14 v_mov_b32_e32 v4, s15 ; 7E08020F v_mac_f32_e32 v4, s17, v7 ; 3E080E11 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v4, s11, v6 ; 3E080C0B v_mov_b32_e32 v5, s16 ; 7E0A0210 v_mac_f32_e32 v5, s18, v7 ; 3E0A0E12 v_mac_f32_e32 v5, s12, v6 ; 3E0A0C0C v_mac_f32_e32 v2, s19, v7 ; 3E040E13 v_mac_f32_e32 v2, s13, v6 ; 3E040C0D s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 s_buffer_load_dword s6, s[0:3], 0x24 ; C2030124 s_waitcnt lgkmcnt(0) ; BF8C007F s_xor_b64 exec, exec, s[4:5] ; 89FE047E s_cbranch_execz BB0_4 ; BF880000 s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108 s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 s_buffer_load_dword s9, s[0:3], 0xa ; C204810A s_buffer_load_dword s10, s[0:3], 0xb ; C205010B s_buffer_load_dword s11, s[0:3], 0xc ; C205810C s_buffer_load_dword s12, s[0:3], 0x12 ; C2060112 s_buffer_load_dword s13, s[0:3], 0x13 ; C2068113 s_buffer_load_dword s14, s[0:3], 0xd ; C207010D s_buffer_load_dword s15, s[0:3], 0xe ; C207810E s_buffer_load_dword s16, s[0:3], 0xf ; C208010F s_buffer_load_dword s17, s[0:3], 0x10 ; C2088110 s_buffer_load_dword s18, s[0:3], 0x11 ; C2090111 s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v2, s9 ; 7E045409 v_sub_f32_e32 v3, s9, v1 ; 08060209 v_mul_f32_e32 v2, v2, v3 ; 10040702 v_rcp_f32_e32 v3, s7 ; 7E065407 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_log_f32_e32 v2, v2 ; 7E044F02 v_sub_f32_e32 v1, s7, v1 ; 08020207 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_log_f32_e32 v1, v1 ; 7E024F01 v_mul_legacy_f32_e32 v2, s10, v2 ; 0E04040A v_exp_f32_e32 v6, v2 ; 7E0C4B02 v_mul_legacy_f32_e32 v1, s8, v1 ; 0E020208 v_exp_f32_e32 v1, v1 ; 7E024B01 v_mul_f32_e32 v3, s11, v1 ; 1006020B v_mul_f32_e32 v4, s14, v1 ; 1008020E v_mul_f32_e32 v5, s15, v1 ; 100A020F v_mul_f32_e32 v2, s16, v1 ; 10040210 v_mac_f32_e32 v3, s17, v6 ; 3E060C11 v_mac_f32_e32 v4, s18, v6 ; 3E080C12 v_mac_f32_e32 v5, s12, v6 ; 3E0A0C0C v_mac_f32_e32 v2, s13, v6 ; 3E040C0D s_or_b64 exec, exec, s[4:5] ; 88FE047E v_mul_f32_e32 v1, s6, v3 ; 10020606 v_mul_f32_e32 v3, s6, v4 ; 10060806 v_mul_f32_e32 v4, s6, v5 ; 10080A06 v_mul_f32_e32 v2, s6, v2 ; 10040406 v_mul_f32_e32 v1, v0, v1 ; 10020300 v_mul_f32_e32 v3, v0, v3 ; 10060700 v_mul_f32_e32 v4, v0, v4 ; 10080900 v_mul_f32_e32 v0, v0, v2 ; 10000500 v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 780 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..7] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[0].xyzz, IN[0].xyzz 1: RSQ TEMP[1].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[0].xyzz, TEMP[1].xxxx 3: MUL TEMP[1], CONST[0], TEMP[0].xxxx 4: MAD TEMP[1], CONST[1], TEMP[0].yyyy, TEMP[1] 5: MAD TEMP[0].xyz, CONST[2], TEMP[0].zzzz, TEMP[1] 6: MUL TEMP[1], CONST[4], IN[0].xxxx 7: MAD TEMP[1], CONST[5], IN[0].yyyy, TEMP[1] 8: MAD TEMP[1], CONST[6], IN[0].zzzz, TEMP[1] 9: MAD TEMP[1], CONST[7], IN[0].wwww, TEMP[1] 10: MUL TEMP[2], CONST[0], IN[0].xxxx 11: MAD TEMP[2], CONST[1], IN[0].yyyy, TEMP[2] 12: MAD TEMP[2], CONST[2], IN[0].zzzz, TEMP[2] 13: MAD TEMP[2].xyz, CONST[3], IN[0].wwww, TEMP[2] 14: MOV TEMP[2].xyz, TEMP[2].xyzx 15: MOV TEMP[2].w, TEMP[0].xxxx 16: MOV TEMP[0].xy, TEMP[0].yzyy 17: MOV OUT[2], TEMP[0] 18: MOV OUT[1], TEMP[2] 19: MOV OUT[0], TEMP[1] 20: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = extractelement <4 x float> %44, i32 3 %49 = fmul float %45, %45 %50 = fmul float %46, %46 %51 = fadd float %50, %49 %52 = fmul float %47, %47 %53 = fadd float %51, %52 %54 = call float @llvm.AMDGPU.rsq.clamped.f32(float %53) %55 = fmul float %45, %54 %56 = fmul float %46, %54 %57 = fmul float %47, %54 %58 = fmul float %13, %55 %59 = fmul float %14, %55 %60 = fmul float %15, %55 %61 = fmul float %16, %56 %62 = fadd float %61, %58 %63 = fmul float %17, %56 %64 = fadd float %63, %59 %65 = fmul float %18, %56 %66 = fadd float %65, %60 %67 = fmul float %19, %57 %68 = fadd float %67, %62 %69 = fmul float %20, %57 %70 = fadd float %69, %64 %71 = fmul float %21, %57 %72 = fadd float %71, %66 %73 = fmul float %25, %45 %74 = fmul float %26, %45 %75 = fmul float %27, %45 %76 = fmul float %28, %45 %77 = fmul float %29, %46 %78 = fadd float %77, %73 %79 = fmul float %30, %46 %80 = fadd float %79, %74 %81 = fmul float %31, %46 %82 = fadd float %81, %75 %83 = fmul float %32, %46 %84 = fadd float %83, %76 %85 = fmul float %33, %47 %86 = fadd float %85, %78 %87 = fmul float %34, %47 %88 = fadd float %87, %80 %89 = fmul float %35, %47 %90 = fadd float %89, %82 %91 = fmul float %36, %47 %92 = fadd float %91, %84 %93 = fmul float %37, %48 %94 = fadd float %93, %86 %95 = fmul float %38, %48 %96 = fadd float %95, %88 %97 = fmul float %39, %48 %98 = fadd float %97, %90 %99 = fmul float %40, %48 %100 = fadd float %99, %92 %101 = fmul float %13, %45 %102 = fmul float %14, %45 %103 = fmul float %15, %45 %104 = fmul float %16, %46 %105 = fadd float %104, %101 %106 = fmul float %17, %46 %107 = fadd float %106, %102 %108 = fmul float %18, %46 %109 = fadd float %108, %103 %110 = fmul float %19, %47 %111 = fadd float %110, %105 %112 = fmul float %20, %47 %113 = fadd float %112, %107 %114 = fmul float %21, %47 %115 = fadd float %114, %109 %116 = fmul float %22, %48 %117 = fadd float %116, %111 %118 = fmul float %23, %48 %119 = fadd float %118, %113 %120 = fmul float %24, %48 %121 = fadd float %120, %115 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %117, float %119, float %121, float %68) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %70, float %72, float %72, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %94, float %96, float %98, float %100) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s12, s[0:3], 0xa ; C206010A s_buffer_load_dword s13, s[0:3], 0xc ; C206810C s_buffer_load_dword s14, s[0:3], 0xd ; C207010D s_buffer_load_dword s15, s[0:3], 0xe ; C207810E s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110 s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111 s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112 s_buffer_load_dword s19, s[0:3], 0x13 ; C2098113 s_buffer_load_dword s20, s[0:3], 0x14 ; C20A0114 s_buffer_load_dword s21, s[0:3], 0x15 ; C20A8115 s_buffer_load_dword s22, s[0:3], 0x16 ; C20B0116 s_buffer_load_dword s23, s[0:3], 0x17 ; C20B8117 s_buffer_load_dword s24, s[0:3], 0x18 ; C20C0118 s_buffer_load_dword s25, s[0:3], 0x19 ; C20C8119 s_buffer_load_dword s26, s[0:3], 0x1a ; C20D011A s_buffer_load_dword s27, s[0:3], 0x1b ; C20D811B s_buffer_load_dword s28, s[0:3], 0x1c ; C20E011C s_buffer_load_dword s29, s[0:3], 0x1d ; C20E811D s_buffer_load_dword s30, s[0:3], 0x1e ; C20F011E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s16, v0 ; 10080010 v_mac_f32_e32 v4, s20, v1 ; 3E080214 v_mul_f32_e32 v5, s17, v0 ; 100A0011 v_mac_f32_e32 v5, s21, v1 ; 3E0A0215 v_mul_f32_e32 v6, s18, v0 ; 100C0012 v_mac_f32_e32 v6, s22, v1 ; 3E0C0216 v_mul_f32_e32 v7, s19, v0 ; 100E0013 v_mac_f32_e32 v7, s23, v1 ; 3E0E0217 v_mac_f32_e32 v4, s24, v2 ; 3E080418 v_mac_f32_e32 v5, s25, v2 ; 3E0A0419 v_mac_f32_e32 v6, s26, v2 ; 3E0C041A v_mac_f32_e32 v7, s27, v2 ; 3E0E041B v_mac_f32_e32 v4, s28, v3 ; 3E08061C v_mac_f32_e32 v5, s29, v3 ; 3E0A061D v_mac_f32_e32 v6, s30, v3 ; 3E0C061E v_mac_f32_e32 v7, s0, v3 ; 3E0E0600 v_mul_f32_e32 v8, s4, v0 ; 10100004 v_mac_f32_e32 v8, s7, v1 ; 3E100207 v_mac_f32_e32 v8, s10, v2 ; 3E10040A v_mac_f32_e32 v8, s13, v3 ; 3E10060D v_mul_f32_e32 v9, s5, v0 ; 10120005 v_mac_f32_e32 v9, s8, v1 ; 3E120208 v_mac_f32_e32 v9, s11, v2 ; 3E12040B v_mac_f32_e32 v9, s14, v3 ; 3E12060E v_mul_f32_e32 v10, v0, v0 ; 10140100 v_mac_f32_e32 v10, v1, v1 ; 3E140301 v_mac_f32_e32 v10, v2, v2 ; 3E140502 v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A v_mul_f32_e32 v11, s6, v0 ; 10160006 v_mac_f32_e32 v11, s9, v1 ; 3E160209 v_mac_f32_e32 v11, s12, v2 ; 3E16040C v_mac_f32_e32 v11, s15, v3 ; 3E16060F v_mul_f32_e32 v0, v10, v0 ; 1000010A v_mul_f32_e32 v1, v10, v1 ; 1002030A v_mul_f32_e32 v2, v10, v2 ; 1004050A v_mul_f32_e32 v3, s4, v0 ; 10060004 v_mac_f32_e32 v3, s7, v1 ; 3E060207 v_mac_f32_e32 v3, s10, v2 ; 3E06040A exp 15, 32, 0, 0, 0, v8, v9, v11, v3 ; F800020F 030B0908 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, s5, v0 ; 10060005 v_mul_f32_e32 v0, s6, v0 ; 10000006 v_mac_f32_e32 v3, s8, v1 ; 3E060208 v_mac_f32_e32 v0, s9, v1 ; 3E000209 v_mac_f32_e32 v3, s11, v2 ; 3E06040B v_mac_f32_e32 v0, s12, v2 ; 3E00040C v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 33, 0, 0, 0, v3, v0, v0, v1 ; F800021F 01000003 exp 15, 12, 0, 1, 0, v4, v5, v6, v7 ; F80008CF 07060504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 12 Code Size: 356 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL CONST[0..4] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { 2.0000, 1.3000, 0.5000, 1.0000} IMM[1] FLT32 { -1.0000, 3.0000, 0.0000, 0.0000} 0: MOV TEMP[0].x, IN[0].wwww 1: MOV TEMP[0].yz, IN[1].yxyy 2: MOV TEMP[1].xyz, -CONST[0].xyzx 3: ADD TEMP[2].xyz, CONST[1].xyzz, TEMP[1].xyzz 4: ADD TEMP[1].xyz, IN[0].xyzz, TEMP[1].xyzz 5: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 6: RSQ TEMP[3].x, TEMP[3].xxxx 7: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xxxx 8: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[1].xyzz 9: RSQ TEMP[4].x, TEMP[4].xxxx 10: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xxxx 11: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[1].xyzz 12: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[0].xyzz 13: RSQ TEMP[3].x, TEMP[3].xxxx 14: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xxxx 15: DP3 TEMP[3].x, CONST[4].xyzz, CONST[4].xyzz 16: RSQ TEMP[3].x, TEMP[3].xxxx 17: MUL TEMP[3].xyz, CONST[4].xyzz, TEMP[3].xxxx 18: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[3].xyzz 19: ADD TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx 20: POW TEMP[0].x, TEMP[0].xxxx, IMM[0].yyyy 21: MUL TEMP[3].x, CONST[3].xxxx, TEMP[0].xxxx 22: MUL TEMP[0], CONST[2], TEMP[0].xxxx 23: MUL TEMP[0].xyz, TEMP[0], IMM[0].zzzz 24: MOV TEMP[0].xyz, TEMP[0].xyzx 25: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx 26: ADD TEMP[1].x, IMM[0].wwww, -TEMP[1].xxxx 27: SQRT TEMP[1].x, TEMP[1].xxxx 28: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[2].xyzz 29: SQRT TEMP[2].x, TEMP[2].xxxx 30: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 31: RCP TEMP[2].x, CONST[1].wwww 32: MAD TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx, IMM[1].xxxx 33: ABS TEMP[1].x, TEMP[1].xxxx 34: ADD TEMP[1].x, TEMP[3].xxxx, -TEMP[1].xxxx 35: RCP TEMP[2].x, TEMP[3].xxxx 36: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 37: MOV_SAT TEMP[1].x, TEMP[1].xxxx 38: POW TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy 39: MOV TEMP[0].w, TEMP[1].xxxx 40: MOV OUT[0], TEMP[0] 41: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %44 = fsub float %27, %24 %45 = fsub float %28, %25 %46 = fsub float %29, %26 %47 = fsub float %38, %24 %48 = fsub float %39, %25 %49 = fsub float %40, %26 %50 = fmul float %44, %44 %51 = fmul float %45, %45 %52 = fadd float %51, %50 %53 = fmul float %46, %46 %54 = fadd float %52, %53 %55 = call float @llvm.AMDGPU.rsq.clamped.f32(float %54) %56 = fmul float %44, %55 %57 = fmul float %45, %55 %58 = fmul float %46, %55 %59 = fmul float %47, %47 %60 = fmul float %48, %48 %61 = fadd float %60, %59 %62 = fmul float %49, %49 %63 = fadd float %61, %62 %64 = call float @llvm.AMDGPU.rsq.clamped.f32(float %63) %65 = fmul float %47, %64 %66 = fmul float %48, %64 %67 = fmul float %49, %64 %68 = fmul float %56, %65 %69 = fmul float %57, %66 %70 = fadd float %69, %68 %71 = fmul float %58, %67 %72 = fadd float %70, %71 %73 = fmul float %41, %41 %74 = fmul float %42, %42 %75 = fadd float %74, %73 %76 = fmul float %43, %43 %77 = fadd float %75, %76 %78 = call float @llvm.AMDGPU.rsq.clamped.f32(float %77) %79 = fmul float %41, %78 %80 = fmul float %42, %78 %81 = fmul float %43, %78 %82 = fmul float %35, %35 %83 = fmul float %36, %36 %84 = fadd float %83, %82 %85 = fmul float %37, %37 %86 = fadd float %84, %85 %87 = call float @llvm.AMDGPU.rsq.clamped.f32(float %86) %88 = fmul float %35, %87 %89 = fmul float %36, %87 %90 = fmul float %37, %87 %91 = fmul float %79, %88 %92 = fmul float %80, %89 %93 = fadd float %92, %91 %94 = fmul float %81, %90 %95 = fadd float %93, %94 %96 = fadd float %95, 2.000000e+00 %97 = call float @llvm.pow.f32(float %96, float 0x3FF4CCCCC0000000) %98 = fmul float %34, %97 %99 = fmul float %31, %97 %100 = fmul float %32, %97 %101 = fmul float %33, %97 %102 = fmul float %99, 5.000000e-01 %103 = fmul float %100, 5.000000e-01 %104 = fmul float %101, 5.000000e-01 %105 = fmul float %72, %72 %106 = fsub float 1.000000e+00, %105 %107 = call float @llvm.sqrt.f32(float %106) %108 = fmul float %44, %44 %109 = fmul float %45, %45 %110 = fadd float %109, %108 %111 = fmul float %46, %46 %112 = fadd float %110, %111 %113 = call float @llvm.sqrt.f32(float %112) %114 = fmul float %107, %113 %115 = fdiv float 1.000000e+00, %30 %116 = fmul float %114, %115 %117 = fadd float %116, -1.000000e+00 %118 = call float @llvm.fabs.f32(float %117) %119 = fsub float %98, %118 %120 = fdiv float 1.000000e+00, %98 %121 = fmul float %119, %120 %122 = call float @llvm.AMDIL.clamp.(float %121, float 0.000000e+00, float 1.000000e+00) %123 = call float @llvm.pow.f32(float %122, float 3.000000e+00) %124 = call i32 @llvm.SI.packf16(float %102, float %103) %125 = bitcast i32 %124 to float %126 = call i32 @llvm.SI.packf16(float %104, float %123) %127 = bitcast i32 %126 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %125, float %127, float %125, float %127) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_mov_b32 m0, s9 ; BEFC0309 s_buffer_load_dword s9, s[0:3], 0xc ; C204810C s_buffer_load_dword s15, s[0:3], 0x10 ; C2078110 s_buffer_load_dword s16, s[0:3], 0x11 ; C2080111 s_buffer_load_dword s0, s[0:3], 0x12 ; C2000112 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v0, v0, 1, 1, [m0] ; C8000500 v_interp_p2_f32 v0, [v0], v1, 1, 1, [m0] ; C8010501 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v1, s4, v2 ; 0A020404 v_mov_b32_e32 v2, s4 ; 7E040204 v_sub_f32_e32 v2, s7, v2 ; 08040407 v_subrev_f32_e32 v3, s5, v3 ; 0A060605 v_mov_b32_e32 v7, s5 ; 7E0E0205 v_sub_f32_e32 v7, s8, v7 ; 080E0E08 v_subrev_f32_e32 v4, s6, v4 ; 0A080806 v_mov_b32_e32 v8, s6 ; 7E100206 v_sub_f32_e32 v8, s10, v8 ; 0810100A v_mul_f32_e32 v9, v2, v2 ; 10120502 v_mac_f32_e32 v9, v7, v7 ; 3E120F07 v_mac_f32_e32 v9, v8, v8 ; 3E121108 v_rsq_clamp_f32_e32 v10, v9 ; 7E145909 v_mul_f32_e32 v11, v1, v1 ; 10160301 v_mac_f32_e32 v11, v3, v3 ; 3E160703 v_mac_f32_e32 v11, v4, v4 ; 3E160904 v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B v_mul_f32_e32 v2, v10, v2 ; 1004050A v_mul_f32_e32 v7, v10, v7 ; 100E0F0A v_mul_f32_e32 v8, v10, v8 ; 1010110A v_mul_f32_e32 v1, v11, v1 ; 1002030B v_mul_f32_e32 v1, v1, v2 ; 10020501 v_mul_f32_e32 v2, v11, v3 ; 1004070B v_mac_f32_e32 v1, v2, v7 ; 3E020F02 v_mul_f32_e32 v2, v5, v5 ; 10040B05 v_mac_f32_e32 v2, v6, v6 ; 3E040D06 v_mac_f32_e32 v2, v0, v0 ; 3E040100 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 v_mul_f32_e64 v3, s15, s15 ; D2100003 00001E0F v_mac_f32_e64 v3, s16, s16 ; D23E0003 00002010 v_mac_f32_e64 v3, s0, s0 ; D23E0003 00000000 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_mul_f32_e32 v4, v11, v4 ; 1008090B v_mac_f32_e32 v1, v4, v8 ; 3E021104 v_mul_f32_e32 v4, v2, v5 ; 10080B02 v_mul_f32_e32 v5, s15, v3 ; 100A060F v_mul_f32_e32 v4, v5, v4 ; 10080905 v_mul_f32_e32 v5, v2, v6 ; 100A0D02 v_mul_f32_e32 v6, s16, v3 ; 100C0610 v_mac_f32_e32 v4, v6, v5 ; 3E080B06 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_mul_f32_e32 v2, s0, v3 ; 10040600 v_mac_f32_e32 v4, v2, v0 ; 3E080102 v_add_f32_e32 v0, 2.0, v4 ; 060008F4 v_log_f32_e32 v0, v0 ; 7E004F00 v_mad_f32 v1, -v1, v1, 1.0 ; D2820001 23CA0301 v_rcp_f32_e32 v2, s11 ; 7E04540B v_sqrt_f32_e32 v1, v1 ; 7E026701 v_sqrt_f32_e32 v3, v9 ; 7E066709 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_mad_f32 v1, v1, v2, -1.0 ; D2820001 03CE0501 v_mul_legacy_f32_e32 v0, 0x3fa66666, v0 ; 0E0000FF 3FA66666 v_exp_f32_e32 v0, v0 ; 7E004B00 v_mad_f32 v1, s9, v0, -|v1| ; D2820401 84060009 v_mul_f32_e32 v2, s9, v0 ; 10040009 v_rcp_f32_e32 v2, v2 ; 7E045502 v_mul_f32_e32 v3, s12, v0 ; 1006000C v_mul_f32_e32 v4, s13, v0 ; 1008000D v_mul_f32_e32 v0, s14, v0 ; 1000000E v_mul_f32_e32 v1, v2, v1 ; 10020302 v_mul_f32_e32 v2, 0.5, v3 ; 100406F0 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_log_f32_e32 v1, v1 ; 7E024F01 v_mul_f32_e32 v3, 0.5, v4 ; 100608F0 v_cvt_pkrtz_f16_f32_e32 v2, v2, v3 ; 5E040702 v_mul_f32_e32 v0, 0.5, v0 ; 100000F0 v_mul_legacy_f32_e32 v1, 0x40400000, v1 ; 0E0202FF 40400000 v_exp_f32_e32 v1, v1 ; 7E024B01 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v2, v0, v2, v0 ; F8001C0F 00020002 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 444 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..5] DCL TEMP[0..2], LOCAL 0: MUL TEMP[0], CONST[2], IN[0].xxxx 1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[5], IN[0].wwww, TEMP[0] 4: MAD TEMP[1].xy, IN[2].xyyy, CONST[1].xyyy, CONST[1].zwww 5: MAD TEMP[2].x, TEMP[0].zzzz, CONST[0].zzzz, CONST[0].wwww 6: MOV TEMP[1].z, TEMP[2].xxxx 7: MOV OUT[2], TEMP[1] 8: MOV OUT[0], TEMP[0] 9: MOV OUT[1], IN[1] 10: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %7 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = extractelement <4 x float> %46, i32 3 %51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = add i32 %5, %7 %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %52, i32 0, i32 %53) %55 = extractelement <4 x float> %54, i32 0 %56 = extractelement <4 x float> %54, i32 1 %57 = fmul float %19, %39 %58 = fmul float %20, %39 %59 = fmul float %21, %39 %60 = fmul float %22, %39 %61 = fmul float %23, %40 %62 = fadd float %61, %57 %63 = fmul float %24, %40 %64 = fadd float %63, %58 %65 = fmul float %25, %40 %66 = fadd float %65, %59 %67 = fmul float %26, %40 %68 = fadd float %67, %60 %69 = fmul float %27, %41 %70 = fadd float %69, %62 %71 = fmul float %28, %41 %72 = fadd float %71, %64 %73 = fmul float %29, %41 %74 = fadd float %73, %66 %75 = fmul float %30, %41 %76 = fadd float %75, %68 %77 = fmul float %31, %42 %78 = fadd float %77, %70 %79 = fmul float %32, %42 %80 = fadd float %79, %72 %81 = fmul float %33, %42 %82 = fadd float %81, %74 %83 = fmul float %34, %42 %84 = fadd float %83, %76 %85 = fmul float %55, %15 %86 = fadd float %85, %17 %87 = fmul float %56, %16 %88 = fadd float %87, %18 %89 = fmul float %82, %13 %90 = fadd float %89, %14 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %47, float %48, float %49, float %50) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %88, float %90, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %78, float %80, float %82, float %84) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s16, s[0:3], 0x7 ; C2080107 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 buffer_load_format_xyzw v[9:12], v0, s[8:11], 0 idxen ; E00C2000 80020900 s_buffer_load_dword s5, s[0:3], 0x9 ; C2028109 s_buffer_load_dword s6, s[0:3], 0xa ; C203010A s_buffer_load_dword s7, s[0:3], 0xb ; C203810B s_buffer_load_dword s8, s[0:3], 0xc ; C204010C s_buffer_load_dword s9, s[0:3], 0xd ; C204810D s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s16 ; 7E000210 s_buffer_load_dword s10, s[0:3], 0xe ; C205010E s_buffer_load_dword s11, s[0:3], 0xf ; C205810F s_buffer_load_dword s12, s[0:3], 0x10 ; C2060110 s_buffer_load_dword s13, s[0:3], 0x2 ; C2068102 s_buffer_load_dword s14, s[0:3], 0x6 ; C2070106 s_buffer_load_dword s15, s[0:3], 0x4 ; C2078104 s_buffer_load_dword s16, s[0:3], 0x5 ; C2080105 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v11, s14 ; 7E16020E v_mac_f32_e32 v11, s15, v9 ; 3E16120F v_mac_f32_e32 v0, s16, v10 ; 3E001410 s_buffer_load_dword s14, s[0:3], 0x3 ; C2070103 s_buffer_load_dword s15, s[0:3], 0x11 ; C2078111 s_buffer_load_dword s16, s[0:3], 0x12 ; C2080112 s_buffer_load_dword s17, s[0:3], 0x13 ; C2088113 s_buffer_load_dword s18, s[0:3], 0x14 ; C2090114 s_buffer_load_dword s19, s[0:3], 0x15 ; C2098115 s_buffer_load_dword s20, s[0:3], 0x16 ; C20A0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 v_mul_f32_e32 v9, s4, v1 ; 10120204 v_mac_f32_e32 v9, s8, v2 ; 3E120408 v_mul_f32_e32 v10, s5, v1 ; 10140205 v_mac_f32_e32 v10, s9, v2 ; 3E140409 v_mul_f32_e32 v12, s6, v1 ; 10180206 v_mac_f32_e32 v12, s10, v2 ; 3E18040A v_mul_f32_e32 v1, s7, v1 ; 10020207 v_mac_f32_e32 v1, s11, v2 ; 3E02040B v_mac_f32_e32 v9, s12, v3 ; 3E12060C s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v10, s15, v3 ; 3E14060F v_mac_f32_e32 v12, s16, v3 ; 3E180610 v_mac_f32_e32 v1, s17, v3 ; 3E020611 v_mac_f32_e32 v9, s18, v4 ; 3E120812 v_mac_f32_e32 v10, s19, v4 ; 3E140813 v_mac_f32_e32 v12, s20, v4 ; 3E180814 v_mac_f32_e32 v1, s0, v4 ; 3E020800 exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605 v_mov_b32_e32 v2, s14 ; 7E04020E v_mac_f32_e32 v2, s13, v12 ; 3E04180D v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 33, 0, 0, 0, v11, v0, v2, v3 ; F800021F 0302000B exp 15, 12, 0, 1, 0, v9, v10, v12, v1 ; F80008CF 010C0A09 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 268 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[0], IN[0], TEMP[0] 3: MOV TEMP[1].w, TEMP[0].wwww 4: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[0].wwww 5: MOV_SAT TEMP[0].x, IN[1].zzzz 6: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[0].xxxx 7: MOV OUT[0], TEMP[1] 8: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %33 = bitcast float %30 to i32 %34 = bitcast float %31 to i32 %35 = insertelement <2 x i32> undef, i32 %33, i32 0 %36 = insertelement <2 x i32> %35, i32 %34, i32 1 %37 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %36, <32 x i8> %23, <16 x i8> %25, i32 2) %38 = extractelement <4 x float> %37, i32 0 %39 = extractelement <4 x float> %37, i32 1 %40 = extractelement <4 x float> %37, i32 2 %41 = extractelement <4 x float> %37, i32 3 %42 = fmul float %26, %38 %43 = fmul float %27, %39 %44 = fmul float %28, %40 %45 = fmul float %29, %41 %46 = fmul float %42, %45 %47 = fmul float %43, %45 %48 = fmul float %44, %45 %49 = call float @llvm.AMDIL.clamp.(float %32, float 0.000000e+00, float 1.000000e+00) %50 = fmul float %46, %49 %51 = fmul float %47, %49 %52 = fmul float %48, %49 %53 = call i32 @llvm.SI.packf16(float %50, float %51) %54 = bitcast i32 %53 to float %55 = call i32 @llvm.SI.packf16(float %52, float %45) %56 = bitcast i32 %55 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %54, float %56, float %54, float %56) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600 v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[6:9], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800F00 00020606 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v6, v2 ; 10020506 v_mul_f32_e32 v2, v7, v3 ; 10040707 v_mul_f32_e32 v3, v8, v4 ; 10060908 v_mul_f32_e32 v4, v9, v5 ; 10080B09 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_mul_f32_e32 v3, v4, v3 ; 10060704 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v1, v0, v1 ; 10020300 v_mul_f32_e32 v2, v0, v2 ; 10040500 v_mul_f32_e32 v0, v0, v3 ; 10000700 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v0, v0, v4 ; 5E000900 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 12 Code Size: 156 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..4] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { 0.0000, -0.5000, 0.5000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: ADD TEMP[0].xy, IN[1].xyyy, CONST[0].xyyy 2: MOV TEMP[1].zw, IMM[0].xxxx 3: MAD TEMP[1].xy, CONST[0].xyyy, IMM[0].yyyy, IN[1].xyyy 4: MOV TEMP[2].zw, IMM[0].xxxx 5: MAD TEMP[2].xy, CONST[0].xyyy, IMM[0].zyyy, IN[1].xyyy 6: MOV TEMP[3].zw, IMM[0].xxxx 7: MAD TEMP[3].xy, CONST[0].xyyy, IMM[0].yzzz, IN[1].xyyy 8: MUL TEMP[4], CONST[1], IN[0].xxxx 9: MAD TEMP[4], CONST[2], IN[0].yyyy, TEMP[4] 10: MAD TEMP[4], CONST[3], IN[0].zzzz, TEMP[4] 11: MAD TEMP[4], CONST[4], IN[0].wwww, TEMP[4] 12: MOV OUT[1], TEMP[0] 13: MOV OUT[2], TEMP[1] 14: MOV OUT[3], TEMP[2] 15: MOV OUT[4], TEMP[3] 16: MOV OUT[0], TEMP[4] 17: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 %33 = add i32 %5, %7 %34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %32, i32 0, i32 %33) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 %41 = add i32 %5, %7 %42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %41) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = fadd float %43, %13 %46 = fadd float %44, %14 %47 = fmul float %13, -5.000000e-01 %48 = fadd float %47, %43 %49 = fmul float %14, -5.000000e-01 %50 = fadd float %49, %44 %51 = fmul float %13, 5.000000e-01 %52 = fadd float %51, %43 %53 = fmul float %14, -5.000000e-01 %54 = fadd float %53, %44 %55 = fmul float %13, -5.000000e-01 %56 = fadd float %55, %43 %57 = fmul float %14, 5.000000e-01 %58 = fadd float %57, %44 %59 = fmul float %15, %35 %60 = fmul float %16, %35 %61 = fmul float %17, %35 %62 = fmul float %18, %35 %63 = fmul float %19, %36 %64 = fadd float %63, %59 %65 = fmul float %20, %36 %66 = fadd float %65, %60 %67 = fmul float %21, %36 %68 = fadd float %67, %61 %69 = fmul float %22, %36 %70 = fadd float %69, %62 %71 = fmul float %23, %37 %72 = fadd float %71, %64 %73 = fmul float %24, %37 %74 = fadd float %73, %66 %75 = fmul float %25, %37 %76 = fadd float %75, %68 %77 = fmul float %26, %37 %78 = fadd float %77, %70 %79 = fmul float %27, %38 %80 = fadd float %79, %72 %81 = fmul float %28, %38 %82 = fadd float %81, %74 %83 = fmul float %29, %38 %84 = fadd float %83, %76 %85 = fmul float %30, %38 %86 = fadd float %85, %78 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %45, float %46, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %48, float %50, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %52, float %54, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %56, float %58, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %80, float %82, float %84, float %86) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 v_mov_b32_e32 v0, 0 ; 7E000280 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v7, s4, v5 ; 060E0A04 v_add_f32_e32 v8, s5, v6 ; 06100C05 exp 15, 32, 0, 0, 0, v7, v8, v0, v0 ; F800020F 00000807 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v7, -0.5, s4, v5 ; D2820007 041408F1 v_mad_f32 v5, 0.5, s4, v5 ; D2820005 041408F0 v_mad_f32 v8, -0.5, s5, v6 ; D2820008 04180AF1 exp 15, 33, 0, 0, 0, v7, v8, v0, v0 ; F800021F 00000807 exp 15, 34, 0, 0, 0, v5, v8, v0, v0 ; F800022F 00000805 v_mac_f32_e64 v6, 0.5, s5 ; D23E0006 00000AF0 exp 15, 35, 0, 0, 0, v7, v6, v0, v0 ; F800023F 00000607 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108 s_buffer_load_dword s9, s[0:3], 0x9 ; C2048109 s_buffer_load_dword s10, s[0:3], 0xa ; C205010A s_buffer_load_dword s11, s[0:3], 0xb ; C205810B s_buffer_load_dword s12, s[0:3], 0xc ; C206010C s_buffer_load_dword s13, s[0:3], 0xd ; C206810D s_buffer_load_dword s14, s[0:3], 0xe ; C207010E s_buffer_load_dword s15, s[0:3], 0xf ; C207810F s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110 s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111 s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112 s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s6, v1 ; 10000206 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s5, v2 ; 3E000405 v_mul_f32_e32 v5, s7, v1 ; 100A0207 v_mac_f32_e32 v5, s9, v2 ; 3E0A0409 v_mul_f32_e32 v6, s8, v1 ; 100C0208 v_mac_f32_e32 v6, s10, v2 ; 3E0C040A v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mac_f32_e32 v1, s11, v2 ; 3E02040B v_mac_f32_e32 v0, s12, v3 ; 3E00060C v_mac_f32_e32 v5, s13, v3 ; 3E0A060D v_mac_f32_e32 v6, s14, v3 ; 3E0C060E v_mac_f32_e32 v1, s15, v3 ; 3E02060F v_mac_f32_e32 v0, s16, v4 ; 3E000810 v_mac_f32_e32 v5, s17, v4 ; 3E0A0811 v_mac_f32_e32 v6, s18, v4 ; 3E0C0812 v_mac_f32_e32 v1, s0, v4 ; 3E020800 exp 15, 12, 0, 1, 0, v0, v5, v6, v1 ; F80008CF 01060500 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 280 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.2500, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xy, IN[1].xyyy 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D 4: ADD TEMP[0], TEMP[0], TEMP[1] 5: MOV TEMP[1].xy, IN[2].xyyy 6: TEX TEMP[1], TEMP[1], SAMP[0], 2D 7: MOV TEMP[2].xy, IN[3].xyyy 8: TEX TEMP[2], TEMP[2], SAMP[0], 2D 9: ADD TEMP[1], TEMP[1], TEMP[2] 10: ADD TEMP[0], TEMP[0], TEMP[1] 11: MUL TEMP[0], TEMP[0], IMM[0].xxxx 12: MAX TEMP[0], TEMP[0], IMM[0].yyyy 13: MOV OUT[0], TEMP[0] 14: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %23 = load <8 x i32>, <8 x i32> addrspace(2)* %22, align 32, !tbaa !0 %24 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %34 = bitcast float %26 to i32 %35 = bitcast float %27 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = bitcast <8 x i32> %23 to <32 x i8> %39 = bitcast <4 x i32> %25 to <16 x i8> %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = bitcast float %28 to i32 %46 = bitcast float %29 to i32 %47 = insertelement <2 x i32> undef, i32 %45, i32 0 %48 = insertelement <2 x i32> %47, i32 %46, i32 1 %49 = bitcast <8 x i32> %23 to <32 x i8> %50 = bitcast <4 x i32> %25 to <16 x i8> %51 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %48, <32 x i8> %49, <16 x i8> %50, i32 2) %52 = extractelement <4 x float> %51, i32 0 %53 = extractelement <4 x float> %51, i32 1 %54 = extractelement <4 x float> %51, i32 2 %55 = extractelement <4 x float> %51, i32 3 %56 = fadd float %41, %52 %57 = fadd float %42, %53 %58 = fadd float %43, %54 %59 = fadd float %44, %55 %60 = bitcast float %30 to i32 %61 = bitcast float %31 to i32 %62 = insertelement <2 x i32> undef, i32 %60, i32 0 %63 = insertelement <2 x i32> %62, i32 %61, i32 1 %64 = bitcast <8 x i32> %23 to <32 x i8> %65 = bitcast <4 x i32> %25 to <16 x i8> %66 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %63, <32 x i8> %64, <16 x i8> %65, i32 2) %67 = extractelement <4 x float> %66, i32 0 %68 = extractelement <4 x float> %66, i32 1 %69 = extractelement <4 x float> %66, i32 2 %70 = extractelement <4 x float> %66, i32 3 %71 = bitcast float %32 to i32 %72 = bitcast float %33 to i32 %73 = insertelement <2 x i32> undef, i32 %71, i32 0 %74 = insertelement <2 x i32> %73, i32 %72, i32 1 %75 = bitcast <8 x i32> %23 to <32 x i8> %76 = bitcast <4 x i32> %25 to <16 x i8> %77 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %74, <32 x i8> %75, <16 x i8> %76, i32 2) %78 = extractelement <4 x float> %77, i32 0 %79 = extractelement <4 x float> %77, i32 1 %80 = extractelement <4 x float> %77, i32 2 %81 = extractelement <4 x float> %77, i32 3 %82 = fadd float %67, %78 %83 = fadd float %68, %79 %84 = fadd float %69, %80 %85 = fadd float %70, %81 %86 = fadd float %56, %82 %87 = fadd float %57, %83 %88 = fadd float %58, %84 %89 = fadd float %59, %85 %90 = fmul float %86, 2.500000e-01 %91 = fmul float %87, 2.500000e-01 %92 = fmul float %88, 2.500000e-01 %93 = fmul float %89, 2.500000e-01 %94 = call float @llvm.maxnum.f32(float %90, float 0.000000e+00) %95 = call float @llvm.maxnum.f32(float %91, float 0.000000e+00) %96 = call float @llvm.maxnum.f32(float %92, float 0.000000e+00) %97 = call float @llvm.maxnum.f32(float %93, float 0.000000e+00) %98 = call i32 @llvm.SI.packf16(float %94, float %95) %99 = bitcast i32 %98 to float %100 = call i32 @llvm.SI.packf16(float %96, float %97) %101 = bitcast i32 %100 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %99, float %101, float %99, float %101) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800 v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[8:15], s[0:3] ; F0800F00 00020002 image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[8:15], s[0:3] ; F0800F00 00020A04 image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800F00 00020406 image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[8:15], s[0:3] ; F0800F00 00020E08 s_waitcnt vmcnt(2) ; BF8C0772 v_add_f32_e32 v0, v10, v0 ; 0600010A v_add_f32_e32 v1, v11, v1 ; 0602030B v_add_f32_e32 v2, v12, v2 ; 0604050C v_add_f32_e32 v3, v13, v3 ; 0606070D s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, v14, v4 ; 0608090E v_add_f32_e32 v5, v15, v5 ; 060A0B0F v_add_f32_e32 v6, v16, v6 ; 060C0D10 v_add_f32_e32 v7, v17, v7 ; 060E0F11 v_add_f32_e32 v0, v4, v0 ; 06000104 v_add_f32_e32 v1, v5, v1 ; 06020305 v_add_f32_e32 v2, v6, v2 ; 06040506 v_add_f32_e32 v3, v7, v3 ; 06060707 v_mov_b32_e32 v4, 0x3e800000 ; 7E0802FF 3E800000 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_mul_f32_e32 v3, v4, v3 ; 10060704 v_max_f32_e32 v0, 0, v0 ; 20000080 v_max_f32_e32 v1, 0, v1 ; 20020280 v_max_f32_e32 v2, 0, v2 ; 20040480 v_max_f32_e32 v3, 0, v3 ; 20060680 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 20 Code Size: 232 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: MOV TEMP[0].xy, IN[1].xyxx 2: MOV TEMP[1].zw, IMM[0].xxxx 3: MUL TEMP[2].xy, CONST[0].xyyy, IMM[0].yxxx 4: MUL TEMP[1].xy, TEMP[2].xyyy, CONST[1].xxxx 5: MUL TEMP[2], CONST[2], IN[0].xxxx 6: MAD TEMP[2], CONST[3], IN[0].yyyy, TEMP[2] 7: MAD TEMP[2], CONST[4], IN[0].zzzz, TEMP[2] 8: MAD TEMP[2], CONST[5], IN[0].wwww, TEMP[2] 9: MOV OUT[1], TEMP[0] 10: MOV OUT[2], TEMP[1] 11: MOV OUT[0], TEMP[2] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 %34 = add i32 %5, %7 %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %34) %36 = extractelement <4 x float> %35, i32 0 %37 = extractelement <4 x float> %35, i32 1 %38 = extractelement <4 x float> %35, i32 2 %39 = extractelement <4 x float> %35, i32 3 %40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = fmul float %13, 0.000000e+00 %47 = fmul float %46, %15 %48 = fmul float %14, %15 %49 = fmul float %16, %36 %50 = fmul float %17, %36 %51 = fmul float %18, %36 %52 = fmul float %19, %36 %53 = fmul float %20, %37 %54 = fadd float %53, %49 %55 = fmul float %21, %37 %56 = fadd float %55, %50 %57 = fmul float %22, %37 %58 = fadd float %57, %51 %59 = fmul float %23, %37 %60 = fadd float %59, %52 %61 = fmul float %24, %38 %62 = fadd float %61, %54 %63 = fmul float %25, %38 %64 = fadd float %63, %56 %65 = fmul float %26, %38 %66 = fadd float %65, %58 %67 = fmul float %27, %38 %68 = fadd float %67, %60 %69 = fmul float %28, %39 %70 = fadd float %69, %62 %71 = fmul float %29, %39 %72 = fadd float %71, %64 %73 = fmul float %30, %39 %74 = fadd float %73, %66 %75 = fmul float %31, %39 %76 = fadd float %75, %68 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %44, float %45, float 1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %47, float %48, float 1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %70, float %72, float %74, float %76) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_buffer_load_dword s5, s[0:3], 0x4 ; C2028104 s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108 s_buffer_load_dword s7, s[0:3], 0x9 ; C2038109 s_buffer_load_dword s8, s[0:3], 0xa ; C204010A s_buffer_load_dword s9, s[0:3], 0xb ; C204810B s_buffer_load_dword s10, s[0:3], 0xc ; C205010C s_buffer_load_dword s11, s[0:3], 0xd ; C205810D s_buffer_load_dword s13, s[0:3], 0xe ; C206810E s_buffer_load_dword s14, s[0:3], 0xf ; C207010F s_buffer_load_dword s15, s[0:3], 0x10 ; C2078110 s_buffer_load_dword s16, s[0:3], 0x11 ; C2080111 s_buffer_load_dword s17, s[0:3], 0x12 ; C2088112 s_buffer_load_dword s18, s[0:3], 0x13 ; C2090113 s_buffer_load_dword s19, s[0:3], 0x14 ; C2098114 s_buffer_load_dword s20, s[0:3], 0x15 ; C20A0115 s_buffer_load_dword s21, s[0:3], 0x16 ; C20A8116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s6, v1 ; 10000206 v_mac_f32_e32 v0, s10, v2 ; 3E00040A v_mul_f32_e32 v7, s7, v1 ; 100E0207 v_mac_f32_e32 v7, s11, v2 ; 3E0E040B v_mul_f32_e32 v8, s8, v1 ; 10100208 v_mac_f32_e32 v8, s13, v2 ; 3E10040D v_mul_f32_e32 v1, s9, v1 ; 10020209 v_mac_f32_e32 v1, s14, v2 ; 3E02040E v_mac_f32_e32 v0, s15, v3 ; 3E00060F v_mac_f32_e32 v7, s16, v3 ; 3E0E0610 v_mac_f32_e32 v8, s17, v3 ; 3E100611 v_mac_f32_e32 v1, s18, v3 ; 3E020612 v_mac_f32_e32 v0, s19, v4 ; 3E000813 v_mac_f32_e32 v7, s20, v4 ; 3E0E0814 v_mac_f32_e32 v8, s21, v4 ; 3E100815 v_mac_f32_e32 v1, s0, v4 ; 3E020800 v_mov_b32_e32 v2, 1.0 ; 7E0402F2 exp 15, 32, 0, 0, 0, v5, v6, v2, v2 ; F800020F 02020605 v_mul_f32_e64 v3, 0, s12 ; D2100003 00001880 v_mul_f32_e32 v3, s5, v3 ; 10060605 v_mov_b32_e32 v4, s5 ; 7E080205 v_mul_f32_e32 v4, s4, v4 ; 10080804 exp 15, 33, 0, 0, 0, v3, v4, v2, v2 ; F800021F 02020403 exp 15, 12, 0, 1, 0, v0, v7, v8, v1 ; F80008CF 01080700 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 232 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 3.0000, 0.0205, 0.0000, 0.0855} IMM[1] FLT32 { 0.2320, 0.0000, 0.3240, 1.0000} 0: MUL TEMP[0].xy, IN[1].xyyy, IMM[0].xxxx 1: ADD TEMP[0].xy, IN[0].xyyy, -TEMP[0].xyyy 2: MOV TEMP[1].xy, TEMP[0].xyyy 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D 4: MUL TEMP[1], TEMP[1], IMM[0].yyyz 5: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 6: MOV TEMP[2].xy, TEMP[0].xyyy 7: TEX TEMP[2], TEMP[2], SAMP[0], 2D 8: MAD TEMP[1], TEMP[2], IMM[0].wwwz, TEMP[1] 9: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 10: MOV TEMP[2].xy, TEMP[0].xyyy 11: TEX TEMP[2], TEMP[2], SAMP[0], 2D 12: MAD TEMP[1], TEMP[2], IMM[1].xxxy, TEMP[1] 13: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 14: MOV TEMP[2].xy, TEMP[0].xyyy 15: TEX TEMP[2], TEMP[2], SAMP[0], 2D 16: MAD TEMP[1], TEMP[2], IMM[1].zzzw, TEMP[1] 17: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 18: MOV TEMP[2].xy, TEMP[0].xyyy 19: TEX TEMP[2], TEMP[2], SAMP[0], 2D 20: MAD TEMP[1], TEMP[2], IMM[1].xxxy, TEMP[1] 21: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 22: MOV TEMP[2].xy, TEMP[0].xyyy 23: TEX TEMP[2], TEMP[2], SAMP[0], 2D 24: MAD TEMP[1], TEMP[2], IMM[0].wwwz, TEMP[1] 25: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 26: MOV TEMP[0].xy, TEMP[0].xyyy 27: TEX TEMP[0], TEMP[0], SAMP[0], 2D 28: MAD TEMP[1], TEMP[0], IMM[0].yyyz, TEMP[1] 29: MOV OUT[0], TEMP[1] 30: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %23 = load <8 x i32>, <8 x i32> addrspace(2)* %22, align 32, !tbaa !0 %24 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %30 = fmul float %28, 3.000000e+00 %31 = fmul float %29, 3.000000e+00 %32 = fsub float %26, %30 %33 = fsub float %27, %31 %34 = bitcast float %32 to i32 %35 = bitcast float %33 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = bitcast <8 x i32> %23 to <32 x i8> %39 = bitcast <4 x i32> %25 to <16 x i8> %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, 0x3F94FDF3C0000000 %46 = fmul float %42, 0x3F94FDF3C0000000 %47 = fmul float %43, 0x3F94FDF3C0000000 %48 = fmul float %44, 0.000000e+00 %49 = fadd float %32, %28 %50 = fadd float %33, %29 %51 = bitcast float %49 to i32 %52 = bitcast float %50 to i32 %53 = insertelement <2 x i32> undef, i32 %51, i32 0 %54 = insertelement <2 x i32> %53, i32 %52, i32 1 %55 = bitcast <8 x i32> %23 to <32 x i8> %56 = bitcast <4 x i32> %25 to <16 x i8> %57 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %54, <32 x i8> %55, <16 x i8> %56, i32 2) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = extractelement <4 x float> %57, i32 3 %62 = fmul float %58, 0x3FB5E35400000000 %63 = fadd float %62, %45 %64 = fmul float %59, 0x3FB5E35400000000 %65 = fadd float %64, %46 %66 = fmul float %60, 0x3FB5E35400000000 %67 = fadd float %66, %47 %68 = fmul float %61, 0.000000e+00 %69 = fadd float %68, %48 %70 = fadd float %49, %28 %71 = fadd float %50, %29 %72 = bitcast float %70 to i32 %73 = bitcast float %71 to i32 %74 = insertelement <2 x i32> undef, i32 %72, i32 0 %75 = insertelement <2 x i32> %74, i32 %73, i32 1 %76 = bitcast <8 x i32> %23 to <32 x i8> %77 = bitcast <4 x i32> %25 to <16 x i8> %78 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %75, <32 x i8> %76, <16 x i8> %77, i32 2) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 1 %81 = extractelement <4 x float> %78, i32 2 %82 = extractelement <4 x float> %78, i32 3 %83 = fmul float %79, 0x3FCDB22D00000000 %84 = fadd float %83, %63 %85 = fmul float %80, 0x3FCDB22D00000000 %86 = fadd float %85, %65 %87 = fmul float %81, 0x3FCDB22D00000000 %88 = fadd float %87, %67 %89 = fmul float %82, 0.000000e+00 %90 = fadd float %89, %69 %91 = fadd float %70, %28 %92 = fadd float %71, %29 %93 = bitcast float %91 to i32 %94 = bitcast float %92 to i32 %95 = insertelement <2 x i32> undef, i32 %93, i32 0 %96 = insertelement <2 x i32> %95, i32 %94, i32 1 %97 = bitcast <8 x i32> %23 to <32 x i8> %98 = bitcast <4 x i32> %25 to <16 x i8> %99 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %97, <16 x i8> %98, i32 2) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = extractelement <4 x float> %99, i32 2 %103 = extractelement <4 x float> %99, i32 3 %104 = fmul float %100, 0x3FD4BC6A80000000 %105 = fadd float %104, %84 %106 = fmul float %101, 0x3FD4BC6A80000000 %107 = fadd float %106, %86 %108 = fmul float %102, 0x3FD4BC6A80000000 %109 = fadd float %108, %88 %110 = fadd float %103, %90 %111 = fadd float %91, %28 %112 = fadd float %92, %29 %113 = bitcast float %111 to i32 %114 = bitcast float %112 to i32 %115 = insertelement <2 x i32> undef, i32 %113, i32 0 %116 = insertelement <2 x i32> %115, i32 %114, i32 1 %117 = bitcast <8 x i32> %23 to <32 x i8> %118 = bitcast <4 x i32> %25 to <16 x i8> %119 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %116, <32 x i8> %117, <16 x i8> %118, i32 2) %120 = extractelement <4 x float> %119, i32 0 %121 = extractelement <4 x float> %119, i32 1 %122 = extractelement <4 x float> %119, i32 2 %123 = extractelement <4 x float> %119, i32 3 %124 = fmul float %120, 0x3FCDB22D00000000 %125 = fadd float %124, %105 %126 = fmul float %121, 0x3FCDB22D00000000 %127 = fadd float %126, %107 %128 = fmul float %122, 0x3FCDB22D00000000 %129 = fadd float %128, %109 %130 = fmul float %123, 0.000000e+00 %131 = fadd float %130, %110 %132 = fadd float %111, %28 %133 = fadd float %112, %29 %134 = bitcast float %132 to i32 %135 = bitcast float %133 to i32 %136 = insertelement <2 x i32> undef, i32 %134, i32 0 %137 = insertelement <2 x i32> %136, i32 %135, i32 1 %138 = bitcast <8 x i32> %23 to <32 x i8> %139 = bitcast <4 x i32> %25 to <16 x i8> %140 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %137, <32 x i8> %138, <16 x i8> %139, i32 2) %141 = extractelement <4 x float> %140, i32 0 %142 = extractelement <4 x float> %140, i32 1 %143 = extractelement <4 x float> %140, i32 2 %144 = extractelement <4 x float> %140, i32 3 %145 = fmul float %141, 0x3FB5E35400000000 %146 = fadd float %145, %125 %147 = fmul float %142, 0x3FB5E35400000000 %148 = fadd float %147, %127 %149 = fmul float %143, 0x3FB5E35400000000 %150 = fadd float %149, %129 %151 = fmul float %144, 0.000000e+00 %152 = fadd float %151, %131 %153 = fadd float %132, %28 %154 = fadd float %133, %29 %155 = bitcast float %153 to i32 %156 = bitcast float %154 to i32 %157 = insertelement <2 x i32> undef, i32 %155, i32 0 %158 = insertelement <2 x i32> %157, i32 %156, i32 1 %159 = bitcast <8 x i32> %23 to <32 x i8> %160 = bitcast <4 x i32> %25 to <16 x i8> %161 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %159, <16 x i8> %160, i32 2) %162 = extractelement <4 x float> %161, i32 0 %163 = extractelement <4 x float> %161, i32 1 %164 = extractelement <4 x float> %161, i32 2 %165 = extractelement <4 x float> %161, i32 3 %166 = fmul float %162, 0x3F94FDF3C0000000 %167 = fadd float %166, %146 %168 = fmul float %163, 0x3F94FDF3C0000000 %169 = fadd float %168, %148 %170 = fmul float %164, 0x3F94FDF3C0000000 %171 = fadd float %170, %150 %172 = fmul float %165, 0.000000e+00 %173 = fadd float %172, %152 %174 = call i32 @llvm.SI.packf16(float %167, float %169) %175 = bitcast i32 %174 to float %176 = call i32 @llvm.SI.packf16(float %171, float %173) %177 = bitcast i32 %176 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %175, float %177, float %175, float %177) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v0, v0, 1, 1, [m0] ; C8000500 v_interp_p2_f32 v0, [v0], v1, 1, 1, [m0] ; C8010501 v_mov_b32_e32 v1, 0x40400000 ; 7E0202FF 40400000 v_mad_f32 v5, -v4, v1, v2 ; D2820005 240A0304 v_mad_f32 v6, -v0, v1, v3 ; D2820006 240E0300 v_mov_b32_e32 v1, 0x3ca7ef9e ; 7E0202FF 3CA7EF9E s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[8:15], s[0:3] ; F0800F00 00020705 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v1, v7 ; 10040F01 v_mul_f32_e32 v3, v1, v8 ; 10061101 v_mul_f32_e32 v7, v1, v9 ; 100E1301 v_mul_f32_e32 v8, 0, v10 ; 10101480 v_add_f32_e32 v9, v4, v5 ; 06120B04 v_add_f32_e32 v10, v0, v6 ; 06140D00 v_mov_b32_e32 v5, 0x3daf1aa0 ; 7E0A02FF 3DAF1AA0 image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020B09 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v5, v11 ; 3E041705 v_mac_f32_e32 v3, v5, v12 ; 3E061905 v_mac_f32_e32 v7, v5, v13 ; 3E0E1B05 v_mac_f32_e32 v8, 0, v14 ; 3E101C80 v_add_f32_e32 v11, v4, v9 ; 06161304 v_add_f32_e32 v12, v0, v10 ; 06181500 v_mov_b32_e32 v6, 0x3e6d9168 ; 7E0C02FF 3E6D9168 image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[8:15], s[0:3] ; F0800F00 00020D0B s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v6, v13 ; 3E041B06 v_mac_f32_e32 v3, v6, v14 ; 3E061D06 v_mac_f32_e32 v7, v6, v15 ; 3E0E1F06 v_mac_f32_e32 v8, 0, v16 ; 3E102080 v_add_f32_e32 v9, v4, v11 ; 06121704 v_add_f32_e32 v10, v0, v12 ; 06141900 v_mov_b32_e32 v11, 0x3ea5e354 ; 7E1602FF 3EA5E354 image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020C09 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v11, v12 ; 3E04190B v_mac_f32_e32 v3, v11, v13 ; 3E061B0B v_mac_f32_e32 v7, v11, v14 ; 3E0E1D0B v_add_f32_e32 v8, v8, v15 ; 06101F08 v_add_f32_e32 v11, v4, v9 ; 06161304 v_add_f32_e32 v12, v0, v10 ; 06181500 v_add_f32_e32 v9, v4, v11 ; 06121704 v_add_f32_e32 v10, v0, v12 ; 06141900 v_add_f32_e32 v13, v4, v9 ; 061A1304 v_add_f32_e32 v14, v0, v10 ; 061C1500 image_sample v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[8:15], s[0:3] ; F0800F00 00020F0B image_sample v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020909 image_sample v[19:22], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[8:15], s[0:3] ; F0800F00 0002130D s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v2, v6, v15 ; 3E041F06 v_mac_f32_e32 v3, v6, v16 ; 3E062106 v_mac_f32_e32 v7, v6, v17 ; 3E0E2306 v_mac_f32_e32 v8, 0, v18 ; 3E102480 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v2, v5, v9 ; 3E041305 v_mac_f32_e32 v3, v5, v10 ; 3E061505 v_mac_f32_e32 v7, v5, v11 ; 3E0E1705 v_mac_f32_e32 v8, 0, v12 ; 3E101880 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v1, v19 ; 3E042701 v_mac_f32_e32 v3, v1, v20 ; 3E062901 v_mac_f32_e32 v7, v1, v21 ; 3E0E2B01 v_mac_f32_e32 v8, 0, v22 ; 3E102C80 v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702 v_cvt_pkrtz_f16_f32_e32 v1, v7, v8 ; 5E021107 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 24 Code Size: 372 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: MOV TEMP[0].xy, IN[1].xyxx 2: MOV TEMP[1].zw, IMM[0].xxxx 3: MUL TEMP[2].xy, CONST[0].xyyy, IMM[0].xyyy 4: MUL TEMP[1].xy, TEMP[2].xyyy, CONST[1].xxxx 5: MUL TEMP[2], CONST[2], IN[0].xxxx 6: MAD TEMP[2], CONST[3], IN[0].yyyy, TEMP[2] 7: MAD TEMP[2], CONST[4], IN[0].zzzz, TEMP[2] 8: MAD TEMP[2], CONST[5], IN[0].wwww, TEMP[2] 9: MOV OUT[1], TEMP[0] 10: MOV OUT[2], TEMP[1] 11: MOV OUT[0], TEMP[2] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 %34 = add i32 %5, %7 %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %34) %36 = extractelement <4 x float> %35, i32 0 %37 = extractelement <4 x float> %35, i32 1 %38 = extractelement <4 x float> %35, i32 2 %39 = extractelement <4 x float> %35, i32 3 %40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = fmul float %14, 0.000000e+00 %47 = fmul float %13, %15 %48 = fmul float %46, %15 %49 = fmul float %16, %36 %50 = fmul float %17, %36 %51 = fmul float %18, %36 %52 = fmul float %19, %36 %53 = fmul float %20, %37 %54 = fadd float %53, %49 %55 = fmul float %21, %37 %56 = fadd float %55, %50 %57 = fmul float %22, %37 %58 = fadd float %57, %51 %59 = fmul float %23, %37 %60 = fadd float %59, %52 %61 = fmul float %24, %38 %62 = fadd float %61, %54 %63 = fmul float %25, %38 %64 = fadd float %63, %56 %65 = fmul float %26, %38 %66 = fadd float %65, %58 %67 = fmul float %27, %38 %68 = fadd float %67, %60 %69 = fmul float %28, %39 %70 = fadd float %69, %62 %71 = fmul float %29, %39 %72 = fadd float %71, %64 %73 = fmul float %30, %39 %74 = fadd float %73, %66 %75 = fmul float %31, %39 %76 = fadd float %75, %68 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %44, float %45, float 1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %47, float %48, float 1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %70, float %72, float %74, float %76) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_buffer_load_dword s5, s[0:3], 0x4 ; C2028104 s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108 s_buffer_load_dword s7, s[0:3], 0x9 ; C2038109 s_buffer_load_dword s8, s[0:3], 0xa ; C204010A s_buffer_load_dword s9, s[0:3], 0xb ; C204810B s_buffer_load_dword s10, s[0:3], 0xc ; C205010C s_buffer_load_dword s11, s[0:3], 0xd ; C205810D s_buffer_load_dword s13, s[0:3], 0xe ; C206810E s_buffer_load_dword s14, s[0:3], 0xf ; C207010F s_buffer_load_dword s15, s[0:3], 0x10 ; C2078110 s_buffer_load_dword s16, s[0:3], 0x11 ; C2080111 s_buffer_load_dword s17, s[0:3], 0x12 ; C2088112 s_buffer_load_dword s18, s[0:3], 0x13 ; C2090113 s_buffer_load_dword s19, s[0:3], 0x14 ; C2098114 s_buffer_load_dword s20, s[0:3], 0x15 ; C20A0115 s_buffer_load_dword s21, s[0:3], 0x16 ; C20A8116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s6, v1 ; 10000206 v_mac_f32_e32 v0, s10, v2 ; 3E00040A v_mul_f32_e32 v7, s7, v1 ; 100E0207 v_mac_f32_e32 v7, s11, v2 ; 3E0E040B v_mul_f32_e32 v8, s8, v1 ; 10100208 v_mac_f32_e32 v8, s13, v2 ; 3E10040D v_mul_f32_e32 v1, s9, v1 ; 10020209 v_mac_f32_e32 v1, s14, v2 ; 3E02040E v_mac_f32_e32 v0, s15, v3 ; 3E00060F v_mac_f32_e32 v7, s16, v3 ; 3E0E0610 v_mac_f32_e32 v8, s17, v3 ; 3E100611 v_mac_f32_e32 v1, s18, v3 ; 3E020612 v_mac_f32_e32 v0, s19, v4 ; 3E000813 v_mac_f32_e32 v7, s20, v4 ; 3E0E0814 v_mac_f32_e32 v8, s21, v4 ; 3E100815 v_mac_f32_e32 v1, s0, v4 ; 3E020800 v_mov_b32_e32 v2, 1.0 ; 7E0402F2 exp 15, 32, 0, 0, 0, v5, v6, v2, v2 ; F800020F 02020605 v_mul_f32_e64 v3, 0, s4 ; D2100003 00000880 v_mul_f32_e32 v3, s5, v3 ; 10060605 v_mov_b32_e32 v4, s5 ; 7E080205 v_mul_f32_e32 v4, s12, v4 ; 1008080C exp 15, 33, 0, 0, 0, v4, v3, v2, v2 ; F800021F 02020304 exp 15, 12, 0, 1, 0, v0, v7, v8, v1 ; F80008CF 01080700 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 232 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 3.0000, 0.0205, 0.0000, 0.0855} IMM[1] FLT32 { 0.2320, 0.0000, 0.3240, 1.0000} 0: MUL TEMP[0].xy, IN[1].xyyy, IMM[0].xxxx 1: ADD TEMP[0].xy, IN[0].xyyy, -TEMP[0].xyyy 2: MOV TEMP[1].xy, TEMP[0].xyyy 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D 4: MUL TEMP[1], TEMP[1], IMM[0].yyyz 5: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 6: MOV TEMP[2].xy, TEMP[0].xyyy 7: TEX TEMP[2], TEMP[2], SAMP[0], 2D 8: MAD TEMP[1], TEMP[2], IMM[0].wwwz, TEMP[1] 9: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 10: MOV TEMP[2].xy, TEMP[0].xyyy 11: TEX TEMP[2], TEMP[2], SAMP[0], 2D 12: MAD TEMP[1], TEMP[2], IMM[1].xxxy, TEMP[1] 13: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 14: MOV TEMP[2].xy, TEMP[0].xyyy 15: TEX TEMP[2], TEMP[2], SAMP[0], 2D 16: MAD TEMP[1], TEMP[2], IMM[1].zzzw, TEMP[1] 17: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 18: MOV TEMP[2].xy, TEMP[0].xyyy 19: TEX TEMP[2], TEMP[2], SAMP[0], 2D 20: MAD TEMP[1], TEMP[2], IMM[1].xxxy, TEMP[1] 21: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 22: MOV TEMP[2].xy, TEMP[0].xyyy 23: TEX TEMP[2], TEMP[2], SAMP[0], 2D 24: MAD TEMP[1], TEMP[2], IMM[0].wwwz, TEMP[1] 25: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 26: MOV TEMP[0].xy, TEMP[0].xyyy 27: TEX TEMP[0], TEMP[0], SAMP[0], 2D 28: MAD TEMP[1], TEMP[0], IMM[0].yyyz, TEMP[1] 29: MOV OUT[0], TEMP[1] 30: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %23 = load <8 x i32>, <8 x i32> addrspace(2)* %22, align 32, !tbaa !0 %24 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %30 = fmul float %28, 3.000000e+00 %31 = fmul float %29, 3.000000e+00 %32 = fsub float %26, %30 %33 = fsub float %27, %31 %34 = bitcast float %32 to i32 %35 = bitcast float %33 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = bitcast <8 x i32> %23 to <32 x i8> %39 = bitcast <4 x i32> %25 to <16 x i8> %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, 0x3F94FDF3C0000000 %46 = fmul float %42, 0x3F94FDF3C0000000 %47 = fmul float %43, 0x3F94FDF3C0000000 %48 = fmul float %44, 0.000000e+00 %49 = fadd float %32, %28 %50 = fadd float %33, %29 %51 = bitcast float %49 to i32 %52 = bitcast float %50 to i32 %53 = insertelement <2 x i32> undef, i32 %51, i32 0 %54 = insertelement <2 x i32> %53, i32 %52, i32 1 %55 = bitcast <8 x i32> %23 to <32 x i8> %56 = bitcast <4 x i32> %25 to <16 x i8> %57 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %54, <32 x i8> %55, <16 x i8> %56, i32 2) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = extractelement <4 x float> %57, i32 3 %62 = fmul float %58, 0x3FB5E35400000000 %63 = fadd float %62, %45 %64 = fmul float %59, 0x3FB5E35400000000 %65 = fadd float %64, %46 %66 = fmul float %60, 0x3FB5E35400000000 %67 = fadd float %66, %47 %68 = fmul float %61, 0.000000e+00 %69 = fadd float %68, %48 %70 = fadd float %49, %28 %71 = fadd float %50, %29 %72 = bitcast float %70 to i32 %73 = bitcast float %71 to i32 %74 = insertelement <2 x i32> undef, i32 %72, i32 0 %75 = insertelement <2 x i32> %74, i32 %73, i32 1 %76 = bitcast <8 x i32> %23 to <32 x i8> %77 = bitcast <4 x i32> %25 to <16 x i8> %78 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %75, <32 x i8> %76, <16 x i8> %77, i32 2) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 1 %81 = extractelement <4 x float> %78, i32 2 %82 = extractelement <4 x float> %78, i32 3 %83 = fmul float %79, 0x3FCDB22D00000000 %84 = fadd float %83, %63 %85 = fmul float %80, 0x3FCDB22D00000000 %86 = fadd float %85, %65 %87 = fmul float %81, 0x3FCDB22D00000000 %88 = fadd float %87, %67 %89 = fmul float %82, 0.000000e+00 %90 = fadd float %89, %69 %91 = fadd float %70, %28 %92 = fadd float %71, %29 %93 = bitcast float %91 to i32 %94 = bitcast float %92 to i32 %95 = insertelement <2 x i32> undef, i32 %93, i32 0 %96 = insertelement <2 x i32> %95, i32 %94, i32 1 %97 = bitcast <8 x i32> %23 to <32 x i8> %98 = bitcast <4 x i32> %25 to <16 x i8> %99 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %97, <16 x i8> %98, i32 2) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = extractelement <4 x float> %99, i32 2 %103 = extractelement <4 x float> %99, i32 3 %104 = fmul float %100, 0x3FD4BC6A80000000 %105 = fadd float %104, %84 %106 = fmul float %101, 0x3FD4BC6A80000000 %107 = fadd float %106, %86 %108 = fmul float %102, 0x3FD4BC6A80000000 %109 = fadd float %108, %88 %110 = fadd float %103, %90 %111 = fadd float %91, %28 %112 = fadd float %92, %29 %113 = bitcast float %111 to i32 %114 = bitcast float %112 to i32 %115 = insertelement <2 x i32> undef, i32 %113, i32 0 %116 = insertelement <2 x i32> %115, i32 %114, i32 1 %117 = bitcast <8 x i32> %23 to <32 x i8> %118 = bitcast <4 x i32> %25 to <16 x i8> %119 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %116, <32 x i8> %117, <16 x i8> %118, i32 2) %120 = extractelement <4 x float> %119, i32 0 %121 = extractelement <4 x float> %119, i32 1 %122 = extractelement <4 x float> %119, i32 2 %123 = extractelement <4 x float> %119, i32 3 %124 = fmul float %120, 0x3FCDB22D00000000 %125 = fadd float %124, %105 %126 = fmul float %121, 0x3FCDB22D00000000 %127 = fadd float %126, %107 %128 = fmul float %122, 0x3FCDB22D00000000 %129 = fadd float %128, %109 %130 = fmul float %123, 0.000000e+00 %131 = fadd float %130, %110 %132 = fadd float %111, %28 %133 = fadd float %112, %29 %134 = bitcast float %132 to i32 %135 = bitcast float %133 to i32 %136 = insertelement <2 x i32> undef, i32 %134, i32 0 %137 = insertelement <2 x i32> %136, i32 %135, i32 1 %138 = bitcast <8 x i32> %23 to <32 x i8> %139 = bitcast <4 x i32> %25 to <16 x i8> %140 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %137, <32 x i8> %138, <16 x i8> %139, i32 2) %141 = extractelement <4 x float> %140, i32 0 %142 = extractelement <4 x float> %140, i32 1 %143 = extractelement <4 x float> %140, i32 2 %144 = extractelement <4 x float> %140, i32 3 %145 = fmul float %141, 0x3FB5E35400000000 %146 = fadd float %145, %125 %147 = fmul float %142, 0x3FB5E35400000000 %148 = fadd float %147, %127 %149 = fmul float %143, 0x3FB5E35400000000 %150 = fadd float %149, %129 %151 = fmul float %144, 0.000000e+00 %152 = fadd float %151, %131 %153 = fadd float %132, %28 %154 = fadd float %133, %29 %155 = bitcast float %153 to i32 %156 = bitcast float %154 to i32 %157 = insertelement <2 x i32> undef, i32 %155, i32 0 %158 = insertelement <2 x i32> %157, i32 %156, i32 1 %159 = bitcast <8 x i32> %23 to <32 x i8> %160 = bitcast <4 x i32> %25 to <16 x i8> %161 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %159, <16 x i8> %160, i32 2) %162 = extractelement <4 x float> %161, i32 0 %163 = extractelement <4 x float> %161, i32 1 %164 = extractelement <4 x float> %161, i32 2 %165 = extractelement <4 x float> %161, i32 3 %166 = fmul float %162, 0x3F94FDF3C0000000 %167 = fadd float %166, %146 %168 = fmul float %163, 0x3F94FDF3C0000000 %169 = fadd float %168, %148 %170 = fmul float %164, 0x3F94FDF3C0000000 %171 = fadd float %170, %150 %172 = fmul float %165, 0.000000e+00 %173 = fadd float %172, %152 %174 = call i32 @llvm.SI.packf16(float %167, float %169) %175 = bitcast i32 %174 to float %176 = call i32 @llvm.SI.packf16(float %171, float %173) %177 = bitcast i32 %176 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %175, float %177, float %175, float %177) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v0, v0, 1, 1, [m0] ; C8000500 v_interp_p2_f32 v0, [v0], v1, 1, 1, [m0] ; C8010501 v_mov_b32_e32 v1, 0x40400000 ; 7E0202FF 40400000 v_mad_f32 v5, -v4, v1, v2 ; D2820005 240A0304 v_mad_f32 v6, -v0, v1, v3 ; D2820006 240E0300 v_mov_b32_e32 v1, 0x3ca7ef9e ; 7E0202FF 3CA7EF9E s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[8:15], s[0:3] ; F0800F00 00020705 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v1, v7 ; 10040F01 v_mul_f32_e32 v3, v1, v8 ; 10061101 v_mul_f32_e32 v7, v1, v9 ; 100E1301 v_mul_f32_e32 v8, 0, v10 ; 10101480 v_add_f32_e32 v9, v4, v5 ; 06120B04 v_add_f32_e32 v10, v0, v6 ; 06140D00 v_mov_b32_e32 v5, 0x3daf1aa0 ; 7E0A02FF 3DAF1AA0 image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020B09 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v5, v11 ; 3E041705 v_mac_f32_e32 v3, v5, v12 ; 3E061905 v_mac_f32_e32 v7, v5, v13 ; 3E0E1B05 v_mac_f32_e32 v8, 0, v14 ; 3E101C80 v_add_f32_e32 v11, v4, v9 ; 06161304 v_add_f32_e32 v12, v0, v10 ; 06181500 v_mov_b32_e32 v6, 0x3e6d9168 ; 7E0C02FF 3E6D9168 image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[8:15], s[0:3] ; F0800F00 00020D0B s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v6, v13 ; 3E041B06 v_mac_f32_e32 v3, v6, v14 ; 3E061D06 v_mac_f32_e32 v7, v6, v15 ; 3E0E1F06 v_mac_f32_e32 v8, 0, v16 ; 3E102080 v_add_f32_e32 v9, v4, v11 ; 06121704 v_add_f32_e32 v10, v0, v12 ; 06141900 v_mov_b32_e32 v11, 0x3ea5e354 ; 7E1602FF 3EA5E354 image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020C09 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v11, v12 ; 3E04190B v_mac_f32_e32 v3, v11, v13 ; 3E061B0B v_mac_f32_e32 v7, v11, v14 ; 3E0E1D0B v_add_f32_e32 v8, v8, v15 ; 06101F08 v_add_f32_e32 v11, v4, v9 ; 06161304 v_add_f32_e32 v12, v0, v10 ; 06181500 v_add_f32_e32 v9, v4, v11 ; 06121704 v_add_f32_e32 v10, v0, v12 ; 06141900 v_add_f32_e32 v13, v4, v9 ; 061A1304 v_add_f32_e32 v14, v0, v10 ; 061C1500 image_sample v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[8:15], s[0:3] ; F0800F00 00020F0B image_sample v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020909 image_sample v[19:22], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[8:15], s[0:3] ; F0800F00 0002130D s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v2, v6, v15 ; 3E041F06 v_mac_f32_e32 v3, v6, v16 ; 3E062106 v_mac_f32_e32 v7, v6, v17 ; 3E0E2306 v_mac_f32_e32 v8, 0, v18 ; 3E102480 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v2, v5, v9 ; 3E041305 v_mac_f32_e32 v3, v5, v10 ; 3E061505 v_mac_f32_e32 v7, v5, v11 ; 3E0E1705 v_mac_f32_e32 v8, 0, v12 ; 3E101880 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v1, v19 ; 3E042701 v_mac_f32_e32 v3, v1, v20 ; 3E062901 v_mac_f32_e32 v7, v1, v21 ; 3E0E2B01 v_mac_f32_e32 v8, 0, v22 ; 3E102C80 v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702 v_cvt_pkrtz_f16_f32_e32 v1, v7, v8 ; 5E021107 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 24 Code Size: 372 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: MOV TEMP[0].xy, IN[1].xyxx 2: MUL TEMP[1], CONST[0], IN[0].xxxx 3: MAD TEMP[1], CONST[1], IN[0].yyyy, TEMP[1] 4: MAD TEMP[1], CONST[2], IN[0].zzzz, TEMP[1] 5: MAD TEMP[1], CONST[3], IN[0].wwww, TEMP[1] 6: MOV OUT[1], TEMP[0] 7: MOV OUT[0], TEMP[1] 8: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = fmul float %13, %33 %44 = fmul float %14, %33 %45 = fmul float %15, %33 %46 = fmul float %16, %33 %47 = fmul float %17, %34 %48 = fadd float %47, %43 %49 = fmul float %18, %34 %50 = fadd float %49, %44 %51 = fmul float %19, %34 %52 = fadd float %51, %45 %53 = fmul float %20, %34 %54 = fadd float %53, %46 %55 = fmul float %21, %35 %56 = fadd float %55, %48 %57 = fmul float %22, %35 %58 = fadd float %57, %50 %59 = fmul float %23, %35 %60 = fadd float %59, %52 %61 = fmul float %24, %35 %62 = fadd float %61, %54 %63 = fmul float %25, %36 %64 = fadd float %63, %56 %65 = fmul float %26, %36 %66 = fadd float %65, %58 %67 = fmul float %27, %36 %68 = fadd float %67, %60 %69 = fmul float %28, %36 %70 = fadd float %69, %62 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v0, s12, v2 ; 1000040C s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, s6, v3 ; 3E000606 v_mul_f32_e32 v8, s13, v2 ; 1010040D v_mac_f32_e32 v8, s7, v3 ; 3E100607 v_mul_f32_e32 v9, s4, v2 ; 10120404 v_mac_f32_e32 v9, s8, v3 ; 3E120608 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mac_f32_e32 v2, s9, v3 ; 3E040609 v_mac_f32_e32 v0, s10, v4 ; 3E00080A v_mac_f32_e32 v8, s11, v4 ; 3E10080B v_mac_f32_e32 v9, s14, v4 ; 3E12080E v_mac_f32_e32 v2, s15, v4 ; 3E04080F v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mac_f32_e32 v8, s17, v5 ; 3E100A11 v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706 exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 196 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL SVIEW[6], 2D, FLOAT DCL SVIEW[7], 2D, FLOAT DCL CONST[8..9] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 0.2300, 0.3500, 0.4500, 0.6000} IMM[1] FLT32 { 0.5000, 0.4545, 0.8000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].w, TEMP[0].wwww 3: MOV TEMP[2].xy, IN[0].xyyy 4: TEX TEMP[2].xyz, TEMP[2], SAMP[1], 2D 5: MOV TEMP[3].xy, IN[0].xyyy 6: TEX TEMP[3].xyz, TEMP[3], SAMP[2], 2D 7: MOV TEMP[4].xy, IN[0].xyyy 8: TEX TEMP[4].xyz, TEMP[4], SAMP[3], 2D 9: MOV TEMP[5].xy, IN[0].xyyy 10: TEX TEMP[5].xyz, TEMP[5], SAMP[4], 2D 11: MOV TEMP[6].xy, IN[0].xyyy 12: TEX TEMP[6].xyz, TEMP[6], SAMP[5], 2D 13: MOV TEMP[7].xy, IN[0].xyyy 14: TEX TEMP[7].xyz, TEMP[7], SAMP[6], 2D 15: MUL TEMP[8].xyz, TEMP[2].xyzz, IMM[1].xxxx 16: MAD TEMP[8].xyz, IMM[0].wwww, TEMP[3].xyzz, TEMP[8].xyzz 17: MAD TEMP[8].xyz, TEMP[4].xyzz, IMM[0].wwww, TEMP[8].xyzz 18: MAD TEMP[8].xyz, TEMP[5].xyzz, IMM[0].zzzz, TEMP[8].xyzz 19: MAD TEMP[8].xyz, TEMP[6].xyzz, IMM[0].yyyy, TEMP[8].xyzz 20: MAD TEMP[8].xyz, TEMP[7].xyzz, IMM[0].xxxx, TEMP[8].xyzz 21: MUL TEMP[8].xyz, TEMP[8].xyzz, IMM[1].yyyy 22: LRP TEMP[1].xyz, CONST[8].xxxx, TEMP[8].xyzz, TEMP[0].xyzz 23: MAD TEMP[0].xyz, TEMP[3].xyzz, IMM[1].zzzz, TEMP[2].xyzz 24: MAD TEMP[0].xyz, TEMP[4].xyzz, IMM[0].wwww, TEMP[0].xyzz 25: MAD TEMP[0].xyz, TEMP[5].xyzz, IMM[0].zzzz, TEMP[0].xyzz 26: MAD TEMP[0].xyz, TEMP[6].xyzz, IMM[0].yyyy, TEMP[0].xyzz 27: MAD TEMP[0].xyz, TEMP[7].xyzz, IMM[0].xxxx, TEMP[0].xyzz 28: MOV TEMP[2].xy, IN[0].xyyy 29: TEX TEMP[2].xyz, TEMP[2], SAMP[7], 2D 30: MUL TEMP[2].xyz, TEMP[2].xyzz, CONST[9].xxxx 31: MOV_SAT TEMP[2].xyz, TEMP[2].xyzz 32: LRP TEMP[1].xyz, TEMP[2].xyzz, TEMP[0].xyzz, TEMP[1].xyzz 33: MOV OUT[0], TEMP[1] 34: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %26 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %27 = load <32 x i8>, <32 x i8> addrspace(2)* %26, align 32, !tbaa !0 %28 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0 %30 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %31 = bitcast <8 x i32> addrspace(2)* %30 to <32 x i8> addrspace(2)* %32 = load <32 x i8>, <32 x i8> addrspace(2)* %31, align 32, !tbaa !0 %33 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %34 = bitcast <4 x i32> addrspace(2)* %33 to <16 x i8> addrspace(2)* %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 %36 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %37 = bitcast <8 x i32> addrspace(2)* %36 to <32 x i8> addrspace(2)* %38 = load <32 x i8>, <32 x i8> addrspace(2)* %37, align 32, !tbaa !0 %39 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %40 = bitcast <4 x i32> addrspace(2)* %39 to <16 x i8> addrspace(2)* %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 %42 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %43 = bitcast <8 x i32> addrspace(2)* %42 to <32 x i8> addrspace(2)* %44 = load <32 x i8>, <32 x i8> addrspace(2)* %43, align 32, !tbaa !0 %45 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %46 = bitcast <4 x i32> addrspace(2)* %45 to <16 x i8> addrspace(2)* %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %49 = bitcast <8 x i32> addrspace(2)* %48 to <32 x i8> addrspace(2)* %50 = load <32 x i8>, <32 x i8> addrspace(2)* %49, align 32, !tbaa !0 %51 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %52 = bitcast <4 x i32> addrspace(2)* %51 to <16 x i8> addrspace(2)* %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 %54 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %55 = bitcast <8 x i32> addrspace(2)* %54 to <32 x i8> addrspace(2)* %56 = load <32 x i8>, <32 x i8> addrspace(2)* %55, align 32, !tbaa !0 %57 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %58 = bitcast <4 x i32> addrspace(2)* %57 to <16 x i8> addrspace(2)* %59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0 %60 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %61 = bitcast <8 x i32> addrspace(2)* %60 to <32 x i8> addrspace(2)* %62 = load <32 x i8>, <32 x i8> addrspace(2)* %61, align 32, !tbaa !0 %63 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %64 = bitcast <4 x i32> addrspace(2)* %63 to <16 x i8> addrspace(2)* %65 = load <16 x i8>, <16 x i8> addrspace(2)* %64, align 16, !tbaa !0 %66 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 7 %67 = bitcast <8 x i32> addrspace(2)* %66 to <32 x i8> addrspace(2)* %68 = load <32 x i8>, <32 x i8> addrspace(2)* %67, align 32, !tbaa !0 %69 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 7 %70 = bitcast <4 x i32> addrspace(2)* %69 to <16 x i8> addrspace(2)* %71 = load <16 x i8>, <16 x i8> addrspace(2)* %70, align 16, !tbaa !0 %72 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %74 = bitcast float %72 to i32 %75 = bitcast float %73 to i32 %76 = insertelement <2 x i32> undef, i32 %74, i32 0 %77 = insertelement <2 x i32> %76, i32 %75, i32 1 %78 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %77, <32 x i8> %27, <16 x i8> %29, i32 2) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 1 %81 = extractelement <4 x float> %78, i32 2 %82 = extractelement <4 x float> %78, i32 3 %83 = bitcast float %72 to i32 %84 = bitcast float %73 to i32 %85 = insertelement <2 x i32> undef, i32 %83, i32 0 %86 = insertelement <2 x i32> %85, i32 %84, i32 1 %87 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %86, <32 x i8> %32, <16 x i8> %35, i32 2) %88 = extractelement <4 x float> %87, i32 0 %89 = extractelement <4 x float> %87, i32 1 %90 = extractelement <4 x float> %87, i32 2 %91 = bitcast float %72 to i32 %92 = bitcast float %73 to i32 %93 = insertelement <2 x i32> undef, i32 %91, i32 0 %94 = insertelement <2 x i32> %93, i32 %92, i32 1 %95 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %94, <32 x i8> %38, <16 x i8> %41, i32 2) %96 = extractelement <4 x float> %95, i32 0 %97 = extractelement <4 x float> %95, i32 1 %98 = extractelement <4 x float> %95, i32 2 %99 = bitcast float %72 to i32 %100 = bitcast float %73 to i32 %101 = insertelement <2 x i32> undef, i32 %99, i32 0 %102 = insertelement <2 x i32> %101, i32 %100, i32 1 %103 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %102, <32 x i8> %44, <16 x i8> %47, i32 2) %104 = extractelement <4 x float> %103, i32 0 %105 = extractelement <4 x float> %103, i32 1 %106 = extractelement <4 x float> %103, i32 2 %107 = bitcast float %72 to i32 %108 = bitcast float %73 to i32 %109 = insertelement <2 x i32> undef, i32 %107, i32 0 %110 = insertelement <2 x i32> %109, i32 %108, i32 1 %111 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %110, <32 x i8> %50, <16 x i8> %53, i32 2) %112 = extractelement <4 x float> %111, i32 0 %113 = extractelement <4 x float> %111, i32 1 %114 = extractelement <4 x float> %111, i32 2 %115 = bitcast float %72 to i32 %116 = bitcast float %73 to i32 %117 = insertelement <2 x i32> undef, i32 %115, i32 0 %118 = insertelement <2 x i32> %117, i32 %116, i32 1 %119 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %118, <32 x i8> %56, <16 x i8> %59, i32 2) %120 = extractelement <4 x float> %119, i32 0 %121 = extractelement <4 x float> %119, i32 1 %122 = extractelement <4 x float> %119, i32 2 %123 = bitcast float %72 to i32 %124 = bitcast float %73 to i32 %125 = insertelement <2 x i32> undef, i32 %123, i32 0 %126 = insertelement <2 x i32> %125, i32 %124, i32 1 %127 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %126, <32 x i8> %62, <16 x i8> %65, i32 2) %128 = extractelement <4 x float> %127, i32 0 %129 = extractelement <4 x float> %127, i32 1 %130 = extractelement <4 x float> %127, i32 2 %131 = fmul float %88, 5.000000e-01 %132 = fmul float %89, 5.000000e-01 %133 = fmul float %90, 5.000000e-01 %134 = fmul float %96, 0x3FE3333340000000 %135 = fadd float %134, %131 %136 = fmul float %97, 0x3FE3333340000000 %137 = fadd float %136, %132 %138 = fmul float %98, 0x3FE3333340000000 %139 = fadd float %138, %133 %140 = fmul float %104, 0x3FE3333340000000 %141 = fadd float %140, %135 %142 = fmul float %105, 0x3FE3333340000000 %143 = fadd float %142, %137 %144 = fmul float %106, 0x3FE3333340000000 %145 = fadd float %144, %139 %146 = fmul float %112, 0x3FDCCCCCC0000000 %147 = fadd float %146, %141 %148 = fmul float %113, 0x3FDCCCCCC0000000 %149 = fadd float %148, %143 %150 = fmul float %114, 0x3FDCCCCCC0000000 %151 = fadd float %150, %145 %152 = fmul float %120, 0x3FD6666660000000 %153 = fadd float %152, %147 %154 = fmul float %121, 0x3FD6666660000000 %155 = fadd float %154, %149 %156 = fmul float %122, 0x3FD6666660000000 %157 = fadd float %156, %151 %158 = fmul float %128, 0x3FCD70A3E0000000 %159 = fadd float %158, %153 %160 = fmul float %129, 0x3FCD70A3E0000000 %161 = fadd float %160, %155 %162 = fmul float %130, 0x3FCD70A3E0000000 %163 = fadd float %162, %157 %164 = fmul float %159, 0x3FDD1745C0000000 %165 = fmul float %161, 0x3FDD1745C0000000 %166 = fmul float %163, 0x3FDD1745C0000000 %167 = call float @llvm.AMDGPU.lrp(float %24, float %164, float %79) %168 = call float @llvm.AMDGPU.lrp(float %24, float %165, float %80) %169 = call float @llvm.AMDGPU.lrp(float %24, float %166, float %81) %170 = fmul float %96, 0x3FE99999A0000000 %171 = fadd float %170, %88 %172 = fmul float %97, 0x3FE99999A0000000 %173 = fadd float %172, %89 %174 = fmul float %98, 0x3FE99999A0000000 %175 = fadd float %174, %90 %176 = fmul float %104, 0x3FE3333340000000 %177 = fadd float %176, %171 %178 = fmul float %105, 0x3FE3333340000000 %179 = fadd float %178, %173 %180 = fmul float %106, 0x3FE3333340000000 %181 = fadd float %180, %175 %182 = fmul float %112, 0x3FDCCCCCC0000000 %183 = fadd float %182, %177 %184 = fmul float %113, 0x3FDCCCCCC0000000 %185 = fadd float %184, %179 %186 = fmul float %114, 0x3FDCCCCCC0000000 %187 = fadd float %186, %181 %188 = fmul float %120, 0x3FD6666660000000 %189 = fadd float %188, %183 %190 = fmul float %121, 0x3FD6666660000000 %191 = fadd float %190, %185 %192 = fmul float %122, 0x3FD6666660000000 %193 = fadd float %192, %187 %194 = fmul float %128, 0x3FCD70A3E0000000 %195 = fadd float %194, %189 %196 = fmul float %129, 0x3FCD70A3E0000000 %197 = fadd float %196, %191 %198 = fmul float %130, 0x3FCD70A3E0000000 %199 = fadd float %198, %193 %200 = bitcast float %72 to i32 %201 = bitcast float %73 to i32 %202 = insertelement <2 x i32> undef, i32 %200, i32 0 %203 = insertelement <2 x i32> %202, i32 %201, i32 1 %204 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %203, <32 x i8> %68, <16 x i8> %71, i32 2) %205 = extractelement <4 x float> %204, i32 0 %206 = extractelement <4 x float> %204, i32 1 %207 = extractelement <4 x float> %204, i32 2 %208 = fmul float %205, %25 %209 = fmul float %206, %25 %210 = fmul float %207, %25 %211 = call float @llvm.AMDIL.clamp.(float %208, float 0.000000e+00, float 1.000000e+00) %212 = call float @llvm.AMDIL.clamp.(float %209, float 0.000000e+00, float 1.000000e+00) %213 = call float @llvm.AMDIL.clamp.(float %210, float 0.000000e+00, float 1.000000e+00) %214 = call float @llvm.AMDGPU.lrp(float %211, float %195, float %167) %215 = call float @llvm.AMDGPU.lrp(float %212, float %197, float %168) %216 = call float @llvm.AMDGPU.lrp(float %213, float %199, float %169) %217 = call i32 @llvm.SI.packf16(float %214, float %215) %218 = bitcast i32 %217 to float %219 = call i32 @llvm.SI.packf16(float %216, float %82) %220 = bitcast i32 %219 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %218, float %220, float %218, float %220) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v2, 0x3f19999a ; 7E0402FF 3F19999A v_mov_b32_e32 v3, 0x3ee66666 ; 7E0602FF 3EE66666 v_mov_b32_e32 v4, 0x3eb33333 ; 7E0802FF 3EB33333 v_mov_b32_e32 v5, 0x3e6b851f ; 7E0A02FF 3E6B851F v_mov_b32_e32 v6, 0x3ee8ba2e ; 7E0C02FF 3EE8BA2E v_mov_b32_e32 v7, 0x3f4ccccd ; 7E0E02FF 3F4CCCCD v_interp_p1_f32 v8, v0, 0, 0, [m0] ; C8200000 v_interp_p2_f32 v8, [v8], v1, 0, 0, [m0] ; C8210001 v_interp_p1_f32 v9, v0, 1, 0, [m0] ; C8240100 v_interp_p2_f32 v9, [v9], v1, 1, 0, [m0] ; C8250101 s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504 s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508 s_load_dwordx4 s[40:43], s[4:5], 0xc ; C094050C s_load_dwordx4 s[44:47], s[4:5], 0x10 ; C0960510 s_load_dwordx4 s[48:51], s[4:5], 0x14 ; C0980514 s_load_dwordx4 s[0:3], s[4:5], 0x18 ; C0800518 s_load_dwordx4 s[8:11], s[4:5], 0x1c ; C084051C s_load_dwordx8 s[52:59], s[6:7], 0x0 ; C0DA0700 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[12:15], 0x20 ; C2020D20 s_buffer_load_dword s5, s[12:15], 0x24 ; C2028D24 s_load_dwordx8 s[60:67], s[6:7], 0x8 ; C0DE0708 s_load_dwordx8 s[68:75], s[6:7], 0x10 ; C0E20710 s_load_dwordx8 s[76:83], s[6:7], 0x18 ; C0E60718 s_load_dwordx8 s[84:91], s[6:7], 0x20 ; C0EA0720 s_load_dwordx8 s[92:99], s[6:7], 0x28 ; C0EE0728 s_load_dwordx8 s[12:19], s[6:7], 0x30 ; C0C60730 s_load_dwordx8 s[20:27], s[6:7], 0x38 ; C0CA0738 image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[52:59], s[28:31] ; F0800F00 00ED0A08 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v0, 1.0, s4 ; D2080000 000008F2 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v10, v0 ; 1002010A image_sample v[14:16], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[60:67], s[32:35] ; F0800700 010F0E08 image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[68:75], s[36:39] ; F0800700 01311108 image_sample v[20:22], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[76:83], s[40:43] ; F0800700 01531408 image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[84:91], s[44:47] ; F0800700 01751708 s_waitcnt vmcnt(3) ; BF8C0773 v_mul_f32_e32 v10, 0.5, v14 ; 10141CF0 s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v10, v2, v17 ; 3E142302 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v10, v2, v20 ; 3E142902 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v10, v3, v23 ; 3E142F03 v_mul_f32_e32 v26, 0.5, v15 ; 10341EF0 v_mac_f32_e32 v26, v2, v18 ; 3E342502 v_mac_f32_e32 v26, v2, v21 ; 3E342B02 v_mac_f32_e32 v26, v3, v24 ; 3E343103 v_mul_f32_e32 v27, 0.5, v16 ; 103620F0 v_mac_f32_e32 v27, v2, v19 ; 3E362702 v_mac_f32_e32 v27, v2, v22 ; 3E362D02 v_mac_f32_e32 v27, v3, v25 ; 3E363303 v_mad_f32 v14, v7, v17, v14 ; D282000E 043A2307 v_mac_f32_e32 v14, v2, v20 ; 3E1C2902 v_mac_f32_e32 v14, v3, v23 ; 3E1C2F03 v_mad_f32 v15, v7, v18, v15 ; D282000F 043E2507 v_mac_f32_e32 v16, v7, v19 ; 3E202707 v_mac_f32_e32 v15, v2, v21 ; 3E1E2B02 v_mac_f32_e32 v16, v2, v22 ; 3E202D02 v_mac_f32_e32 v15, v3, v24 ; 3E1E3103 v_mac_f32_e32 v16, v3, v25 ; 3E203303 image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[92:99], s[48:51] ; F0800700 01971108 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v10, v4, v17 ; 3E142304 v_mac_f32_e32 v26, v4, v18 ; 3E342504 v_mac_f32_e32 v27, v4, v19 ; 3E362704 v_mac_f32_e32 v14, v4, v17 ; 3E1C2304 v_mac_f32_e32 v15, v4, v18 ; 3E1E2504 v_mac_f32_e32 v16, v4, v19 ; 3E202704 image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[12:19], s[0:3] ; F0800700 00030208 image_sample v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[20:27], s[8:11] ; F0800700 00450708 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v10, v5, v2 ; 3E140505 v_mac_f32_e32 v26, v5, v3 ; 3E340705 v_mac_f32_e32 v27, v5, v4 ; 3E360905 v_mul_f32_e32 v10, v6, v10 ; 10141506 v_mac_f32_e32 v1, s4, v10 ; 3E021404 v_mul_f32_e32 v10, v6, v26 ; 10143506 v_mul_f32_e32 v6, v6, v27 ; 100C3706 v_mac_f32_e32 v14, v5, v2 ; 3E1C0505 v_mac_f32_e32 v15, v5, v3 ; 3E1E0705 v_mac_f32_e32 v16, v5, v4 ; 3E200905 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, s5, v7 ; 10040E05 v_mul_f32_e32 v3, s5, v8 ; 10061005 v_mul_f32_e32 v4, s5, v9 ; 10081205 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_sub_f32_e32 v5, 1.0, v2 ; 080A04F2 v_mul_f32_e32 v1, v1, v5 ; 10020B01 v_mac_f32_e32 v1, v14, v2 ; 3E02050E v_mul_f32_e32 v2, v11, v0 ; 1004010B v_mac_f32_e32 v2, s4, v10 ; 3E041404 v_mul_f32_e32 v0, v12, v0 ; 1000010C v_mac_f32_e32 v0, s4, v6 ; 3E000C04 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_sub_f32_e32 v5, 1.0, v3 ; 080A06F2 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_mac_f32_e32 v2, v15, v3 ; 3E04070F v_sub_f32_e32 v3, 1.0, v4 ; 080608F2 v_mul_f32_e32 v0, v0, v3 ; 10000700 v_mac_f32_e32 v0, v16, v4 ; 3E000910 v_cvt_pkrtz_f16_f32_e32 v0, v0, v13 ; 5E001B00 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 104 VGPRS: 28 Code Size: 528 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xy, IN[1].xyxx 5: MOV OUT[1], TEMP[1] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = fmul float %13, %33 %44 = fmul float %14, %33 %45 = fmul float %15, %33 %46 = fmul float %16, %33 %47 = fmul float %17, %34 %48 = fadd float %47, %43 %49 = fmul float %18, %34 %50 = fadd float %49, %44 %51 = fmul float %19, %34 %52 = fadd float %51, %45 %53 = fmul float %20, %34 %54 = fadd float %53, %46 %55 = fmul float %21, %35 %56 = fadd float %55, %48 %57 = fmul float %22, %35 %58 = fadd float %57, %50 %59 = fmul float %23, %35 %60 = fadd float %59, %52 %61 = fmul float %24, %35 %62 = fadd float %61, %54 %63 = fmul float %25, %36 %64 = fadd float %63, %56 %65 = fmul float %26, %36 %66 = fadd float %65, %58 %67 = fmul float %27, %36 %68 = fadd float %67, %60 %69 = fmul float %28, %36 %70 = fadd float %69, %62 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v0, s12, v2 ; 1000040C s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, s6, v3 ; 3E000606 v_mul_f32_e32 v8, s13, v2 ; 1010040D v_mac_f32_e32 v8, s7, v3 ; 3E100607 v_mul_f32_e32 v9, s4, v2 ; 10120404 v_mac_f32_e32 v9, s8, v3 ; 3E120608 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mac_f32_e32 v2, s9, v3 ; 3E040609 v_mac_f32_e32 v0, s10, v4 ; 3E00080A v_mac_f32_e32 v8, s11, v4 ; 3E10080B v_mac_f32_e32 v9, s14, v4 ; 3E12080E v_mac_f32_e32 v2, s15, v4 ; 3E04080F v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mac_f32_e32 v8, s17, v5 ; 3E100A11 v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706 exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 196 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1] DCL TEMP[0..37], LOCAL IMM[0] FLT32 { 0.0000, -1.0000, 1.0000, 1.9632} IMM[1] FLT32 { 0.0417, 0.1250, 0.7500, 0.2500} IMM[2] FLT32 { -0.2500, 1.3333, 0.1111, -0.5000} IMM[3] FLT32 { 0.5000, -1.5000, 1.5000, 2.0000} IMM[4] UINT32 {0, 4294967295, 0, 0} IMM[5] INT32 {0, 8, 1, 0} 0: MOV TEMP[0].xy, CONST[1].xyxx 1: MOV TEMP[1].zw, IMM[0].xxxx 2: MAD TEMP[1].xy, IMM[0].xyyy, CONST[1].xyyy, IN[0].xyyy 3: MOV TEMP[1].xy, TEMP[1].xyyy 4: MOV TEMP[1].w, IMM[0].xxxx 5: TXL TEMP[1], TEMP[1], SAMP[0], 2D 6: MOV TEMP[2], TEMP[1] 7: MOV TEMP[0].zw, IMM[0].xxxx 8: MAD TEMP[0].xy, IMM[0].yxxx, CONST[1].xyyy, IN[0].xyyy 9: MOV TEMP[0].xy, TEMP[0].xyyy 10: MOV TEMP[0].w, IMM[0].xxxx 11: TXL TEMP[0], TEMP[0], SAMP[0], 2D 12: MOV TEMP[3], TEMP[0] 13: MOV TEMP[4].xy, IN[0].xyyy 14: MOV TEMP[4].w, IMM[0].xxxx 15: TXL TEMP[4], TEMP[4], SAMP[0], 2D 16: MOV TEMP[5], TEMP[4] 17: MOV TEMP[6].zw, IMM[0].xxxx 18: MAD TEMP[6].xy, IMM[0].zxxx, CONST[1].xyyy, IN[0].xyyy 19: MOV TEMP[6].xy, TEMP[6].xyyy 20: MOV TEMP[6].w, IMM[0].xxxx 21: TXL TEMP[6], TEMP[6], SAMP[0], 2D 22: MOV TEMP[7], TEMP[6] 23: MOV TEMP[8].zw, IMM[0].xxxx 24: MAD TEMP[8].xy, IMM[0].xzzz, CONST[1].xyyy, IN[0].xyyy 25: MOV TEMP[8].xy, TEMP[8].xyyy 26: MOV TEMP[8].w, IMM[0].xxxx 27: TXL TEMP[8], TEMP[8], SAMP[0], 2D 28: MOV TEMP[9], TEMP[8] 29: MAD TEMP[10].x, TEMP[1].yyyy, IMM[0].wwww, TEMP[1].xxxx 30: MOV TEMP[2].x, TEMP[10].xxxx 31: MAD TEMP[11].x, TEMP[0].yyyy, IMM[0].wwww, TEMP[0].xxxx 32: MAD TEMP[12].x, TEMP[4].yyyy, IMM[0].wwww, TEMP[4].xxxx 33: MAD TEMP[13].x, TEMP[6].yyyy, IMM[0].wwww, TEMP[6].xxxx 34: MAD TEMP[14].x, TEMP[8].yyyy, IMM[0].wwww, TEMP[8].xxxx 35: MOV TEMP[3].x, TEMP[14].xxxx 36: MAX TEMP[15].x, TEMP[12].xxxx, TEMP[10].xxxx 37: MAX TEMP[16].x, TEMP[11].xxxx, TEMP[14].xxxx 38: MAX TEMP[16].x, TEMP[16].xxxx, TEMP[13].xxxx 39: MAX TEMP[15].x, TEMP[15].xxxx, TEMP[16].xxxx 40: MIN TEMP[16].x, TEMP[12].xxxx, TEMP[10].xxxx 41: MIN TEMP[5].x, TEMP[11].xxxx, TEMP[14].xxxx 42: MIN TEMP[5].x, TEMP[5].xxxx, TEMP[13].xxxx 43: MIN TEMP[16].x, TEMP[16].xxxx, TEMP[5].xxxx 44: ADD TEMP[16].x, TEMP[15].xxxx, -TEMP[16].xxxx 45: MUL TEMP[15].x, TEMP[15].xxxx, IMM[1].yyyy 46: MAX TEMP[15].x, IMM[1].xxxx, TEMP[15].xxxx 47: FSLT TEMP[15].x, TEMP[16].xxxx, TEMP[15].xxxx 48: UIF TEMP[15].xxxx :0 49: MOV TEMP[15].xyz, TEMP[4].xyzx 50: ELSE :0 51: ADD TEMP[5].x, TEMP[10].xxxx, TEMP[11].xxxx 52: ADD TEMP[17].x, TEMP[13].xxxx, TEMP[14].xxxx 53: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[17].xxxx 54: MAD TEMP[5].x, TEMP[5].xxxx, IMM[1].wwww, -TEMP[12].xxxx 55: ABS TEMP[5].x, TEMP[5].xxxx 56: RCP TEMP[16].x, TEMP[16].xxxx 57: MAD TEMP[16].x, TEMP[5].xxxx, TEMP[16].xxxx, IMM[2].xxxx 58: MAX TEMP[16].x, IMM[0].xxxx, TEMP[16].xxxx 59: MUL TEMP[16].x, TEMP[16].xxxx, IMM[2].yyyy 60: MIN TEMP[16].x, IMM[1].zzzz, TEMP[16].xxxx 61: MOV TEMP[5].zw, IMM[0].xxxx 62: ADD TEMP[5].xy, IN[0].xyyy, -CONST[1].xyyy 63: MOV TEMP[5].xy, TEMP[5].xyyy 64: MOV TEMP[5].w, IMM[0].xxxx 65: TXL TEMP[5], TEMP[5], SAMP[0], 2D 66: MOV TEMP[18], TEMP[5] 67: MOV TEMP[17].zw, IMM[0].xxxx 68: MAD TEMP[17].xy, IMM[0].zyyy, CONST[1].xyyy, IN[0].xyyy 69: MOV TEMP[17].xy, TEMP[17].xyyy 70: MOV TEMP[17].w, IMM[0].xxxx 71: TXL TEMP[17], TEMP[17], SAMP[0], 2D 72: MOV TEMP[19], TEMP[17] 73: MOV TEMP[20].zw, IMM[0].xxxx 74: MAD TEMP[20].xy, IMM[0].yzzz, CONST[1].xyyy, IN[0].xyyy 75: MOV TEMP[20].xy, TEMP[20].xyyy 76: MOV TEMP[20].w, IMM[0].xxxx 77: TXL TEMP[20], TEMP[20], SAMP[0], 2D 78: MOV TEMP[21], TEMP[20] 79: MOV TEMP[22].zw, IMM[0].xxxx 80: ADD TEMP[22].xy, IN[0].xyyy, CONST[1].xyyy 81: MOV TEMP[22].xy, TEMP[22].xyyy 82: MOV TEMP[22].w, IMM[0].xxxx 83: TXL TEMP[22], TEMP[22], SAMP[0], 2D 84: MOV TEMP[23], TEMP[22] 85: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[0].xyzz 86: ADD TEMP[0].xyz, TEMP[4].xyzz, TEMP[6].xyzz 87: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[0].xyzz 88: ADD TEMP[0].xyz, TEMP[8].xyzz, TEMP[5].xyzz 89: ADD TEMP[4].xyz, TEMP[17].xyzz, TEMP[20].xyzz 90: ADD TEMP[4].xyz, TEMP[4].xyzz, TEMP[22].xyzz 91: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xyzz 92: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[0].xyzz 93: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[2].zzzz 94: MAD TEMP[0].x, TEMP[5].yyyy, IMM[0].wwww, TEMP[5].xxxx 95: MAD TEMP[4].x, TEMP[17].yyyy, IMM[0].wwww, TEMP[17].xxxx 96: MAD TEMP[6].x, TEMP[20].yyyy, IMM[0].wwww, TEMP[20].xxxx 97: MAD TEMP[8].x, TEMP[22].yyyy, IMM[0].wwww, TEMP[22].xxxx 98: MUL TEMP[5].x, IMM[1].wwww, TEMP[0].xxxx 99: MAD TEMP[5].x, IMM[2].wwww, TEMP[11].xxxx, TEMP[5].xxxx 100: MAD TEMP[5].x, IMM[1].wwww, TEMP[6].xxxx, TEMP[5].xxxx 101: ABS TEMP[5].x, TEMP[5].xxxx 102: MAD TEMP[17].x, IMM[3].xxxx, TEMP[10].xxxx, -TEMP[12].xxxx 103: MAD TEMP[17].x, IMM[3].xxxx, TEMP[14].xxxx, TEMP[17].xxxx 104: ABS TEMP[17].x, TEMP[17].xxxx 105: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[17].xxxx 106: MUL TEMP[17].x, IMM[1].wwww, TEMP[4].xxxx 107: MAD TEMP[17].x, IMM[2].wwww, TEMP[13].xxxx, TEMP[17].xxxx 108: MAD TEMP[17].x, IMM[1].wwww, TEMP[8].xxxx, TEMP[17].xxxx 109: ABS TEMP[17].x, TEMP[17].xxxx 110: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[17].xxxx 111: MUL TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx 112: MAD TEMP[0].x, IMM[2].wwww, TEMP[10].xxxx, TEMP[0].xxxx 113: MAD TEMP[0].x, IMM[1].wwww, TEMP[4].xxxx, TEMP[0].xxxx 114: ABS TEMP[0].x, TEMP[0].xxxx 115: MAD TEMP[4].x, IMM[3].xxxx, TEMP[11].xxxx, -TEMP[12].xxxx 116: MAD TEMP[4].x, IMM[3].xxxx, TEMP[13].xxxx, TEMP[4].xxxx 117: ABS TEMP[4].x, TEMP[4].xxxx 118: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx 119: MUL TEMP[4].x, IMM[1].wwww, TEMP[6].xxxx 120: MAD TEMP[4].x, IMM[2].wwww, TEMP[14].xxxx, TEMP[4].xxxx 121: MAD TEMP[4].x, IMM[1].wwww, TEMP[8].xxxx, TEMP[4].xxxx 122: ABS TEMP[4].x, TEMP[4].xxxx 123: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx 124: FSGE TEMP[0].x, TEMP[5].xxxx, TEMP[0].xxxx 125: UIF TEMP[0].xxxx :0 126: MOV TEMP[4].x, -CONST[1].yyyy 127: ELSE :0 128: MOV TEMP[4].x, -CONST[1].xxxx 129: ENDIF 130: MOV TEMP[6].x, TEMP[4].xxxx 131: NOT TEMP[8].x, TEMP[0].xxxx 132: UIF TEMP[8].xxxx :0 133: MOV TEMP[2].x, TEMP[11].xxxx 134: ENDIF 135: NOT TEMP[8].x, TEMP[0].xxxx 136: UIF TEMP[8].xxxx :0 137: MOV TEMP[3].x, TEMP[13].xxxx 138: ENDIF 139: ADD TEMP[8].x, TEMP[2].xxxx, -TEMP[12].xxxx 140: ABS TEMP[8].x, TEMP[8].xxxx 141: MOV TEMP[10].x, TEMP[8].xxxx 142: ADD TEMP[11].x, TEMP[3].xxxx, -TEMP[12].xxxx 143: ABS TEMP[11].x, TEMP[11].xxxx 144: ADD TEMP[13].x, TEMP[2].xxxx, TEMP[12].xxxx 145: MUL TEMP[2].x, TEMP[13].xxxx, IMM[3].xxxx 146: ADD TEMP[13].x, TEMP[3].xxxx, TEMP[12].xxxx 147: MUL TEMP[13].x, TEMP[13].xxxx, IMM[3].xxxx 148: MOV TEMP[3].x, TEMP[13].xxxx 149: FSGE TEMP[8].x, TEMP[8].xxxx, TEMP[11].xxxx 150: NOT TEMP[14].x, TEMP[8].xxxx 151: UIF TEMP[14].xxxx :0 152: MOV TEMP[2].x, TEMP[13].xxxx 153: ENDIF 154: NOT TEMP[13].x, TEMP[8].xxxx 155: UIF TEMP[13].xxxx :0 156: MOV TEMP[10].x, TEMP[11].xxxx 157: ENDIF 158: NOT TEMP[8].x, TEMP[8].xxxx 159: UIF TEMP[8].xxxx :0 160: MOV TEMP[6].x, -TEMP[4].xxxx 161: ENDIF 162: UIF TEMP[0].xxxx :0 163: MOV TEMP[4].x, IMM[0].xxxx 164: ELSE :0 165: MUL TEMP[4].x, TEMP[6].xxxx, IMM[3].xxxx 166: ENDIF 167: ADD TEMP[4].x, IN[0].xxxx, TEMP[4].xxxx 168: UIF TEMP[0].xxxx :0 169: MUL TEMP[8].x, TEMP[6].xxxx, IMM[3].xxxx 170: ELSE :0 171: MOV TEMP[8].x, IMM[0].xxxx 172: ENDIF 173: ADD TEMP[8].x, IN[0].yyyy, TEMP[8].xxxx 174: MOV TEMP[4].y, TEMP[8].xxxx 175: MUL TEMP[10].x, TEMP[10].xxxx, IMM[1].wwww 176: MOV TEMP[8].xy, TEMP[4].xyxx 177: UIF TEMP[0].xxxx :0 178: MOV TEMP[11].y, IMM[0].xxxx 179: MOV TEMP[11].x, CONST[1].xxxx 180: MOV TEMP[11].xy, TEMP[11].xyxx 181: ELSE :0 182: MOV TEMP[13].x, IMM[0].xxxx 183: MOV TEMP[13].y, CONST[1].yyyy 184: MOV TEMP[11].xy, TEMP[13].xyxx 185: ENDIF 186: MOV TEMP[13].x, TEMP[2].xxxx 187: MOV TEMP[14].x, TEMP[2].xxxx 188: MOV TEMP[3].x, IMM[4].xxxx 189: MOV TEMP[5].x, IMM[4].xxxx 190: MAD TEMP[4].xy, TEMP[11].xyyy, IMM[3].yyyy, TEMP[4].xyyy 191: MAD TEMP[8].xy, TEMP[11].xyyy, IMM[3].zzzz, TEMP[8].xyyy 192: MUL TEMP[11].xy, TEMP[11].xyyy, IMM[3].wwww 193: MOV TEMP[17].x, IMM[5].xxxx 194: BGNLOOP :0 195: ISGE TEMP[20].x, TEMP[17].xxxx, IMM[5].yyyy 196: UIF TEMP[20].xxxx :0 197: BRK 198: ENDIF 199: NOT TEMP[22].x, TEMP[3].xxxx 200: UIF TEMP[22].xxxx :0 201: MOV TEMP[24].xy, TEMP[4].xyyy 202: TXD TEMP[7], TEMP[24], TEMP[11].xyyy, TEMP[11].xyyy, SAMP[0], 2D 203: MOV TEMP[25], TEMP[7] 204: MAD TEMP[13].x, TEMP[7].yyyy, IMM[0].wwww, TEMP[7].xxxx 205: ENDIF 206: NOT TEMP[26].x, TEMP[5].xxxx 207: UIF TEMP[26].xxxx :0 208: MOV TEMP[27].xy, TEMP[8].xyyy 209: TXD TEMP[28], TEMP[27], TEMP[11].xyyy, TEMP[11].xyyy, SAMP[0], 2D 210: MOV TEMP[29], TEMP[28] 211: MAD TEMP[14].x, TEMP[28].yyyy, IMM[0].wwww, TEMP[28].xxxx 212: ENDIF 213: UIF TEMP[3].xxxx :0 214: MOV TEMP[30].x, IMM[4].yyyy 215: ELSE :0 216: ADD TEMP[9].x, TEMP[13].xxxx, -TEMP[2].xxxx 217: ABS TEMP[31].x, TEMP[9].xxxx 218: FSGE TEMP[30].x, TEMP[31].xxxx, TEMP[10].xxxx 219: ENDIF 220: MOV TEMP[3].x, TEMP[30].xxxx 221: UIF TEMP[5].xxxx :0 222: MOV TEMP[32].x, IMM[4].yyyy 223: ELSE :0 224: ADD TEMP[33].x, TEMP[14].xxxx, -TEMP[2].xxxx 225: ABS TEMP[34].x, TEMP[33].xxxx 226: FSGE TEMP[32].x, TEMP[34].xxxx, TEMP[10].xxxx 227: ENDIF 228: MOV TEMP[5].x, TEMP[32].xxxx 229: AND TEMP[35].x, TEMP[30].xxxx, TEMP[32].xxxx 230: UIF TEMP[35].xxxx :0 231: BRK 232: ENDIF 233: NOT TEMP[36].x, TEMP[30].xxxx 234: UIF TEMP[36].xxxx :0 235: ADD TEMP[4].xy, TEMP[4].xyyy, -TEMP[11].xyyy 236: ENDIF 237: NOT TEMP[37].x, TEMP[32].xxxx 238: UIF TEMP[37].xxxx :0 239: ADD TEMP[8].xy, TEMP[8].xyyy, TEMP[11].xyyy 240: ENDIF 241: UADD TEMP[17].x, TEMP[17].xxxx, IMM[5].zzzz 242: ENDLOOP :0 243: UIF TEMP[0].xxxx :0 244: ADD TEMP[10].x, IN[0].xxxx, -TEMP[4].xxxx 245: ELSE :0 246: ADD TEMP[10].x, IN[0].yyyy, -TEMP[4].yyyy 247: ENDIF 248: UIF TEMP[0].xxxx :0 249: ADD TEMP[4].x, TEMP[8].xxxx, -IN[0].xxxx 250: ELSE :0 251: ADD TEMP[4].x, TEMP[8].yyyy, -IN[0].yyyy 252: ENDIF 253: FSLT TEMP[8].x, TEMP[10].xxxx, TEMP[4].xxxx 254: UIF TEMP[8].xxxx :0 255: MOV TEMP[11].x, TEMP[13].xxxx 256: ELSE :0 257: MOV TEMP[11].x, TEMP[14].xxxx 258: ENDIF 259: FSLT TEMP[12].x, TEMP[12].xxxx, TEMP[2].xxxx 260: FSLT TEMP[2].x, TEMP[11].xxxx, TEMP[2].xxxx 261: USEQ TEMP[2].x, TEMP[12].xxxx, TEMP[2].xxxx 262: UIF TEMP[2].xxxx :0 263: MOV TEMP[6].x, IMM[0].xxxx 264: ENDIF 265: ADD TEMP[2].x, TEMP[4].xxxx, TEMP[10].xxxx 266: UIF TEMP[8].xxxx :0 267: MOV TEMP[8].x, TEMP[10].xxxx 268: ELSE :0 269: MOV TEMP[8].x, TEMP[4].xxxx 270: ENDIF 271: RCP TEMP[4].x, TEMP[2].xxxx 272: MAD TEMP[4].x, TEMP[8].xxxx, -TEMP[4].xxxx, IMM[3].xxxx 273: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[6].xxxx 274: UIF TEMP[0].xxxx :0 275: MOV TEMP[6].x, IMM[0].xxxx 276: ELSE :0 277: MOV TEMP[6].x, TEMP[4].xxxx 278: ENDIF 279: UIF TEMP[0].xxxx :0 280: MOV TEMP[0].x, TEMP[4].xxxx 281: ELSE :0 282: MOV TEMP[0].x, IMM[0].xxxx 283: ENDIF 284: ADD TEMP[4].x, IN[0].xxxx, TEMP[6].xxxx 285: ADD TEMP[0].x, IN[0].yyyy, TEMP[0].xxxx 286: MOV TEMP[4].y, TEMP[0].xxxx 287: MOV TEMP[0].xy, TEMP[4].xyyy 288: MOV TEMP[0].w, IMM[0].xxxx 289: TXL TEMP[0].xyz, TEMP[0], SAMP[0], 2D 290: MOV TEMP[4].x, -TEMP[16].xxxx 291: MOV TEMP[4].y, -TEMP[16].xxxx 292: MOV TEMP[4].z, -TEMP[16].xxxx 293: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[16].xxxx, TEMP[0].xyzz 294: MAD TEMP[15].xyz, TEMP[4].xyzz, TEMP[0].xyzz, TEMP[1].xyzz 295: ENDIF 296: MOV TEMP[1].w, IMM[0].xxxx 297: MOV TEMP[1].xyz, TEMP[15].xyzx 298: MOV OUT[0], TEMP[1] 299: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %26 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %27 = load <8 x i32>, <8 x i32> addrspace(2)* %26, align 32, !tbaa !0 %28 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %29 = load <4 x i32>, <4 x i32> addrspace(2)* %28, align 16, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = fmul float %24, 0.000000e+00 %33 = fadd float %32, %30 %34 = fsub float %31, %25 %35 = bitcast float %33 to i32 %36 = bitcast float %34 to i32 %37 = insertelement <4 x i32> undef, i32 %35, i32 0 %38 = insertelement <4 x i32> %37, i32 %36, i32 1 %39 = insertelement <4 x i32> %38, i32 0, i32 2 %40 = bitcast <8 x i32> %27 to <32 x i8> %41 = bitcast <4 x i32> %29 to <16 x i8> %42 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %39, <32 x i8> %40, <16 x i8> %41, i32 2) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = fsub float %30, %24 %46 = fmul float %25, 0.000000e+00 %47 = fadd float %46, %31 %48 = bitcast float %45 to i32 %49 = bitcast float %47 to i32 %50 = insertelement <4 x i32> undef, i32 %48, i32 0 %51 = insertelement <4 x i32> %50, i32 %49, i32 1 %52 = insertelement <4 x i32> %51, i32 0, i32 2 %53 = bitcast <8 x i32> %27 to <32 x i8> %54 = bitcast <4 x i32> %29 to <16 x i8> %55 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %52, <32 x i8> %53, <16 x i8> %54, i32 2) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = bitcast float %30 to i32 %59 = bitcast float %31 to i32 %60 = insertelement <4 x i32> undef, i32 %58, i32 0 %61 = insertelement <4 x i32> %60, i32 %59, i32 1 %62 = insertelement <4 x i32> %61, i32 0, i32 2 %63 = bitcast <8 x i32> %27 to <32 x i8> %64 = bitcast <4 x i32> %29 to <16 x i8> %65 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %62, <32 x i8> %63, <16 x i8> %64, i32 2) %66 = extractelement <4 x float> %65, i32 0 %67 = extractelement <4 x float> %65, i32 1 %68 = extractelement <4 x float> %65, i32 2 %69 = fadd float %24, %30 %70 = fmul float %25, 0.000000e+00 %71 = fadd float %70, %31 %72 = bitcast float %69 to i32 %73 = bitcast float %71 to i32 %74 = insertelement <4 x i32> undef, i32 %72, i32 0 %75 = insertelement <4 x i32> %74, i32 %73, i32 1 %76 = insertelement <4 x i32> %75, i32 0, i32 2 %77 = bitcast <8 x i32> %27 to <32 x i8> %78 = bitcast <4 x i32> %29 to <16 x i8> %79 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %76, <32 x i8> %77, <16 x i8> %78, i32 2) %80 = extractelement <4 x float> %79, i32 0 %81 = extractelement <4 x float> %79, i32 1 %82 = fmul float %24, 0.000000e+00 %83 = fadd float %82, %30 %84 = fadd float %25, %31 %85 = bitcast float %83 to i32 %86 = bitcast float %84 to i32 %87 = insertelement <4 x i32> undef, i32 %85, i32 0 %88 = insertelement <4 x i32> %87, i32 %86, i32 1 %89 = insertelement <4 x i32> %88, i32 0, i32 2 %90 = bitcast <8 x i32> %27 to <32 x i8> %91 = bitcast <4 x i32> %29 to <16 x i8> %92 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %89, <32 x i8> %90, <16 x i8> %91, i32 2) %93 = extractelement <4 x float> %92, i32 0 %94 = extractelement <4 x float> %92, i32 1 %95 = fmul float %44, 0x3FFF695000000000 %96 = fadd float %95, %43 %97 = fmul float %57, 0x3FFF695000000000 %98 = fadd float %97, %56 %99 = fmul float %67, 0x3FFF695000000000 %100 = fadd float %99, %66 %101 = fmul float %81, 0x3FFF695000000000 %102 = fadd float %101, %80 %103 = fmul float %94, 0x3FFF695000000000 %104 = fadd float %103, %93 %105 = call float @llvm.maxnum.f32(float %100, float %96) %106 = call float @llvm.maxnum.f32(float %98, float %104) %107 = call float @llvm.maxnum.f32(float %106, float %102) %108 = call float @llvm.maxnum.f32(float %105, float %107) %109 = call float @llvm.minnum.f32(float %100, float %96) %110 = call float @llvm.minnum.f32(float %98, float %104) %111 = call float @llvm.minnum.f32(float %110, float %102) %112 = call float @llvm.minnum.f32(float %109, float %111) %113 = fsub float %108, %112 %114 = fmul float %108, 1.250000e-01 %115 = call float @llvm.maxnum.f32(float %114, float 0x3FA5555580000000) %116 = fcmp olt float %113, %115 br i1 %116, label %ENDIF, label %ELSE ELSE: ; preds = %main_body %117 = extractelement <4 x float> %92, i32 2 %118 = extractelement <4 x float> %79, i32 2 %119 = extractelement <4 x float> %55, i32 2 %120 = extractelement <4 x float> %42, i32 2 %121 = fadd float %96, %98 %122 = fadd float %102, %104 %123 = fadd float %121, %122 %124 = fmul float %123, 2.500000e-01 %125 = fsub float %124, %100 %126 = call float @llvm.fabs.f32(float %125) %127 = fdiv float 1.000000e+00, %113 %128 = fmul float %126, %127 %129 = fadd float %128, -2.500000e-01 %130 = call float @llvm.maxnum.f32(float %129, float 0.000000e+00) %131 = fmul float %130, 0x3FF5555500000000 %132 = call float @llvm.minnum.f32(float %131, float 7.500000e-01) %133 = fsub float %30, %24 %134 = fsub float %31, %25 %135 = bitcast float %133 to i32 %136 = bitcast float %134 to i32 %137 = insertelement <4 x i32> undef, i32 %135, i32 0 %138 = insertelement <4 x i32> %137, i32 %136, i32 1 %139 = insertelement <4 x i32> %138, i32 0, i32 2 %140 = bitcast <8 x i32> %27 to <32 x i8> %141 = bitcast <4 x i32> %29 to <16 x i8> %142 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %139, <32 x i8> %140, <16 x i8> %141, i32 2) %143 = extractelement <4 x float> %142, i32 0 %144 = extractelement <4 x float> %142, i32 1 %145 = extractelement <4 x float> %142, i32 2 %146 = fadd float %24, %30 %147 = fsub float %31, %25 %148 = bitcast float %146 to i32 %149 = bitcast float %147 to i32 %150 = insertelement <4 x i32> undef, i32 %148, i32 0 %151 = insertelement <4 x i32> %150, i32 %149, i32 1 %152 = insertelement <4 x i32> %151, i32 0, i32 2 %153 = bitcast <8 x i32> %27 to <32 x i8> %154 = bitcast <4 x i32> %29 to <16 x i8> %155 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %152, <32 x i8> %153, <16 x i8> %154, i32 2) %156 = extractelement <4 x float> %155, i32 0 %157 = extractelement <4 x float> %155, i32 1 %158 = extractelement <4 x float> %155, i32 2 %159 = fsub float %30, %24 %160 = fadd float %25, %31 %161 = bitcast float %159 to i32 %162 = bitcast float %160 to i32 %163 = insertelement <4 x i32> undef, i32 %161, i32 0 %164 = insertelement <4 x i32> %163, i32 %162, i32 1 %165 = insertelement <4 x i32> %164, i32 0, i32 2 %166 = bitcast <8 x i32> %27 to <32 x i8> %167 = bitcast <4 x i32> %29 to <16 x i8> %168 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %165, <32 x i8> %166, <16 x i8> %167, i32 2) %169 = extractelement <4 x float> %168, i32 0 %170 = extractelement <4 x float> %168, i32 1 %171 = extractelement <4 x float> %168, i32 2 %172 = fadd float %30, %24 %173 = fadd float %31, %25 %174 = bitcast float %172 to i32 %175 = bitcast float %173 to i32 %176 = insertelement <4 x i32> undef, i32 %174, i32 0 %177 = insertelement <4 x i32> %176, i32 %175, i32 1 %178 = insertelement <4 x i32> %177, i32 0, i32 2 %179 = bitcast <8 x i32> %27 to <32 x i8> %180 = bitcast <4 x i32> %29 to <16 x i8> %181 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %178, <32 x i8> %179, <16 x i8> %180, i32 2) %182 = extractelement <4 x float> %181, i32 0 %183 = extractelement <4 x float> %181, i32 1 %184 = extractelement <4 x float> %181, i32 2 %185 = fadd float %43, %56 %186 = fadd float %44, %57 %187 = fadd float %120, %119 %188 = fadd float %66, %80 %189 = fadd float %67, %81 %190 = fadd float %68, %118 %191 = fadd float %185, %188 %192 = fadd float %186, %189 %193 = fadd float %187, %190 %194 = fadd float %93, %143 %195 = fadd float %94, %144 %196 = fadd float %117, %145 %197 = fadd float %156, %169 %198 = fadd float %157, %170 %199 = fadd float %158, %171 %200 = fadd float %197, %182 %201 = fadd float %198, %183 %202 = fadd float %199, %184 %203 = fadd float %194, %200 %204 = fadd float %195, %201 %205 = fadd float %196, %202 %206 = fadd float %191, %203 %207 = fadd float %192, %204 %208 = fadd float %193, %205 %209 = fmul float %206, 0x3FBC71C6E0000000 %210 = fmul float %207, 0x3FBC71C6E0000000 %211 = fmul float %208, 0x3FBC71C6E0000000 %212 = fmul float %144, 0x3FFF695000000000 %213 = fadd float %212, %143 %214 = fmul float %157, 0x3FFF695000000000 %215 = fadd float %214, %156 %216 = fmul float %170, 0x3FFF695000000000 %217 = fadd float %216, %169 %218 = fmul float %183, 0x3FFF695000000000 %219 = fadd float %218, %182 %220 = fmul float %213, 2.500000e-01 %221 = fmul float %98, -5.000000e-01 %222 = fadd float %221, %220 %223 = fmul float %217, 2.500000e-01 %224 = fadd float %223, %222 %225 = call float @llvm.fabs.f32(float %224) %226 = fmul float %96, 5.000000e-01 %227 = fsub float %226, %100 %228 = fmul float %104, 5.000000e-01 %229 = fadd float %228, %227 %230 = call float @llvm.fabs.f32(float %229) %231 = fadd float %225, %230 %232 = fmul float %215, 2.500000e-01 %233 = fmul float %102, -5.000000e-01 %234 = fadd float %233, %232 %235 = fmul float %219, 2.500000e-01 %236 = fadd float %235, %234 %237 = call float @llvm.fabs.f32(float %236) %238 = fadd float %231, %237 %239 = fmul float %213, 2.500000e-01 %240 = fmul float %96, -5.000000e-01 %241 = fadd float %240, %239 %242 = fmul float %215, 2.500000e-01 %243 = fadd float %242, %241 %244 = call float @llvm.fabs.f32(float %243) %245 = fmul float %98, 5.000000e-01 %246 = fsub float %245, %100 %247 = fmul float %102, 5.000000e-01 %248 = fadd float %247, %246 %249 = call float @llvm.fabs.f32(float %248) %250 = fadd float %244, %249 %251 = fmul float %217, 2.500000e-01 %252 = fmul float %104, -5.000000e-01 %253 = fadd float %252, %251 %254 = fmul float %219, 2.500000e-01 %255 = fadd float %254, %253 %256 = call float @llvm.fabs.f32(float %255) %257 = fadd float %250, %256 %258 = fcmp oge float %238, %257 %.sink = select i1 %258, float %25, float %24 %259 = fsub float -0.000000e+00, %.sink %. = select i1 %258, float %96, float %98 %temp12.0 = select i1 %258, float %104, float %102 %260 = fsub float %., %100 %261 = call float @llvm.fabs.f32(float %260) %262 = fsub float %temp12.0, %100 %263 = call float @llvm.fabs.f32(float %262) %264 = fcmp oge float %261, %263 %.234.v.v = select i1 %264, float %., float %temp12.0 %.234.v = fadd float %.234.v.v, %100 %.234 = fmul float %.234.v, 5.000000e-01 %temp40.0 = select i1 %264, float %261, float %263 %temp24.0 = select i1 %264, float %259, float %.sink %265 = fmul float %temp24.0, 5.000000e-01 %temp16.1 = select i1 %258, float 0.000000e+00, float %265 %266 = fadd float %30, %temp16.1 %267 = fmul float %temp24.0, 5.000000e-01 %temp32.0 = select i1 %258, float %267, float 0.000000e+00 %268 = fadd float %31, %temp32.0 %269 = fmul float %temp40.0, 2.500000e-01 %.235 = select i1 %258, float 0.000000e+00, float %25 %.236 = select i1 %258, float %24, float 0.000000e+00 %270 = fmul float %.236, -1.500000e+00 %271 = fadd float %270, %266 %272 = fmul float %.235, -1.500000e+00 %273 = fadd float %272, %268 %274 = fmul float %.236, 1.500000e+00 %275 = fadd float %274, %266 %276 = fmul float %.235, 1.500000e+00 %277 = fadd float %276, %268 %278 = fmul float %.236, 2.000000e+00 %279 = fmul float %.235, 2.000000e+00 %280 = bitcast float %278 to i32 %281 = bitcast float %279 to i32 %282 = bitcast float %278 to i32 %283 = bitcast float %279 to i32 %284 = insertelement <8 x i32> undef, i32 %280, i32 0 %285 = insertelement <8 x i32> %284, i32 %281, i32 1 %286 = insertelement <8 x i32> %285, i32 %282, i32 2 %287 = insertelement <8 x i32> %286, i32 %283, i32 3 %288 = bitcast <8 x i32> %27 to <32 x i8> %289 = bitcast <4 x i32> %29 to <16 x i8> %290 = bitcast float %278 to i32 %291 = bitcast float %279 to i32 %292 = bitcast float %278 to i32 %293 = bitcast float %279 to i32 %294 = insertelement <8 x i32> undef, i32 %290, i32 0 %295 = insertelement <8 x i32> %294, i32 %291, i32 1 %296 = insertelement <8 x i32> %295, i32 %292, i32 2 %297 = insertelement <8 x i32> %296, i32 %293, i32 3 %298 = bitcast <8 x i32> %27 to <32 x i8> %299 = bitcast <4 x i32> %29 to <16 x i8> br label %LOOP ENDIF: ; preds = %main_body, %ENDLOOP %temp60.0 = phi float [ %338, %ENDLOOP ], [ %66, %main_body ] %temp61.0 = phi float [ %340, %ENDLOOP ], [ %67, %main_body ] %temp62.0 = phi float [ %342, %ENDLOOP ], [ %68, %main_body ] %300 = call i32 @llvm.SI.packf16(float %temp60.0, float %temp61.0) %301 = bitcast i32 %300 to float %302 = call i32 @llvm.SI.packf16(float %temp62.0, float 0.000000e+00) %303 = bitcast i32 %302 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %301, float %303, float %301, float %303) ret void LOOP: ; preds = %ENDIF194, %ELSE %temp56.0 = phi float [ %.234, %ELSE ], [ %temp56.2, %ENDIF194 ] %temp52.0 = phi float [ %.234, %ELSE ], [ %temp52.2, %ENDIF194 ] %temp68.0 = phi float [ 0.000000e+00, %ELSE ], [ %393, %ENDIF194 ] %temp33.0 = phi float [ %277, %ELSE ], [ %temp33.1, %ENDIF194 ] %temp32.1 = phi float [ %275, %ELSE ], [ %temp32.2, %ENDIF194 ] %temp20.0 = phi float [ 0.000000e+00, %ELSE ], [ %temp128.0, %ENDIF194 ] %temp17.0 = phi float [ %273, %ELSE ], [ %temp17.1, %ENDIF194 ] %temp16.2 = phi float [ %271, %ELSE ], [ %temp16.3, %ENDIF194 ] %temp12.1 = phi float [ 0.000000e+00, %ELSE ], [ %temp120.0, %ENDIF194 ] %304 = bitcast float %temp68.0 to i32 %305 = icmp sgt i32 %304, 7 br i1 %305, label %ENDLOOP, label %ENDIF179 ENDLOOP: ; preds = %ENDIF191, %LOOP %temp56.1 = phi float [ %temp56.0, %LOOP ], [ %temp56.2, %ENDIF191 ] %temp52.1 = phi float [ %temp52.0, %LOOP ], [ %temp52.2, %ENDIF191 ] %306 = fsub float %30, %temp16.2 %307 = fsub float %31, %temp17.0 %temp40.1 = select i1 %258, float %306, float %307 %308 = fsub float %temp32.1, %30 %309 = fsub float %temp33.0, %31 %temp16.4 = select i1 %258, float %308, float %309 %310 = fcmp olt float %temp40.1, %temp16.4 %temp52.1.temp56.1 = select i1 %310, float %temp52.1, float %temp56.1 %311 = fcmp olt float %100, %.234 %312 = fcmp olt float %temp52.1.temp56.1, %.234 %tmp = xor i1 %311, %312 %temp24.1 = select i1 %tmp, float %temp24.0, float 0.000000e+00 %313 = fadd float %temp16.4, %temp40.1 %temp40.1.temp16.4 = select i1 %310, float %temp40.1, float %temp16.4 %314 = fdiv float 1.000000e+00, %313 %315 = fmul float %314, %temp40.1.temp16.4 %316 = fsub float 5.000000e-01, %315 %317 = fmul float %316, %temp24.1 %temp24.2 = select i1 %258, float 0.000000e+00, float %317 %.237 = select i1 %258, float %317, float 0.000000e+00 %318 = fadd float %30, %temp24.2 %319 = fadd float %31, %.237 %320 = bitcast float %318 to i32 %321 = bitcast float %319 to i32 %322 = insertelement <4 x i32> undef, i32 %320, i32 0 %323 = insertelement <4 x i32> %322, i32 %321, i32 1 %324 = insertelement <4 x i32> %323, i32 0, i32 2 %325 = bitcast <8 x i32> %27 to <32 x i8> %326 = bitcast <4 x i32> %29 to <16 x i8> %327 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %324, <32 x i8> %325, <16 x i8> %326, i32 2) %328 = extractelement <4 x float> %327, i32 0 %329 = extractelement <4 x float> %327, i32 1 %330 = extractelement <4 x float> %327, i32 2 %331 = fmul float %209, %132 %332 = fadd float %331, %328 %333 = fmul float %210, %132 %334 = fadd float %333, %329 %335 = fmul float %211, %132 %336 = fadd float %335, %330 %337 = fmul float %132, %328 %338 = fsub float %332, %337 %339 = fmul float %132, %329 %340 = fsub float %334, %339 %341 = fmul float %132, %330 %342 = fsub float %336, %341 br label %ENDIF ENDIF179: ; preds = %LOOP %343 = bitcast float %temp12.1 to i32 %344 = icmp eq i32 %343, -1 br i1 %344, label %ENDIF182, label %IF183 IF183: ; preds = %ENDIF179 %345 = bitcast float %temp16.2 to i32 %346 = bitcast float %temp17.0 to i32 %347 = insertelement <8 x i32> %287, i32 %345, i32 4 %348 = insertelement <8 x i32> %347, i32 %346, i32 5 %349 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %348, <32 x i8> %288, <16 x i8> %289, i32 2) %350 = extractelement <4 x float> %349, i32 0 %351 = extractelement <4 x float> %349, i32 1 %352 = fmul float %351, 0x3FFF695000000000 %353 = fadd float %352, %350 br label %ENDIF182 ENDIF182: ; preds = %ENDIF179, %IF183 %temp52.2 = phi float [ %353, %IF183 ], [ %temp52.0, %ENDIF179 ] %354 = bitcast float %temp20.0 to i32 %355 = icmp eq i32 %354, -1 br i1 %355, label %ENDIF185, label %IF186 IF186: ; preds = %ENDIF182 %356 = bitcast float %temp32.1 to i32 %357 = bitcast float %temp33.0 to i32 %358 = insertelement <8 x i32> %297, i32 %356, i32 4 %359 = insertelement <8 x i32> %358, i32 %357, i32 5 %360 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %359, <32 x i8> %298, <16 x i8> %299, i32 2) %361 = extractelement <4 x float> %360, i32 0 %362 = extractelement <4 x float> %360, i32 1 %363 = fmul float %362, 0x3FFF695000000000 %364 = fadd float %363, %361 br label %ENDIF185 ENDIF185: ; preds = %ENDIF182, %IF186 %temp56.2 = phi float [ %364, %IF186 ], [ %temp56.0, %ENDIF182 ] %365 = bitcast float %temp12.1 to i32 %366 = icmp eq i32 %365, 0 br i1 %366, label %ELSE190, label %ENDIF188 ELSE190: ; preds = %ENDIF185 %367 = fsub float %temp52.2, %.234 %368 = call float @llvm.fabs.f32(float %367) %369 = fcmp oge float %368, %269 %370 = sext i1 %369 to i32 %371 = bitcast i32 %370 to float br label %ENDIF188 ENDIF188: ; preds = %ENDIF185, %ELSE190 %temp120.0 = phi float [ %371, %ELSE190 ], [ 0xFFFFFFFFE0000000, %ENDIF185 ] %372 = bitcast float %temp20.0 to i32 %373 = icmp eq i32 %372, 0 br i1 %373, label %ELSE193, label %ENDIF191 ELSE193: ; preds = %ENDIF188 %374 = fsub float %temp56.2, %.234 %375 = call float @llvm.fabs.f32(float %374) %376 = fcmp oge float %375, %269 %377 = sext i1 %376 to i32 %378 = bitcast i32 %377 to float br label %ENDIF191 ENDIF191: ; preds = %ENDIF188, %ELSE193 %temp128.0 = phi float [ %378, %ELSE193 ], [ 0xFFFFFFFFE0000000, %ENDIF188 ] %379 = bitcast float %temp120.0 to i32 %380 = bitcast float %temp128.0 to i32 %381 = and i32 %379, %380 %382 = icmp eq i32 %381, 0 br i1 %382, label %ENDIF194, label %ENDLOOP ENDIF194: ; preds = %ENDIF191 %383 = bitcast float %temp120.0 to i32 %384 = icmp ne i32 %383, -1 %385 = fsub float %temp16.2, %278 %386 = fsub float %temp17.0, %279 %temp17.1 = select i1 %384, float %386, float %temp17.0 %temp16.3 = select i1 %384, float %385, float %temp16.2 %387 = bitcast float %temp128.0 to i32 %388 = icmp ne i32 %387, -1 %389 = fadd float %temp32.1, %278 %390 = fadd float %temp33.0, %279 %temp33.1 = select i1 %388, float %390, float %temp33.0 %temp32.2 = select i1 %388, float %389, float %temp32.1 %391 = bitcast float %temp68.0 to i32 %392 = add i32 %391, 1 %393 = bitcast i32 %392 to float br label %LOOP } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sampled.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s10, s[0:3], 0x5 ; C2050105 s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, 0, s8, v2 ; D2820007 04081080 v_subrev_f32_e32 v8, s10, v3 ; 0A10060A v_subrev_f32_e32 v10, s8, v2 ; 0A140408 v_mad_f32 v11, 0, s10, v3 ; D282000B 040C1480 v_mov_b32_e32 v9, 0 ; 7E120280 v_mov_b32_e32 v12, v9 ; 7E180309 v_mov_b32_e32 v4, v9 ; 7E080309 image_sample_l v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[7:10], s[16:23], s[12:15] ; F0900F00 00641407 s_waitcnt vmcnt(0) ; BF8C0770 image_sample_l v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[16:23], s[12:15] ; F0900F00 0064170A v_add_f32_e32 v10, s8, v2 ; 06140408 v_mov_b32_e32 v12, v9 ; 7E180309 image_sample_l v[4:6], 7, 0, 0, 0, 0, 0, 0, 0, v[2:5], s[16:23], s[12:15] ; F0900700 00640402 image_sample_l v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[16:23], s[12:15] ; F0900F00 00640D0A v_add_f32_e32 v8, s10, v3 ; 0610060A s_waitcnt vmcnt(0) ; BF8C0770 image_sample_l v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[7:10], s[16:23], s[12:15] ; F0900F00 00641007 v_mov_b32_e32 v1, 0x3ffb4a80 ; 7E0202FF 3FFB4A80 v_mad_f32 v8, v1, v21, v20 ; D2820008 04522B01 v_mad_f32 v10, v1, v24, v23 ; D282000A 045E3101 v_mad_f32 v0, v1, v5, v4 ; D2820000 04120B01 v_mad_f32 v11, v1, v14, v13 ; D282000B 04361D01 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v1, v17, v16 ; D282000C 04422301 v_max3_f32 v1, v10, v12, v11 ; D2A80001 042E190A v_max3_f32 v7, v0, v8, v1 ; D2A80007 04061100 v_min3_f32 v1, v10, v12, v11 ; D2A20001 042E190A v_min3_f32 v1, v0, v8, v1 ; D2A20001 04061100 v_subrev_f32_e32 v1, v1, v7 ; 0A020F01 v_mul_f32_e32 v7, 0x3e000000, v7 ; 100E0EFF 3E000000 v_max_f32_e32 v7, 0x3d2aaaac, v7 ; 200E0EFF 3D2AAAAC v_cmp_nlt_f32_e32 vcc, v1, v7 ; 7C1C0F01 s_and_saveexec_b64 s[6:7], vcc ; BE86246A s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E s_cbranch_execz BB0_3 ; BF880000 v_rcp_f32_e32 v1, v1 ; 7E025501 v_mov_b32_e32 v19, s8 ; 7E260208 v_mov_b32_e32 v28, s10 ; 7E38020A v_add_f32_e32 v7, v10, v8 ; 060E110A v_add_f32_e32 v9, v12, v11 ; 0612170C v_add_f32_e32 v7, v9, v7 ; 060E0F09 v_mov_b32_e32 v29, 0x3e800000 ; 7E3A02FF 3E800000 v_mad_f32 v7, v7, v29, -v0 ; D2820007 84023B07 v_mov_b32_e32 v9, 0xbe800000 ; 7E1202FF BE800000 v_mad_f32 v1, |v7|, v1, v9 ; D2820101 04260307 v_max_f32_e32 v1, 0, v1 ; 20020280 v_mul_f32_e32 v1, 0x3faaaaa8, v1 ; 100202FF 3FAAAAA8 v_min_f32_e32 v1, 0x3f400000, v1 ; 1E0202FF 3F400000 v_add_f32_e32 v7, v23, v20 ; 060E2917 v_add_f32_e32 v9, v24, v21 ; 06122B18 v_add_f32_e32 v20, v25, v22 ; 06282D19 v_subrev_f32_e32 v21, s8, v2 ; 0A2A0408 v_subrev_f32_e32 v22, s10, v3 ; 0A2C060A v_mov_b32_e32 v23, 0 ; 7E2E0280 v_add_f32_e32 v4, v13, v4 ; 0608090D v_add_f32_e32 v5, v14, v5 ; 060A0B0E v_add_f32_e32 v24, s8, v2 ; 06300408 v_add_f32_e32 v6, v15, v6 ; 060C0D0F v_mov_b32_e32 v25, v22 ; 7E320316 image_sample_l v[13:15], 7, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[16:23], s[12:15] ; F0900700 00640D15 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v16, v13, v16 ; 0620210D v_add_f32_e32 v17, v14, v17 ; 0622230E v_add_f32_e32 v15, v15, v18 ; 061E250F v_mov_b32_e32 v26, v23 ; 7E340317 v_add_f32_e32 v22, s10, v3 ; 062C060A image_sample_l v[30:32], 7, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[16:23], s[12:15] ; F0900700 00641E18 v_mov_b32_e32 v25, v22 ; 7E320316 v_mov_b32_e32 v18, 0x3ffb4a80 ; 7E2402FF 3FFB4A80 v_mad_f32 v13, v18, v14, v13 ; D282000D 04361D12 v_mov_b32_e32 v26, v23 ; 7E340317 image_sample_l v[21:23], 7, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[16:23], s[12:15] ; F0900700 00641515 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v14, v21, v30 ; 061C3D15 v_add_f32_e32 v27, v22, v31 ; 06363F16 v_add_f32_e32 v23, v23, v32 ; 062E4117 v_mad_f32 v30, v18, v31, v30 ; D282001E 047A3F12 v_mad_f32 v21, v18, v22, v21 ; D2820015 04562D12 image_sample_l v[24:26], 7, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[16:23], s[12:15] ; F0900700 00641818 v_add_f32_e32 v4, v4, v7 ; 06080F04 v_add_f32_e32 v5, v5, v9 ; 060A1305 v_add_f32_e32 v6, v6, v20 ; 060C2906 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v7, v24, v14 ; 060E1D18 v_add_f32_e32 v7, v7, v16 ; 060E2107 v_add_f32_e32 v9, v25, v27 ; 06123719 v_add_f32_e32 v9, v9, v17 ; 06122309 v_add_f32_e32 v14, v26, v23 ; 061C2F1A v_add_f32_e32 v14, v14, v15 ; 061C1F0E v_add_f32_e32 v4, v7, v4 ; 06080907 v_add_f32_e32 v5, v9, v5 ; 060A0B09 v_add_f32_e32 v7, v14, v6 ; 060E0D0E v_mov_b32_e32 v9, 0x3de38e37 ; 7E1202FF 3DE38E37 v_mul_f32_e32 v6, v9, v4 ; 100C0909 v_mul_f32_e32 v5, v9, v5 ; 100A0B09 v_mul_f32_e32 v4, v9, v7 ; 10080F09 v_mad_f32 v9, v18, v25, v24 ; D2820009 04623312 v_mul_f32_e32 v13, v29, v13 ; 101A1B1D v_mad_f32 v7, -0.5, v10, v13 ; D2820007 043614F1 v_mul_f32_e32 v14, v29, v21 ; 101C2B1D v_mac_f32_e32 v7, v29, v21 ; 3E0E2B1D v_mad_f32 v15, 0.5, v8, -v0 ; D282000F 840210F0 v_mac_f32_e32 v15, 0.5, v12 ; 3E1E18F0 v_add_f32_e64 v7, |v7|, |v15| ; D2060307 00021F07 v_mul_f32_e32 v15, v29, v30 ; 101E3D1D v_mac_f32_e32 v15, -0.5, v11 ; 3E1E16F1 v_mac_f32_e32 v15, v29, v9 ; 3E1E131D v_add_f32_e64 v7, v7, |v15| ; D2060207 00021F07 v_mac_f32_e32 v13, -0.5, v8 ; 3E1A10F1 v_mac_f32_e32 v13, v29, v30 ; 3E1A3D1D v_mad_f32 v15, 0.5, v10, -v0 ; D282000F 840214F0 v_mac_f32_e32 v15, 0.5, v11 ; 3E1E16F0 v_add_f32_e64 v13, |v13|, |v15| ; D206030D 00021F0D v_mac_f32_e32 v14, -0.5, v12 ; 3E1C18F1 v_mac_f32_e32 v14, v29, v9 ; 3E1C131D v_add_f32_e64 v9, v13, |v14| ; D2060209 00021D0D v_cmp_ge_f32_e32 vcc, v7, v9 ; 7C0C1307 v_cndmask_b32_e32 v13, v19, v28 ; 001A3913 v_cndmask_b32_e32 v8, v10, v8 ; 0010110A v_cndmask_b32_e32 v10, v11, v12 ; 0014190B v_subrev_f32_e32 v11, v0, v8 ; 0A161100 v_mov_b32_e32 v12, 0x7fffffff ; 7E1802FF 7FFFFFFF v_and_b32_e32 v14, v11, v12 ; 361C190B v_subrev_f32_e32 v15, v0, v10 ; 0A1E1500 v_and_b32_e32 v12, v15, v12 ; 3618190F v_cmp_ge_f32_e64 s[0:1], |v11|, |v15| ; D00C0300 00021F0B v_cndmask_b32_e64 v10, v10, v8, s[0:1] ; D200000A 0002110A v_xor_b32_e32 v8, 0x80000000, v13 ; 3A101AFF 80000000 v_cndmask_b32_e64 v11, v12, v14, s[0:1] ; D200000B 00021D0C v_cndmask_b32_e64 v8, v13, v8, s[0:1] ; D2000008 0002110D v_mul_f32_e32 v12, 0.5, v8 ; 101810F0 v_cndmask_b32_e64 v13, v12, 0, vcc ; D200000D 01A9010C v_add_f32_e32 v26, v13, v2 ; 0634050D v_cndmask_b32_e32 v12, 0, v12 ; 00181880 v_add_f32_e32 v27, v12, v3 ; 0636070C v_mul_f32_e32 v11, v29, v11 ; 1016171D v_cndmask_b32_e64 v12, v28, 0, vcc ; D200000C 01A9011C v_cndmask_b32_e32 v13, 0, v19 ; 001A2680 v_mov_b32_e32 v14, 0xbfc00000 ; 7E1C02FF BFC00000 v_mad_f32 v18, v14, v13, v26 ; D2820012 046A1B0E v_mov_b32_e32 v15, 0x3fc00000 ; 7E1E02FF 3FC00000 v_mac_f32_e32 v26, v15, v13 ; 3E341B0F v_mad_f32 v28, v14, v12, v27 ; D282001C 046E190E v_mac_f32_e32 v27, v15, v12 ; 3E36190F v_add_f32_e32 v14, v13, v13 ; 061C1B0D v_add_f32_e32 v15, v12, v12 ; 061E190C v_add_f32_e32 v10, v0, v10 ; 06141500 v_mov_b32_e32 v16, v14 ; 7E20030E v_mul_f32_e32 v10, 0.5, v10 ; 101414F0 v_mov_b32_e32 v17, v15 ; 7E22030F v_mov_b32_e32 v19, 0 ; 7E260280 s_mov_b64 s[0:1], 0 ; BE800480 v_mov_b32_e32 v12, v10 ; 7E18030A v_mov_b32_e32 v13, v10 ; 7E1A030A v_mov_b32_e32 v24, 0 ; 7E300280 v_mov_b32_e32 v25, 0 ; 7E320280 v_mov_b32_e32 v23, v18 ; 7E2E0312 v_mov_b32_e32 v22, v28 ; 7E2C031C v_mov_b32_e32 v21, v26 ; 7E2A031A v_mov_b32_e32 v20, v27 ; 7E28031B v_cmp_gt_i32_e32 vcc, 8, v19 ; 7D082688 s_and_saveexec_b64 s[2:3], vcc ; BE82246A s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E s_cbranch_execz BB0_9 ; BF880000 v_cmp_ne_i32_e32 vcc, -1, v25 ; 7D0A32C1 s_and_saveexec_b64 s[4:5], vcc ; BE84246A s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E s_cbranch_execz BB0_10 ; BF880000 v_mov_b32_e32 v18, v23 ; 7E240317 v_mov_b32_e32 v26, v14 ; 7E34030E v_mov_b32_e32 v27, v15 ; 7E36030F v_mov_b32_e32 v28, v16 ; 7E380310 v_mov_b32_e32 v29, v17 ; 7E3A0311 v_mov_b32_e32 v30, v18 ; 7E3C0312 v_mov_b32_e32 v31, v19 ; 7E3E0313 v_mov_b32_e32 v32, v20 ; 7E400314 v_mov_b32_e32 v33, v21 ; 7E420315 v_mov_b32_e32 v31, v22 ; 7E3E0316 image_sample_d v[26:27], 3, 0, 0, 0, 0, 0, 0, 0, v[26:33], s[16:23], s[12:15] ; F0880300 00641A1A s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v13, v27, v26, 0x3ffb4a80 ; 401A351B 3FFB4A80 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_cmp_ne_i32_e32 vcc, -1, v24 ; 7D0A30C1 s_and_saveexec_b64 s[4:5], vcc ; BE84246A s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E s_cbranch_execz BB0_12 ; BF880000 v_mov_b32_e32 v26, v14 ; 7E34030E v_mov_b32_e32 v27, v15 ; 7E36030F v_mov_b32_e32 v28, v16 ; 7E380310 v_mov_b32_e32 v29, v17 ; 7E3A0311 v_mov_b32_e32 v30, v18 ; 7E3C0312 v_mov_b32_e32 v31, v19 ; 7E3E0313 v_mov_b32_e32 v32, v20 ; 7E400314 v_mov_b32_e32 v33, v21 ; 7E420315 v_mov_b32_e32 v30, v21 ; 7E3C0315 v_mov_b32_e32 v31, v20 ; 7E3E0314 image_sample_d v[26:27], 3, 0, 0, 0, 0, 0, 0, 0, v[26:33], s[16:23], s[12:15] ; F0880300 00641A1A s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v12, v27, v26, 0x3ffb4a80 ; 4018351B 3FFB4A80 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_cmp_eq_i32_e32 vcc, 0, v25 ; 7D043280 v_mov_b32_e32 v25, -1 ; 7E3202C1 s_and_saveexec_b64 s[4:5], vcc ; BE84246A s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_subrev_f32_e32 v18, v10, v13 ; 0A241B0A v_cmp_ge_f32_e64 s[24:25], |v18|, v11 ; D00C0118 00021712 v_cndmask_b32_e64 v25, 0, -1, s[24:25] ; D2000019 00618280 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_cmp_eq_i32_e32 vcc, 0, v24 ; 7D043080 v_mov_b32_e32 v24, -1 ; 7E3002C1 s_and_saveexec_b64 s[4:5], vcc ; BE84246A s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_subrev_f32_e32 v18, v10, v12 ; 0A24190A v_cmp_ge_f32_e64 s[24:25], |v18|, v11 ; D00C0118 00021712 v_cndmask_b32_e64 v24, 0, -1, s[24:25] ; D2000018 00618280 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_and_b32_e32 v18, v24, v25 ; 36243318 v_cmp_eq_i32_e32 vcc, 0, v18 ; 7D042480 s_and_saveexec_b64 s[4:5], vcc ; BE84246A s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E s_cbranch_execz BB0_5 ; BF880000 v_cmp_ne_i32_e32 vcc, -1, v25 ; 7D0A32C1 v_subrev_f32_e32 v18, v14, v23 ; 0A242F0E v_subrev_f32_e32 v26, v15, v22 ; 0A342D0F v_cndmask_b32_e32 v28, v22, v26 ; 00383516 v_cndmask_b32_e32 v18, v23, v18 ; 00242517 v_cmp_ne_i32_e32 vcc, -1, v24 ; 7D0A30C1 v_add_f32_e32 v26, v14, v21 ; 06342B0E v_add_f32_e32 v27, v15, v20 ; 0636290F v_cndmask_b32_e32 v27, v20, v27 ; 00363714 v_cndmask_b32_e32 v26, v21, v26 ; 00343515 v_add_i32_e32 v19, 1, v19 ; 4A262681 s_or_b64 exec, exec, s[4:5] ; 88FE047E s_or_b64 s[0:1], s[4:5], s[0:1] ; 88800004 s_or_b64 exec, exec, s[2:3] ; 88FE027E s_or_b64 s[0:1], s[2:3], s[0:1] ; 88800002 s_andn2_b64 exec, exec, s[0:1] ; 8AFE007E s_cbranch_execnz BB0_4 ; BF890000 s_or_b64 exec, exec, s[0:1] ; 88FE007E v_cmp_ge_f32_e32 vcc, v7, v9 ; 7C0C1307 v_subrev_f32_e32 v7, v23, v2 ; 0A0E0517 v_subrev_f32_e32 v9, v22, v3 ; 0A120716 v_cndmask_b32_e32 v7, v9, v7 ; 000E0F09 v_subrev_f32_e32 v9, v2, v21 ; 0A122B02 v_subrev_f32_e32 v11, v3, v20 ; 0A162903 v_cndmask_b32_e32 v9, v11, v9 ; 0012130B v_add_f32_e32 v11, v7, v9 ; 06161307 v_mov_b32_e32 v14, 0x6f800000 ; 7E1C02FF 6F800000 v_cmp_gt_f32_e64 s[0:1], |v11|, v14 ; D0080100 00021D0B v_mov_b32_e32 v14, 0x2f800000 ; 7E1C02FF 2F800000 v_cndmask_b32_e64 v14, 1.0, v14, s[0:1] ; D200000E 00021CF2 v_cmp_lt_f32_e64 s[0:1], v7, v9 ; D0020000 00021307 v_cndmask_b32_e64 v12, v12, v13, s[0:1] ; D200000C 00021B0C v_cmp_lt_f32_e64 s[2:3], v0, v10 ; D0020002 00021500 v_cmp_lt_f32_e64 s[4:5], v12, v10 ; D0020004 0002150C v_mul_f32_e32 v0, v14, v11 ; 1000170E v_rcp_f32_e32 v0, v0 ; 7E005500 s_xor_b64 s[2:3], s[2:3], s[4:5] ; 89820402 v_cndmask_b32_e64 v8, 0, v8, s[2:3] ; D2000008 000A1080 v_cndmask_b32_e64 v7, v9, v7, s[0:1] ; D2000007 00020F09 v_mul_f32_e32 v0, v0, v14 ; 10001D00 v_mad_f32 v0, -v0, v7, 0.5 ; D2820000 23C20F00 v_mul_f32_e32 v0, v8, v0 ; 10000108 v_cndmask_b32_e64 v7, v0, 0, vcc ; D2000007 01A90100 v_cndmask_b32_e32 v0, 0, v0 ; 00000080 v_add_f32_e32 v7, v7, v2 ; 060E0507 v_add_f32_e32 v8, v0, v3 ; 06100700 v_mov_b32_e32 v9, 0 ; 7E120280 image_sample_l v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[7:10], s[16:23], s[12:15] ; F0900700 00640707 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v1, v6, v7 ; D2820000 041E0D01 v_mad_f32 v2, v1, v5, v8 ; D2820002 04220B01 v_mad_f32 v3, v1, v4, v9 ; D2820003 04260901 v_mad_f32 v4, -v1, v7, v0 ; D2820004 24020F01 v_mad_f32 v5, -v1, v8, v2 ; D2820005 240A1101 v_mad_f32 v6, -v1, v9, v3 ; D2820006 240E1301 s_or_b64 exec, exec, s[6:7] ; 88FE067E v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04 v_cvt_pkrtz_f16_f32_e64 v1, v6, 0 ; D25E0001 00010106 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 36 Code Size: 1504 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..5] DCL TEMP[0..2], LOCAL 0: MUL TEMP[0], CONST[2], IN[0].xxxx 1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[5], IN[0].wwww, TEMP[0] 4: MAD TEMP[1].xy, IN[2].xyyy, CONST[1].xyyy, CONST[1].zwww 5: MAD TEMP[2].x, TEMP[0].zzzz, CONST[0].zzzz, CONST[0].wwww 6: MOV TEMP[1].z, TEMP[2].xxxx 7: MOV OUT[2], TEMP[1] 8: MOV OUT[0], TEMP[0] 9: MOV OUT[1], IN[1] 10: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %7 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = extractelement <4 x float> %46, i32 3 %51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = add i32 %5, %7 %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %52, i32 0, i32 %53) %55 = extractelement <4 x float> %54, i32 0 %56 = extractelement <4 x float> %54, i32 1 %57 = fmul float %19, %39 %58 = fmul float %20, %39 %59 = fmul float %21, %39 %60 = fmul float %22, %39 %61 = fmul float %23, %40 %62 = fadd float %61, %57 %63 = fmul float %24, %40 %64 = fadd float %63, %58 %65 = fmul float %25, %40 %66 = fadd float %65, %59 %67 = fmul float %26, %40 %68 = fadd float %67, %60 %69 = fmul float %27, %41 %70 = fadd float %69, %62 %71 = fmul float %28, %41 %72 = fadd float %71, %64 %73 = fmul float %29, %41 %74 = fadd float %73, %66 %75 = fmul float %30, %41 %76 = fadd float %75, %68 %77 = fmul float %31, %42 %78 = fadd float %77, %70 %79 = fmul float %32, %42 %80 = fadd float %79, %72 %81 = fmul float %33, %42 %82 = fadd float %81, %74 %83 = fmul float %34, %42 %84 = fadd float %83, %76 %85 = fmul float %55, %15 %86 = fadd float %85, %17 %87 = fmul float %56, %16 %88 = fadd float %87, %18 %89 = fmul float %82, %13 %90 = fadd float %89, %14 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %47, float %48, float %49, float %50) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %88, float %90, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %78, float %80, float %82, float %84) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s16, s[0:3], 0x7 ; C2080107 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 buffer_load_format_xyzw v[9:12], v0, s[8:11], 0 idxen ; E00C2000 80020900 s_buffer_load_dword s5, s[0:3], 0x9 ; C2028109 s_buffer_load_dword s6, s[0:3], 0xa ; C203010A s_buffer_load_dword s7, s[0:3], 0xb ; C203810B s_buffer_load_dword s8, s[0:3], 0xc ; C204010C s_buffer_load_dword s9, s[0:3], 0xd ; C204810D s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s16 ; 7E000210 s_buffer_load_dword s10, s[0:3], 0xe ; C205010E s_buffer_load_dword s11, s[0:3], 0xf ; C205810F s_buffer_load_dword s12, s[0:3], 0x10 ; C2060110 s_buffer_load_dword s13, s[0:3], 0x2 ; C2068102 s_buffer_load_dword s14, s[0:3], 0x6 ; C2070106 s_buffer_load_dword s15, s[0:3], 0x4 ; C2078104 s_buffer_load_dword s16, s[0:3], 0x5 ; C2080105 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v11, s14 ; 7E16020E v_mac_f32_e32 v11, s15, v9 ; 3E16120F v_mac_f32_e32 v0, s16, v10 ; 3E001410 s_buffer_load_dword s14, s[0:3], 0x3 ; C2070103 s_buffer_load_dword s15, s[0:3], 0x11 ; C2078111 s_buffer_load_dword s16, s[0:3], 0x12 ; C2080112 s_buffer_load_dword s17, s[0:3], 0x13 ; C2088113 s_buffer_load_dword s18, s[0:3], 0x14 ; C2090114 s_buffer_load_dword s19, s[0:3], 0x15 ; C2098115 s_buffer_load_dword s20, s[0:3], 0x16 ; C20A0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 v_mul_f32_e32 v9, s4, v1 ; 10120204 v_mac_f32_e32 v9, s8, v2 ; 3E120408 v_mul_f32_e32 v10, s5, v1 ; 10140205 v_mac_f32_e32 v10, s9, v2 ; 3E140409 v_mul_f32_e32 v12, s6, v1 ; 10180206 v_mac_f32_e32 v12, s10, v2 ; 3E18040A v_mul_f32_e32 v1, s7, v1 ; 10020207 v_mac_f32_e32 v1, s11, v2 ; 3E02040B v_mac_f32_e32 v9, s12, v3 ; 3E12060C s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v10, s15, v3 ; 3E14060F v_mac_f32_e32 v12, s16, v3 ; 3E180610 v_mac_f32_e32 v1, s17, v3 ; 3E020611 v_mac_f32_e32 v9, s18, v4 ; 3E120812 v_mac_f32_e32 v10, s19, v4 ; 3E140813 v_mac_f32_e32 v12, s20, v4 ; 3E180814 v_mac_f32_e32 v1, s0, v4 ; 3E020800 exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605 v_mov_b32_e32 v2, s14 ; 7E04020E v_mac_f32_e32 v2, s13, v12 ; 3E04180D v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 33, 0, 0, 0, v11, v0, v2, v3 ; F800021F 0302000B exp 15, 12, 0, 1, 0, v9, v10, v12, v1 ; F80008CF 010C0A09 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 268 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 4 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], IMM[0].xxxx, IN[0] 1: MOV TEMP[1].xy, IN[1].xyyy 2: TEX TEMP[1], TEMP[1], SAMP[0], 2D 3: MUL TEMP[1], CONST[1], TEMP[1] 4: MUL TEMP[0], TEMP[0], TEMP[1] 5: MOV TEMP[1].w, TEMP[0].wwww 6: MOV_SAT TEMP[2].x, IN[1].zzzz 7: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[2].xxxx 8: MOV OUT[0], TEMP[1] 9: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %28 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, align 32, !tbaa !0 %30 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %39 = fmul float %32, 2.000000e+00 %40 = fmul float %33, 2.000000e+00 %41 = fmul float %34, 2.000000e+00 %42 = fmul float %35, 2.000000e+00 %43 = bitcast float %36 to i32 %44 = bitcast float %37 to i32 %45 = insertelement <2 x i32> undef, i32 %43, i32 0 %46 = insertelement <2 x i32> %45, i32 %44, i32 1 %47 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %46, <32 x i8> %29, <16 x i8> %31, i32 2) %48 = extractelement <4 x float> %47, i32 0 %49 = extractelement <4 x float> %47, i32 1 %50 = extractelement <4 x float> %47, i32 2 %51 = extractelement <4 x float> %47, i32 3 %52 = fmul float %24, %48 %53 = fmul float %25, %49 %54 = fmul float %26, %50 %55 = fmul float %27, %51 %56 = fmul float %39, %52 %57 = fmul float %40, %53 %58 = fmul float %41, %54 %59 = fmul float %42, %55 %60 = call float @llvm.AMDIL.clamp.(float %38, float 0.000000e+00, float 1.000000e+00) %61 = fmul float %56, %60 %62 = fmul float %57, %60 %63 = fmul float %58, %60 %64 = fcmp ugt float %59, %4 %65 = select i1 %64, float 1.000000e+00, float -1.000000e+00 call void @llvm.AMDGPU.kill(float %65) %66 = call i32 @llvm.SI.packf16(float %61, float %62) %67 = bitcast i32 %66 to float %68 = call i32 @llvm.SI.packf16(float %63, float %59) %69 = bitcast i32 %68 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %67, float %69, float %67, float %69) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s0, s[0:3], 0x7 ; C2000107 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600 v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[6:9], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[16:23], s[12:15] ; F0800F00 00640606 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, s4, v6 ; 10020C04 v_mul_f32_e32 v6, s5, v7 ; 100C0E05 v_mul_f32_e32 v7, s9, v8 ; 100E1009 v_mul_f32_e32 v8, s0, v9 ; 10101200 v_add_f32_e32 v2, v2, v2 ; 06040502 v_mul_f32_e32 v1, v1, v2 ; 10020501 v_add_f32_e32 v2, v3, v3 ; 06040703 v_mul_f32_e32 v2, v6, v2 ; 10040506 v_add_f32_e32 v3, v4, v4 ; 06060904 v_mul_f32_e32 v3, v7, v3 ; 10060707 v_add_f32_e32 v4, v5, v5 ; 06080B05 v_mul_f32_e32 v4, v8, v4 ; 10080908 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v1, v0, v1 ; 10020300 v_mul_f32_e32 v2, v0, v2 ; 10040500 v_mul_f32_e32 v0, v0, v3 ; 10000700 v_cmp_nge_f32_e32 vcc, s8, v4 ; 7C120808 v_cndmask_b32_e64 v3, -1.0, 1.0, vcc ; D2000003 01A9E4F3 v_cmpx_le_f32_e32 vcc, 0, v3 ; 7C260680 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v0, v0, v4 ; 5E000900 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 12 Code Size: 216 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..4] DCL TEMP[0..2], LOCAL 0: MUL TEMP[0], CONST[1], IN[0].xxxx 1: MAD TEMP[0], CONST[2], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[3], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[4], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xy, IN[2].xyxx 5: MAD TEMP[2].xy, IN[0].xyyy, CONST[0].xyyy, CONST[0].zwww 6: MOV TEMP[1].zw, TEMP[2].yyxy 7: MOV OUT[2], TEMP[1] 8: MOV OUT[0], TEMP[0] 9: MOV OUT[1], IN[1] 10: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = add i32 %5, %7 %36 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %34, i32 0, i32 %35) %37 = extractelement <4 x float> %36, i32 0 %38 = extractelement <4 x float> %36, i32 1 %39 = extractelement <4 x float> %36, i32 2 %40 = extractelement <4 x float> %36, i32 3 %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = extractelement <4 x float> %44, i32 3 %49 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 %51 = add i32 %5, %7 %52 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %51) %53 = extractelement <4 x float> %52, i32 0 %54 = extractelement <4 x float> %52, i32 1 %55 = fmul float %17, %37 %56 = fmul float %18, %37 %57 = fmul float %19, %37 %58 = fmul float %20, %37 %59 = fmul float %21, %38 %60 = fadd float %59, %55 %61 = fmul float %22, %38 %62 = fadd float %61, %56 %63 = fmul float %23, %38 %64 = fadd float %63, %57 %65 = fmul float %24, %38 %66 = fadd float %65, %58 %67 = fmul float %25, %39 %68 = fadd float %67, %60 %69 = fmul float %26, %39 %70 = fadd float %69, %62 %71 = fmul float %27, %39 %72 = fadd float %71, %64 %73 = fmul float %28, %39 %74 = fadd float %73, %66 %75 = fmul float %29, %40 %76 = fadd float %75, %68 %77 = fmul float %30, %40 %78 = fadd float %77, %70 %79 = fmul float %31, %40 %80 = fadd float %79, %72 %81 = fmul float %32, %40 %82 = fadd float %81, %74 %83 = fmul float %37, %13 %84 = fadd float %83, %15 %85 = fmul float %38, %14 %86 = fadd float %85, %16 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %45, float %46, float %47, float %48) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %53, float %54, float %84, float %86) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %76, float %78, float %80, float %82) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 buffer_load_format_xyzw v[9:12], v0, s[16:19], 0 idxen ; E00C2000 80040900 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101 exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s4 ; 7E000204 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v5, s5 ; 7E0A0205 v_mac_f32_e32 v0, s6, v1 ; 3E000206 v_mac_f32_e32 v5, s7, v2 ; 3E0A0407 exp 15, 33, 0, 0, 0, v9, v10, v0, v5 ; F800021F 05000A09 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x7 ; C2030107 s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108 s_buffer_load_dword s9, s[0:3], 0x9 ; C2048109 s_buffer_load_dword s10, s[0:3], 0xa ; C205010A s_buffer_load_dword s11, s[0:3], 0xb ; C205810B s_buffer_load_dword s12, s[0:3], 0xc ; C206010C s_buffer_load_dword s13, s[0:3], 0xd ; C206810D s_buffer_load_dword s14, s[0:3], 0xe ; C207010E s_buffer_load_dword s15, s[0:3], 0xf ; C207810F s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110 s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111 s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112 s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s8, v1 ; 10000208 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s7, v2 ; 3E000407 v_mul_f32_e32 v5, s4, v1 ; 100A0204 v_mac_f32_e32 v5, s9, v2 ; 3E0A0409 v_mul_f32_e32 v6, s5, v1 ; 100C0205 v_mac_f32_e32 v6, s10, v2 ; 3E0C040A v_mul_f32_e32 v1, s6, v1 ; 10020206 v_mac_f32_e32 v1, s11, v2 ; 3E02040B v_mac_f32_e32 v0, s12, v3 ; 3E00060C v_mac_f32_e32 v5, s13, v3 ; 3E0A060D v_mac_f32_e32 v6, s14, v3 ; 3E0C060E v_mac_f32_e32 v1, s15, v3 ; 3E02060F v_mac_f32_e32 v0, s16, v4 ; 3E000810 v_mac_f32_e32 v5, s17, v4 ; 3E0A0811 v_mac_f32_e32 v6, s18, v4 ; 3E0C0812 v_mac_f32_e32 v1, s0, v4 ; 3E020800 exp 15, 12, 0, 1, 0, v0, v5, v6, v1 ; F80008CF 01060500 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 256 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: ABS TEMP[0].xy, IN[1].zwww 1: MAX TEMP[0].x, TEMP[0].xxxx, TEMP[0].yyyy 2: FSLT TEMP[0].x, IMM[0].xxxx, TEMP[0].xxxx 3: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx 4: KILL_IF -TEMP[0].xxxx 5: MOV TEMP[0].xy, IN[1].xyyy 6: TEX TEMP[0], TEMP[0], SAMP[0], 2D 7: MUL TEMP[0], TEMP[0], IN[0] 8: MOV OUT[0], TEMP[0] 9: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %34 = call float @llvm.fabs.f32(float %32) %35 = call float @llvm.fabs.f32(float %33) %36 = call float @llvm.maxnum.f32(float %34, float %35) %37 = fcmp ogt float %36, 1.000000e+00 %38 = select i1 %37, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %38) %39 = bitcast float %30 to i32 %40 = bitcast float %31 to i32 %41 = insertelement <2 x i32> undef, i32 %39, i32 0 %42 = insertelement <2 x i32> %41, i32 %40, i32 1 %43 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %42, <32 x i8> %23, <16 x i8> %25, i32 2) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = extractelement <4 x float> %43, i32 2 %47 = extractelement <4 x float> %43, i32 3 %48 = fmul float %44, %26 %49 = fmul float %45, %27 %50 = fmul float %46, %28 %51 = fmul float %47, %29 %52 = call i32 @llvm.SI.packf16(float %48, float %49) %53 = bitcast i32 %52 to float %54 = call i32 @llvm.SI.packf16(float %50, float %51) %55 = bitcast i32 %54 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %53, float %55, float %53, float %55) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 v_max_f32_e64 v0, |v8|, |v0| ; D2200300 00020108 v_cmp_lt_f32_e32 vcc, 1.0, v0 ; 7C0200F2 v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[6:9], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[4:11], s[0:3] ; F0800F00 00010606 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_mul_f32_e32 v1, v3, v7 ; 10020F03 v_mul_f32_e32 v2, v4, v8 ; 10041104 v_mul_f32_e32 v3, v5, v9 ; 10061305 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 12 Code Size: 156 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xy, IN[1].xyxx 5: MOV OUT[1], TEMP[1] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = fmul float %13, %33 %44 = fmul float %14, %33 %45 = fmul float %15, %33 %46 = fmul float %16, %33 %47 = fmul float %17, %34 %48 = fadd float %47, %43 %49 = fmul float %18, %34 %50 = fadd float %49, %44 %51 = fmul float %19, %34 %52 = fadd float %51, %45 %53 = fmul float %20, %34 %54 = fadd float %53, %46 %55 = fmul float %21, %35 %56 = fadd float %55, %48 %57 = fmul float %22, %35 %58 = fadd float %57, %50 %59 = fmul float %23, %35 %60 = fadd float %59, %52 %61 = fmul float %24, %35 %62 = fadd float %61, %54 %63 = fmul float %25, %36 %64 = fadd float %63, %56 %65 = fmul float %26, %36 %66 = fadd float %65, %58 %67 = fmul float %27, %36 %68 = fadd float %67, %60 %69 = fmul float %28, %36 %70 = fadd float %69, %62 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v0, s12, v2 ; 1000040C s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, s6, v3 ; 3E000606 v_mul_f32_e32 v8, s13, v2 ; 1010040D v_mac_f32_e32 v8, s7, v3 ; 3E100607 v_mul_f32_e32 v9, s4, v2 ; 10120404 v_mac_f32_e32 v9, s8, v3 ; 3E120608 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mac_f32_e32 v2, s9, v3 ; 3E040609 v_mac_f32_e32 v0, s10, v4 ; 3E00080A v_mac_f32_e32 v8, s11, v4 ; 3E10080B v_mac_f32_e32 v9, s14, v4 ; 3E12080E v_mac_f32_e32 v2, s15, v4 ; 3E04080F v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mac_f32_e32 v8, s17, v5 ; 3E100A11 v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706 exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 196 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[0] DCL CONST[4..12] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { 1.0000, 0.0156, 0.0500, 0.0800} IMM[1] FLT32 { 0.5000, 2.0000, -1.0000, 4.0000} 0: MUL TEMP[0].x, IN[0].xxxx, CONST[4].xxxx 1: FSLT TEMP[1].x, TEMP[0].xxxx, CONST[5].xxxx 2: UIF TEMP[1].xxxx :0 3: RCP TEMP[1].x, CONST[4].zzzz 4: MUL TEMP[1].x, TEMP[0].xxxx, TEMP[1].xxxx 5: ELSE :0 6: ADD TEMP[2].x, CONST[4].xxxx, -TEMP[0].xxxx 7: FSLT TEMP[2].x, TEMP[2].xxxx, CONST[5].zzzz 8: UIF TEMP[2].xxxx :0 9: ADD TEMP[2].x, CONST[4].xxxx, -TEMP[0].xxxx 10: RCP TEMP[3].x, CONST[4].zzzz 11: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx 12: ADD TEMP[1].x, IMM[0].xxxx, -TEMP[2].xxxx 13: ELSE :0 14: MOV TEMP[2].x, -CONST[5].xxxx 15: MOV TEMP[3].x, -CONST[5].zzzz 16: ADD TEMP[4].x, TEMP[0].xxxx, TEMP[2].xxxx 17: ADD TEMP[5].x, CONST[4].xxxx, TEMP[2].xxxx 18: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[3].xxxx 19: RCP TEMP[5].x, TEMP[5].xxxx 20: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 21: ADD TEMP[2].x, CONST[4].zzzz, TEMP[2].xxxx 22: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx 23: MAD TEMP[2].x, TEMP[4].xxxx, TEMP[2].xxxx, CONST[5].xxxx 24: RCP TEMP[3].x, CONST[4].zzzz 25: MUL TEMP[1].x, TEMP[2].xxxx, TEMP[3].xxxx 26: ENDIF 27: ENDIF 28: MOV TEMP[0].x, TEMP[1].xxxx 29: MUL TEMP[1].x, IN[0].yyyy, CONST[4].yyyy 30: FSLT TEMP[2].x, TEMP[1].xxxx, CONST[5].wwww 31: UIF TEMP[2].xxxx :0 32: RCP TEMP[2].x, CONST[4].wwww 33: MUL TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx 34: ELSE :0 35: ADD TEMP[3].x, CONST[4].yyyy, -TEMP[1].xxxx 36: FSLT TEMP[3].x, TEMP[3].xxxx, CONST[5].yyyy 37: UIF TEMP[3].xxxx :0 38: ADD TEMP[3].x, CONST[4].yyyy, -TEMP[1].xxxx 39: RCP TEMP[4].x, CONST[4].wwww 40: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx 41: ADD TEMP[2].x, IMM[0].xxxx, -TEMP[3].xxxx 42: ELSE :0 43: MOV TEMP[3].x, -CONST[5].wwww 44: MOV TEMP[4].x, -CONST[5].yyyy 45: ADD TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx 46: ADD TEMP[5].x, CONST[4].yyyy, TEMP[3].xxxx 47: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[4].xxxx 48: RCP TEMP[5].x, TEMP[5].xxxx 49: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx 50: ADD TEMP[3].x, CONST[4].wwww, TEMP[3].xxxx 51: ADD TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx 52: MAD TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx, CONST[5].wwww 53: RCP TEMP[3].x, CONST[4].wwww 54: MUL TEMP[2].x, TEMP[1].xxxx, TEMP[3].xxxx 55: ENDIF 56: ENDIF 57: MOV TEMP[0].y, TEMP[2].xxxx 58: MOV TEMP[0].xy, TEMP[0].xyyy 59: TEX TEMP[0].x, TEMP[0], SAMP[1], 2D 60: ADD TEMP[1].xy, IN[0].xyyy, CONST[6].xxxx 61: MUL TEMP[1].xy, TEMP[1].xyyy, CONST[4].xyyy 62: MUL TEMP[1].xy, TEMP[1].xyyy, IMM[0].yyyy 63: MOV TEMP[1].xy, TEMP[1].xyyy 64: TEX TEMP[1].xy, TEMP[1], SAMP[2], 2D 65: MAD TEMP[1].xy, TEMP[1].xyyy, CONST[12].xxxx, IN[0].xyyy 66: MUL TEMP[2].x, CONST[6].xxxx, IMM[0].zzzz 67: ADD TEMP[2].x, TEMP[1].xxxx, -TEMP[2].xxxx 68: MAD TEMP[1].x, CONST[6].xxxx, IMM[0].wwww, TEMP[1].yyyy 69: MOV TEMP[2].y, TEMP[1].xxxx 70: MUL TEMP[1].xy, TEMP[2].xyyy, IMM[1].xxxx 71: FRC TEMP[1].xy, TEMP[1].xyyy 72: MAD TEMP[1].xy, TEMP[1].xyyy, IMM[1].yyyy, IMM[1].zzzz 73: ABS TEMP[1].xy, TEMP[1].xyyy 74: LRP TEMP[2], TEMP[1].xxxx, CONST[8], CONST[7] 75: LRP TEMP[3], TEMP[1].xxxx, CONST[10], CONST[9] 76: LRP TEMP[2], TEMP[1].yyyy, TEMP[3], TEMP[2] 77: MOV TEMP[1].xy, TEMP[1].xyyy 78: TEX TEMP[1], TEMP[1], SAMP[0], 2D 79: MUL TEMP[1], TEMP[2], TEMP[1] 80: MUL TEMP[2].x, TEMP[0].xxxx, CONST[11].xxxx 81: ADD TEMP[3].x, IN[0].xxxx, IN[0].yyyy 82: MAD TEMP[3].x, CONST[0].xxxx, CONST[12].wwww, TEMP[3].xxxx 83: MUL TEMP[3].x, TEMP[3].xxxx, CONST[12].zzzz 84: SIN TEMP[3].x, TEMP[3].xxxx 85: ADD TEMP[3].x, TEMP[3].xxxx, IMM[1].yyyy 86: ADD TEMP[0].x, TEMP[0].xxxx, -CONST[12].yyyy 87: MOV_SAT TEMP[0].x, TEMP[0].xxxx 88: MUL TEMP[0].x, TEMP[0].xxxx, IMM[1].wwww 89: LRP TEMP[0].x, TEMP[0].xxxx, TEMP[3].xxxx, IMM[0].xxxx 90: MUL TEMP[0].x, TEMP[2].xxxx, TEMP[0].xxxx 91: MUL TEMP[0], TEMP[1], TEMP[0].xxxx 92: MOV OUT[0], TEMP[0] 93: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %55 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %56 = load <32 x i8>, <32 x i8> addrspace(2)* %55, align 32, !tbaa !0 %57 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %60 = bitcast <8 x i32> addrspace(2)* %59 to <32 x i8> addrspace(2)* %61 = load <32 x i8>, <32 x i8> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %63 = bitcast <4 x i32> addrspace(2)* %62 to <16 x i8> addrspace(2)* %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0 %65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %66 = bitcast <8 x i32> addrspace(2)* %65 to <32 x i8> addrspace(2)* %67 = load <32 x i8>, <32 x i8> addrspace(2)* %66, align 32, !tbaa !0 %68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %69 = bitcast <4 x i32> addrspace(2)* %68 to <16 x i8> addrspace(2)* %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %73 = fmul float %71, %25 %74 = fcmp olt float %73, %29 br i1 %74, label %IF, label %ELSE IF: ; preds = %main_body %75 = fdiv float 1.000000e+00, %27 %76 = fmul float %73, %75 br label %ENDIF ELSE: ; preds = %main_body %77 = fsub float %25, %73 %78 = fcmp olt float %77, %31 br i1 %78, label %IF25, label %ELSE26 ENDIF: ; preds = %IF25, %ELSE26, %IF %temp4.0 = phi float [ %76, %IF ], [ %84, %IF25 ], [ %95, %ELSE26 ] %79 = fmul float %72, %26 %80 = fcmp olt float %79, %32 br i1 %80, label %IF28, label %ELSE29 IF25: ; preds = %ELSE %81 = fsub float %25, %73 %82 = fdiv float 1.000000e+00, %27 %83 = fmul float %81, %82 %84 = fsub float 1.000000e+00, %83 br label %ENDIF ELSE26: ; preds = %ELSE %85 = fsub float %73, %29 %86 = fsub float %25, %29 %87 = fsub float %86, %31 %88 = fdiv float 1.000000e+00, %87 %89 = fmul float %85, %88 %90 = fsub float %27, %29 %91 = fsub float %90, %31 %92 = fmul float %89, %91 %93 = fadd float %92, %29 %94 = fdiv float 1.000000e+00, %27 %95 = fmul float %93, %94 br label %ENDIF IF28: ; preds = %ENDIF %96 = fdiv float 1.000000e+00, %28 %97 = fmul float %79, %96 br label %ENDIF27 ELSE29: ; preds = %ENDIF %98 = fsub float %26, %79 %99 = fcmp olt float %98, %30 br i1 %99, label %IF31, label %ELSE32 ENDIF27: ; preds = %IF31, %ELSE32, %IF28 %temp8.0 = phi float [ %97, %IF28 ], [ %187, %IF31 ], [ %198, %ELSE32 ] %100 = bitcast float %temp4.0 to i32 %101 = bitcast float %temp8.0 to i32 %102 = insertelement <2 x i32> undef, i32 %100, i32 0 %103 = insertelement <2 x i32> %102, i32 %101, i32 1 %104 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %103, <32 x i8> %61, <16 x i8> %64, i32 2) %105 = extractelement <4 x float> %104, i32 0 %106 = fadd float %71, %33 %107 = fadd float %72, %33 %108 = fmul float %106, %25 %109 = fmul float %107, %26 %110 = fmul float %108, 1.562500e-02 %111 = fmul float %109, 1.562500e-02 %112 = bitcast float %110 to i32 %113 = bitcast float %111 to i32 %114 = insertelement <2 x i32> undef, i32 %112, i32 0 %115 = insertelement <2 x i32> %114, i32 %113, i32 1 %116 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %115, <32 x i8> %67, <16 x i8> %70, i32 2) %117 = extractelement <4 x float> %116, i32 0 %118 = extractelement <4 x float> %116, i32 1 %119 = fmul float %117, %51 %120 = fadd float %119, %71 %121 = fmul float %118, %51 %122 = fadd float %121, %72 %123 = fmul float %33, 0x3FA99999A0000000 %124 = fsub float %120, %123 %125 = fmul float %33, 0x3FB47AE140000000 %126 = fadd float %125, %122 %127 = fmul float %124, 5.000000e-01 %128 = fmul float %126, 5.000000e-01 %129 = call float @llvm.floor.f32(float %127) %130 = fsub float %127, %129 %131 = call float @llvm.floor.f32(float %128) %132 = fsub float %128, %131 %133 = fmul float %130, 2.000000e+00 %134 = fadd float %133, -1.000000e+00 %135 = fmul float %132, 2.000000e+00 %136 = fadd float %135, -1.000000e+00 %137 = call float @llvm.fabs.f32(float %134) %138 = call float @llvm.fabs.f32(float %136) %139 = call float @llvm.AMDGPU.lrp(float %137, float %38, float %34) %140 = call float @llvm.AMDGPU.lrp(float %137, float %39, float %35) %141 = call float @llvm.AMDGPU.lrp(float %137, float %40, float %36) %142 = call float @llvm.AMDGPU.lrp(float %137, float %41, float %37) %143 = call float @llvm.AMDGPU.lrp(float %137, float %46, float %42) %144 = call float @llvm.AMDGPU.lrp(float %137, float %47, float %43) %145 = call float @llvm.AMDGPU.lrp(float %137, float %48, float %44) %146 = call float @llvm.AMDGPU.lrp(float %137, float %49, float %45) %147 = call float @llvm.AMDGPU.lrp(float %138, float %143, float %139) %148 = call float @llvm.AMDGPU.lrp(float %138, float %144, float %140) %149 = call float @llvm.AMDGPU.lrp(float %138, float %145, float %141) %150 = call float @llvm.AMDGPU.lrp(float %138, float %146, float %142) %151 = bitcast float %137 to i32 %152 = bitcast float %138 to i32 %153 = insertelement <2 x i32> undef, i32 %151, i32 0 %154 = insertelement <2 x i32> %153, i32 %152, i32 1 %155 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %154, <32 x i8> %56, <16 x i8> %58, i32 2) %156 = extractelement <4 x float> %155, i32 0 %157 = extractelement <4 x float> %155, i32 1 %158 = extractelement <4 x float> %155, i32 2 %159 = extractelement <4 x float> %155, i32 3 %160 = fmul float %147, %156 %161 = fmul float %148, %157 %162 = fmul float %149, %158 %163 = fmul float %150, %159 %164 = fmul float %105, %50 %165 = fadd float %71, %72 %166 = fmul float %24, %54 %167 = fadd float %166, %165 %168 = fmul float %167, %53 %169 = call float @llvm.sin.f32(float %168) %170 = fadd float %169, 2.000000e+00 %171 = fsub float %105, %52 %172 = call float @llvm.AMDIL.clamp.(float %171, float 0.000000e+00, float 1.000000e+00) %173 = fmul float %172, 4.000000e+00 %174 = call float @llvm.AMDGPU.lrp(float %173, float %170, float 1.000000e+00) %175 = fmul float %164, %174 %176 = fmul float %160, %175 %177 = fmul float %161, %175 %178 = fmul float %162, %175 %179 = fmul float %163, %175 %180 = call i32 @llvm.SI.packf16(float %176, float %177) %181 = bitcast i32 %180 to float %182 = call i32 @llvm.SI.packf16(float %178, float %179) %183 = bitcast i32 %182 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %181, float %183, float %181, float %183) ret void IF31: ; preds = %ELSE29 %184 = fsub float %26, %79 %185 = fdiv float 1.000000e+00, %28 %186 = fmul float %184, %185 %187 = fsub float 1.000000e+00, %186 br label %ENDIF27 ELSE32: ; preds = %ELSE29 %188 = fsub float %79, %32 %189 = fsub float %26, %32 %190 = fsub float %189, %30 %191 = fdiv float 1.000000e+00, %190 %192 = fmul float %188, %191 %193 = fsub float %28, %32 %194 = fsub float %193, %30 %195 = fmul float %192, %194 %196 = fadd float %195, %32 %197 = fdiv float 1.000000e+00, %28 %198 = fmul float %196, %197 br label %ENDIF27 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[12:15], 0x10 ; C2000D10 s_buffer_load_dword s1, s[12:15], 0x14 ; C2008D14 s_buffer_load_dword s2, s[12:15], 0x12 ; C2010D12 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v0, v0, 1, 0, [m0] ; C8000100 v_interp_p2_f32 v0, [v0], v1, 1, 0, [m0] ; C8010101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s0, v2 ; 10020400 v_cmp_ngt_f32_e32 vcc, s1, v1 ; 7C160201 s_and_saveexec_b64 s[8:9], vcc ; BE88246A s_xor_b64 s[8:9], exec, s[8:9] ; 8988087E s_cbranch_execz BB0_1 ; BF880000 s_buffer_load_dword s3, s[12:15], 0x16 ; C2018D16 v_sub_f32_e32 v3, s0, v1 ; 08060200 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_ngt_f32_e32 vcc, s3, v3 ; 7C160603 s_and_saveexec_b64 s[10:11], vcc ; BE8A246A s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E s_cbranch_execz BB0_6 ; BF880000 v_mov_b32_e32 v3, s1 ; 7E060201 v_sub_f32_e32 v4, s0, v3 ; 08080600 v_subrev_f32_e32 v4, s3, v4 ; 0A080803 v_rcp_f32_e32 v4, v4 ; 7E085504 v_subrev_f32_e32 v5, s1, v1 ; 0A0A0201 v_rcp_f32_e32 v6, s2 ; 7E0C5402 v_mul_f32_e32 v4, v4, v5 ; 10080B04 v_sub_f32_e32 v5, s2, v3 ; 080A0602 v_subrev_f32_e32 v5, s3, v5 ; 0A0A0A03 v_mad_f32 v3, v5, v4, v3 ; D2820003 040E0905 v_mul_f32_e32 v3, v6, v3 ; 10060706 s_or_saveexec_b64 s[10:11], s[10:11] ; BE8A250A s_xor_b64 exec, exec, s[10:11] ; 89FE0A7E v_rcp_f32_e32 v3, s2 ; 7E065402 v_sub_f32_e32 v4, s0, v1 ; 08080200 v_mad_f32 v3, -v4, v3, 1.0 ; D2820003 23CA0704 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E s_or_saveexec_b64 s[8:9], s[8:9] ; BE882508 s_buffer_load_dword s16, s[12:15], 0x11 ; C2080D11 s_buffer_load_dword s25, s[12:15], 0x17 ; C20C8D17 s_waitcnt lgkmcnt(0) ; BF8C007F s_xor_b64 exec, exec, s[8:9] ; 89FE087E v_rcp_f32_e32 v3, s2 ; 7E065402 v_mul_f32_e32 v3, v3, v1 ; 10060303 s_or_b64 exec, exec, s[8:9] ; 88FE087E s_buffer_load_dword s30, s[12:15], 0x13 ; C20F0D13 s_buffer_load_dword s31, s[12:15], 0x33 ; C20F8D33 v_mul_f32_e32 v4, s16, v0 ; 10080010 v_cmp_ngt_f32_e32 vcc, s25, v4 ; 7C160819 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[32:33], vcc ; BEA0246A s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E s_cbranch_execz BB0_9 ; BF880000 s_buffer_load_dword s1, s[12:15], 0x15 ; C2008D15 v_sub_f32_e32 v1, s16, v4 ; 08020810 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_ngt_f32_e32 vcc, s1, v1 ; 7C160201 s_and_saveexec_b64 s[2:3], vcc ; BE82246A s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E s_cbranch_execz BB0_14 ; BF880000 v_mov_b32_e32 v1, s25 ; 7E020219 v_sub_f32_e32 v5, s16, v1 ; 080A0210 v_subrev_f32_e32 v5, s1, v5 ; 0A0A0A01 v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_subrev_f32_e32 v6, s25, v4 ; 0A0C0819 v_rcp_f32_e32 v7, s30 ; 7E0E541E v_mul_f32_e32 v5, v5, v6 ; 100A0D05 v_sub_f32_e32 v6, s30, v1 ; 080C021E v_subrev_f32_e32 v6, s1, v6 ; 0A0C0C01 v_mad_f32 v1, v6, v5, v1 ; D2820001 04060B06 v_mul_f32_e32 v5, v7, v1 ; 100A0307 s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502 s_xor_b64 exec, exec, s[2:3] ; 89FE027E v_rcp_f32_e32 v1, s30 ; 7E02541E v_sub_f32_e32 v5, s16, v4 ; 080A0810 v_mad_f32 v5, -v5, v1, 1.0 ; D2820005 23CA0305 s_or_b64 exec, exec, s[2:3] ; 88FE027E s_or_saveexec_b64 s[32:33], s[32:33] ; BEA02520 s_buffer_load_dword s1, s[12:15], 0x0 ; C2008D00 s_buffer_load_dword s34, s[12:15], 0x18 ; C2110D18 s_buffer_load_dword s8, s[12:15], 0x1c ; C2040D1C s_buffer_load_dword s9, s[12:15], 0x1d ; C2048D1D s_buffer_load_dword s10, s[12:15], 0x1e ; C2050D1E s_buffer_load_dword s11, s[12:15], 0x1f ; C2058D1F s_buffer_load_dword s17, s[12:15], 0x20 ; C2088D20 s_buffer_load_dword s18, s[12:15], 0x21 ; C2090D21 s_buffer_load_dword s19, s[12:15], 0x22 ; C2098D22 s_buffer_load_dword s20, s[12:15], 0x23 ; C20A0D23 s_buffer_load_dword s21, s[12:15], 0x24 ; C20A8D24 s_buffer_load_dword s22, s[12:15], 0x25 ; C20B0D25 s_buffer_load_dword s23, s[12:15], 0x26 ; C20B8D26 s_buffer_load_dword s24, s[12:15], 0x27 ; C20C0D27 s_buffer_load_dword s26, s[12:15], 0x28 ; C20D0D28 s_buffer_load_dword s27, s[12:15], 0x29 ; C20D8D29 s_buffer_load_dword s28, s[12:15], 0x2a ; C20E0D2A s_buffer_load_dword s29, s[12:15], 0x2b ; C20E8D2B s_buffer_load_dword s3, s[12:15], 0x2c ; C2018D2C s_buffer_load_dword s35, s[12:15], 0x30 ; C2118D30 s_buffer_load_dword s2, s[12:15], 0x31 ; C2010D31 s_buffer_load_dword s12, s[12:15], 0x32 ; C2060D32 v_mov_b32_e32 v1, s31 ; 7E02021F s_waitcnt lgkmcnt(0) ; BF8C007F s_xor_b64 exec, exec, s[32:33] ; 89FE207E v_rcp_f32_e32 v5, s30 ; 7E0A541E v_mul_f32_e32 v5, v5, v4 ; 100A0905 s_or_b64 exec, exec, s[32:33] ; 88FE207E v_mov_b32_e32 v4, v5 ; 7E080305 v_add_f32_e32 v5, s34, v2 ; 060A0422 v_mul_f32_e32 v5, s0, v5 ; 100A0A00 v_add_f32_e32 v6, s34, v0 ; 060C0022 v_mul_f32_e32 v6, s16, v6 ; 100C0C10 v_mov_b32_e32 v7, 0x3c800000 ; 7E0E02FF 3C800000 v_mul_f32_e32 v8, v7, v5 ; 10100B07 v_mul_f32_e32 v9, v7, v6 ; 10120D07 s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 s_load_dwordx4 s[40:43], s[4:5], 0x8 ; C0940508 s_load_dwordx8 s[44:51], s[6:7], 0x8 ; C0D60708 s_load_dwordx8 s[52:59], s[6:7], 0x10 ; C0DA0710 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v3, 1, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[44:51], s[36:39] ; F0800100 012B0303 image_sample v[4:5], 3, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[52:59], s[40:43] ; F0800300 014D0408 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, s35, v4, v2 ; D2820004 040A0823 v_mad_f32 v5, s35, v5, v0 ; D2820005 04020A23 v_mov_b32_e32 v6, 0xbd4ccccd ; 7E0C02FF BD4CCCCD v_mac_f32_e32 v4, s34, v6 ; 3E080C22 v_mov_b32_e32 v6, 0x3da3d70a ; 7E0C02FF 3DA3D70A v_mac_f32_e32 v5, s34, v6 ; 3E0A0C22 v_mul_f32_e32 v6, 0.5, v4 ; 100C08F0 v_floor_f32_e32 v6, v6 ; 7E0C4906 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 v_mad_f32 v4, 0.5, v4, -v6 ; D2820004 841A08F0 v_mul_f32_e32 v6, 0.5, v5 ; 100C0AF0 v_floor_f32_e32 v6, v6 ; 7E0C4906 v_mad_f32 v5, 0.5, v5, -v6 ; D2820005 841A0AF0 v_mad_f32 v4, 2.0, v4, -1.0 ; D2820004 03CE08F4 v_mad_f32 v5, 2.0, v5, -1.0 ; D2820005 03CE0AF4 v_mov_b32_e32 v6, 0x7fffffff ; 7E0C02FF 7FFFFFFF v_and_b32_e32 v7, v4, v6 ; 360E0D04 v_and_b32_e32 v8, v5, v6 ; 36100D05 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[6:9], 15, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[36:43], s[32:35] ; F0800F00 01090607 v_sub_f32_e64 v10, 1.0, |v4| ; D208020A 000208F2 v_mul_f32_e32 v11, s8, v10 ; 10161408 v_mad_f32 v11, |v4|, s17, v11 ; D282010B 042C2304 v_mul_f32_e32 v12, s9, v10 ; 10181409 v_mad_f32 v12, |v4|, s18, v12 ; D282010C 04302504 v_mul_f32_e32 v13, s10, v10 ; 101A140A v_mad_f32 v13, |v4|, s19, v13 ; D282010D 04342704 v_mul_f32_e32 v14, s11, v10 ; 101C140B v_mad_f32 v14, |v4|, s20, v14 ; D282010E 04382904 v_mul_f32_e32 v15, s21, v10 ; 101E1415 v_mad_f32 v15, |v4|, s26, v15 ; D282010F 043C3504 v_mul_f32_e32 v16, s22, v10 ; 10201416 v_mad_f32 v16, |v4|, s27, v16 ; D2820110 04403704 v_mul_f32_e32 v17, s23, v10 ; 10221417 v_mad_f32 v17, |v4|, s28, v17 ; D2820111 04443904 v_mul_f32_e32 v10, s24, v10 ; 10141418 v_mad_f32 v4, |v4|, s29, v10 ; D2820104 04283B04 v_sub_f32_e64 v10, 1.0, |v5| ; D208020A 00020AF2 v_mul_f32_e32 v11, v11, v10 ; 1016150B v_mad_f32 v11, |v5|, v15, v11 ; D282010B 042E1F05 v_mul_f32_e32 v12, v12, v10 ; 1018150C v_mad_f32 v12, |v5|, v16, v12 ; D282010C 04322105 v_mul_f32_e32 v13, v13, v10 ; 101A150D v_mad_f32 v13, |v5|, v17, v13 ; D282010D 04362305 v_mul_f32_e32 v10, v14, v10 ; 1014150E v_mad_f32 v4, |v5|, v4, v10 ; D2820104 042A0905 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v5, v6, v11 ; 100A1706 v_mul_f32_e32 v6, v7, v12 ; 100C1907 v_mul_f32_e32 v7, v8, v13 ; 100E1B08 v_mul_f32_e32 v4, v9, v4 ; 10080909 v_mul_f32_e32 v8, s3, v3 ; 10100603 v_add_f32_e32 v0, v0, v2 ; 06000500 v_mac_f32_e32 v0, s1, v1 ; 3E000201 v_mul_f32_e32 v0, s12, v0 ; 1000000C v_mul_f32_e32 v0, 0x3e22f983, v0 ; 100000FF 3E22F983 v_fract_f32_e32 v0, v0 ; 7E004100 v_sin_f32_e32 v0, v0 ; 7E006B00 v_add_f32_e32 v0, 2.0, v0 ; 060000F4 v_subrev_f32_e32 v1, s2, v3 ; 0A020602 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mul_f32_e32 v2, 4.0, v1 ; 100402F6 v_mad_f32 v1, 4.0, -v1, 1.0 ; D2820001 43CA02F6 v_mac_f32_e32 v1, v0, v2 ; 3E020500 v_mul_f32_e32 v0, v1, v8 ; 10001101 v_mul_f32_e32 v1, v0, v5 ; 10020B00 v_mul_f32_e32 v2, v0, v6 ; 10040D00 v_mul_f32_e32 v3, v0, v7 ; 10060F00 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v0, v3, v0 ; 5E000103 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 20 Code Size: 940 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..5] DCL TEMP[0..2], LOCAL 0: MUL TEMP[0], CONST[2], IN[0].xxxx 1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[5], IN[0].wwww, TEMP[0] 4: MAD TEMP[1].xy, IN[1].xyyy, CONST[1].xyyy, CONST[1].zwww 5: MAD TEMP[2].x, TEMP[0].zzzz, CONST[0].zzzz, CONST[0].wwww 6: MOV TEMP[1].z, TEMP[2].xxxx 7: MOV OUT[1], TEMP[1] 8: MOV OUT[0], TEMP[0] 9: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %7 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = fmul float %19, %39 %50 = fmul float %20, %39 %51 = fmul float %21, %39 %52 = fmul float %22, %39 %53 = fmul float %23, %40 %54 = fadd float %53, %49 %55 = fmul float %24, %40 %56 = fadd float %55, %50 %57 = fmul float %25, %40 %58 = fadd float %57, %51 %59 = fmul float %26, %40 %60 = fadd float %59, %52 %61 = fmul float %27, %41 %62 = fadd float %61, %54 %63 = fmul float %28, %41 %64 = fadd float %63, %56 %65 = fmul float %29, %41 %66 = fadd float %65, %58 %67 = fmul float %30, %41 %68 = fadd float %67, %60 %69 = fmul float %31, %42 %70 = fadd float %69, %62 %71 = fmul float %32, %42 %72 = fadd float %71, %64 %73 = fmul float %33, %42 %74 = fadd float %73, %66 %75 = fmul float %34, %42 %76 = fadd float %75, %68 %77 = fmul float %47, %15 %78 = fadd float %77, %17 %79 = fmul float %48, %16 %80 = fadd float %79, %18 %81 = fmul float %74, %13 %82 = fadd float %81, %14 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %78, float %80, float %82, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %70, float %72, float %74, float %76) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x7 ; C2060107 s_buffer_load_dword s13, s[0:3], 0x8 ; C2068108 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_buffer_load_dword s5, s[0:3], 0xa ; C202810A s_buffer_load_dword s6, s[0:3], 0xb ; C203010B s_buffer_load_dword s7, s[0:3], 0xc ; C203810C s_buffer_load_dword s8, s[0:3], 0xd ; C204010D s_buffer_load_dword s9, s[0:3], 0xe ; C204810E s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s12 ; 7E00020C s_buffer_load_dword s10, s[0:3], 0xf ; C205010F s_buffer_load_dword s11, s[0:3], 0x10 ; C2058110 s_buffer_load_dword s12, s[0:3], 0x6 ; C2060106 s_buffer_load_dword s14, s[0:3], 0x4 ; C2070104 s_buffer_load_dword s15, s[0:3], 0x5 ; C2078105 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v7, s12 ; 7E0E020C v_mac_f32_e32 v7, s14, v5 ; 3E0E0A0E v_mac_f32_e32 v0, s15, v6 ; 3E000C0F s_buffer_load_dword s12, s[0:3], 0x2 ; C2060102 s_buffer_load_dword s14, s[0:3], 0x3 ; C2070103 s_buffer_load_dword s15, s[0:3], 0x11 ; C2078111 s_buffer_load_dword s16, s[0:3], 0x12 ; C2080112 s_buffer_load_dword s17, s[0:3], 0x13 ; C2088113 s_buffer_load_dword s18, s[0:3], 0x14 ; C2090114 s_buffer_load_dword s19, s[0:3], 0x15 ; C2098115 s_buffer_load_dword s20, s[0:3], 0x16 ; C20A0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 v_mul_f32_e32 v5, s13, v1 ; 100A020D v_mac_f32_e32 v5, s7, v2 ; 3E0A0407 v_mul_f32_e32 v6, s4, v1 ; 100C0204 v_mac_f32_e32 v6, s8, v2 ; 3E0C0408 v_mul_f32_e32 v8, s5, v1 ; 10100205 v_mac_f32_e32 v8, s9, v2 ; 3E100409 v_mul_f32_e32 v1, s6, v1 ; 10020206 v_mac_f32_e32 v1, s10, v2 ; 3E02040A v_mac_f32_e32 v5, s11, v3 ; 3E0A060B s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v6, s15, v3 ; 3E0C060F v_mac_f32_e32 v8, s16, v3 ; 3E100610 v_mac_f32_e32 v1, s17, v3 ; 3E020611 v_mac_f32_e32 v5, s18, v4 ; 3E0A0812 v_mac_f32_e32 v6, s19, v4 ; 3E0C0813 v_mac_f32_e32 v8, s20, v4 ; 3E100814 v_mac_f32_e32 v1, s0, v4 ; 3E020800 v_mov_b32_e32 v2, s14 ; 7E04020E v_mac_f32_e32 v2, s12, v8 ; 3E04100C v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 32, 0, 0, 0, v7, v0, v2, v3 ; F800020F 03020007 exp 15, 12, 0, 1, 0, v5, v6, v8, v1 ; F80008CF 01080605 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 248 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D 2: MOV_SAT TEMP[1].x, IN[0].zzzz 3: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[0].xyzz 4: MOV TEMP[0].w, IMM[0].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %28 = load <32 x i8>, <32 x i8> addrspace(2)* %27, align 32, !tbaa !0 %29 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %34 = bitcast float %31 to i32 %35 = bitcast float %32 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %28, <16 x i8> %30, i32 2) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = call float @llvm.AMDIL.clamp.(float %33, float 0.000000e+00, float 1.000000e+00) %43 = call float @llvm.AMDGPU.lrp(float %42, float %39, float %24) %44 = call float @llvm.AMDGPU.lrp(float %42, float %40, float %25) %45 = call float @llvm.AMDGPU.lrp(float %42, float %41, float %26) %46 = call i32 @llvm.SI.packf16(float %43, float %44) %47 = bitcast i32 %46 to float %48 = call i32 @llvm.SI.packf16(float %45, float 1.000000e+00) %49 = bitcast i32 %48 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %47, float %49, float %47, float %49) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v0, v0, 2, 0, [m0] ; C8000200 v_interp_p2_f32 v0, [v0], v1, 2, 0, [m0] ; C8010201 image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[4:7] ; F0800700 00230102 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v4, 1.0, v0 ; 080800F2 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v4 ; 100A0808 v_mul_f32_e32 v6, s9, v4 ; 100C0809 v_mul_f32_e32 v4, s0, v4 ; 10080800 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v5, v1, v0 ; 3E0A0101 v_mac_f32_e32 v6, v2, v0 ; 3E0C0102 v_mac_f32_e32 v4, v3, v0 ; 3E080103 v_cvt_pkrtz_f16_f32_e32 v0, v5, v6 ; 5E000D05 v_cvt_pkrtz_f16_f32_e64 v1, v4, 1.0 ; D25E0001 0001E504 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 136 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xy, IN[1].xyxx 5: MOV OUT[1], TEMP[1] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = fmul float %13, %33 %44 = fmul float %14, %33 %45 = fmul float %15, %33 %46 = fmul float %16, %33 %47 = fmul float %17, %34 %48 = fadd float %47, %43 %49 = fmul float %18, %34 %50 = fadd float %49, %44 %51 = fmul float %19, %34 %52 = fadd float %51, %45 %53 = fmul float %20, %34 %54 = fadd float %53, %46 %55 = fmul float %21, %35 %56 = fadd float %55, %48 %57 = fmul float %22, %35 %58 = fadd float %57, %50 %59 = fmul float %23, %35 %60 = fadd float %59, %52 %61 = fmul float %24, %35 %62 = fadd float %61, %54 %63 = fmul float %25, %36 %64 = fadd float %63, %56 %65 = fmul float %26, %36 %66 = fadd float %65, %58 %67 = fmul float %27, %36 %68 = fadd float %67, %60 %69 = fmul float %28, %36 %70 = fadd float %69, %62 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v0, s12, v2 ; 1000040C s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, s6, v3 ; 3E000606 v_mul_f32_e32 v8, s13, v2 ; 1010040D v_mac_f32_e32 v8, s7, v3 ; 3E100607 v_mul_f32_e32 v9, s4, v2 ; 10120404 v_mac_f32_e32 v9, s8, v3 ; 3E120608 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mac_f32_e32 v2, s9, v3 ; 3E040609 v_mac_f32_e32 v0, s10, v4 ; 3E00080A v_mac_f32_e32 v8, s11, v4 ; 3E10080B v_mac_f32_e32 v9, s14, v4 ; 3E12080E v_mac_f32_e32 v2, s15, v4 ; 3E04080F v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mac_f32_e32 v8, s17, v5 ; 3E100A11 v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706 exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 196 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0] DCL CONST[3..4] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 0.4000, 1.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xy, IN[0].xyyy 3: TEX TEMP[1].x, TEMP[1], SAMP[1], 2D 4: MAD TEMP[1].x, CONST[0].xxxx, TEMP[1].xxxx, CONST[0].yyyy 5: RCP TEMP[1].x, TEMP[1].xxxx 6: ADD TEMP[2].xy, CONST[4].xyyy, -IN[0].xyyy 7: DP2 TEMP[2].x, TEMP[2].xyyy, TEMP[2].xyyy 8: SQRT TEMP[2].x, TEMP[2].xxxx 9: ADD TEMP[2].x, CONST[4].wwww, -TEMP[2].xxxx 10: MOV_SAT TEMP[2].x, TEMP[2].xxxx 11: MOV TEMP[3], IMM[0].xxxx 12: FSLT TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx 13: UIF TEMP[1].xxxx :0 14: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[3].xyzz 15: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[0].xxxx 16: DP3 TEMP[0].x, TEMP[0].xyzz, IMM[0].zzzz 17: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx 18: MOV TEMP[3], TEMP[0].xxxx 19: ENDIF 20: MOV OUT[0], TEMP[3] 21: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %29 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %30 = load <32 x i8>, <32 x i8> addrspace(2)* %29, align 32, !tbaa !0 %31 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 %33 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %34 = bitcast <8 x i32> addrspace(2)* %33 to <32 x i8> addrspace(2)* %35 = load <32 x i8>, <32 x i8> addrspace(2)* %34, align 32, !tbaa !0 %36 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %37 = bitcast <4 x i32> addrspace(2)* %36 to <16 x i8> addrspace(2)* %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %41 = bitcast float %39 to i32 %42 = bitcast float %40 to i32 %43 = insertelement <2 x i32> undef, i32 %41, i32 0 %44 = insertelement <2 x i32> %43, i32 %42, i32 1 %45 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %44, <32 x i8> %30, <16 x i8> %32, i32 2) %46 = bitcast float %39 to i32 %47 = bitcast float %40 to i32 %48 = insertelement <2 x i32> undef, i32 %46, i32 0 %49 = insertelement <2 x i32> %48, i32 %47, i32 1 %50 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %49, <32 x i8> %35, <16 x i8> %38, i32 2) %51 = extractelement <4 x float> %50, i32 0 %52 = fmul float %24, %51 %53 = fadd float %52, %25 %54 = fdiv float 1.000000e+00, %53 %55 = fsub float %26, %39 %56 = fsub float %27, %40 %57 = fmul float %55, %55 %58 = fmul float %56, %56 %59 = fadd float %57, %58 %60 = call float @llvm.sqrt.f32(float %59) %61 = fsub float %28, %60 %62 = call float @llvm.AMDIL.clamp.(float %61, float 0.000000e+00, float 1.000000e+00) %63 = fcmp ogt float %54, 0x3FD99999A0000000 br i1 %63, label %IF, label %ENDIF IF: ; preds = %main_body %64 = extractelement <4 x float> %45, i32 2 %65 = extractelement <4 x float> %45, i32 1 %66 = extractelement <4 x float> %45, i32 0 %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %70 = fsub float %66, %69 %71 = fsub float %65, %68 %72 = fsub float %64, %67 %73 = call float @llvm.maxnum.f32(float %70, float 0.000000e+00) %74 = call float @llvm.maxnum.f32(float %71, float 0.000000e+00) %75 = call float @llvm.maxnum.f32(float %72, float 0.000000e+00) %76 = fadd float %74, %73 %77 = fadd float %76, %75 %78 = fmul float %77, %62 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp12.0 = phi float [ %78, %IF ], [ 0.000000e+00, %main_body ] %79 = call i32 @llvm.SI.packf16(float %temp12.0, float %temp12.0) %80 = bitcast i32 %79 to float %81 = call i32 @llvm.SI.packf16(float %temp12.0, float %temp12.0) %82 = bitcast i32 %81 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %80, float %82, float %80, float %82) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v5, 0x3ecccccd ; 7E0A02FF 3ECCCCCD v_mov_b32_e32 v2, 0 ; 7E040280 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x1 ; C2060101 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 s_buffer_load_dword s13, s[0:3], 0x0 ; C2068100 v_interp_p1_f32 v3, v0, 0, 0, [m0] ; C80C0000 v_interp_p2_f32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[16:23], s[8:11] ; F0800100 00440003 v_mov_b32_e32 v1, s12 ; 7E02020C s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v1, s13, v0 ; 3E02000D v_rcp_f32_e32 v0, v1 ; 7E005501 v_cmp_lt_f32_e32 vcc, v5, v0 ; 7C020105 s_and_saveexec_b64 s[8:9], vcc ; BE88246A s_xor_b64 s[8:9], exec, s[8:9] ; 8988087E s_cbranch_execz BB0_2 ; BF880000 s_buffer_load_dword s10, s[0:3], 0x13 ; C2050113 s_buffer_load_dword s11, s[0:3], 0xc ; C205810C s_buffer_load_dword s12, s[0:3], 0xd ; C206010D s_buffer_load_dword s13, s[0:3], 0xe ; C206810E s_buffer_load_dword s14, s[0:3], 0x10 ; C2070110 s_buffer_load_dword s15, s[0:3], 0x11 ; C2078111 s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500 s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v0, s14, v3 ; 0800060E v_sub_f32_e32 v1, s15, v4 ; 0802080F v_mul_f32_e32 v1, v1, v1 ; 10020301 image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[20:27], s[16:19] ; F0800700 00850203 v_mac_f32_e32 v1, v0, v0 ; 3E020100 v_sqrt_f32_e32 v0, v1 ; 7E006701 v_sub_f32_e32 v0, s10, v0 ; 0800000A s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v1, s11, v2 ; 0A02040B v_subrev_f32_e32 v2, s12, v3 ; 0A04060C v_subrev_f32_e32 v3, s13, v4 ; 0A06080D v_max_f32_e32 v1, 0, v1 ; 20020280 v_max_f32_e32 v2, 0, v2 ; 20040480 v_max_f32_e32 v3, 0, v3 ; 20060680 v_add_f32_e32 v1, v1, v2 ; 06020501 v_add_f32_e32 v1, v3, v1 ; 06020303 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v2, v0, v1 ; 10040300 s_or_b64 exec, exec, s[8:9] ; 88FE087E v_cvt_pkrtz_f16_f32_e32 v0, v2, v2 ; 5E000502 exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 240 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL CONST[0..3] DCL TEMP[0], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV OUT[0], TEMP[0] 5: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float %13, %33 %38 = fmul float %14, %33 %39 = fmul float %15, %33 %40 = fmul float %16, %33 %41 = fmul float %17, %34 %42 = fadd float %41, %37 %43 = fmul float %18, %34 %44 = fadd float %43, %38 %45 = fmul float %19, %34 %46 = fadd float %45, %39 %47 = fmul float %20, %34 %48 = fadd float %47, %40 %49 = fmul float %21, %35 %50 = fadd float %49, %42 %51 = fmul float %22, %35 %52 = fadd float %51, %44 %53 = fmul float %23, %35 %54 = fadd float %53, %46 %55 = fmul float %24, %35 %56 = fadd float %55, %48 %57 = fmul float %25, %36 %58 = fadd float %57, %50 %59 = fmul float %26, %36 %60 = fadd float %59, %52 %61 = fmul float %27, %36 %62 = fadd float %61, %54 %63 = fmul float %28, %36 %64 = fadd float %63, %56 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %60, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v0 ; 10080004 v_mac_f32_e32 v4, s8, v1 ; 3E080208 v_mul_f32_e32 v5, s5, v0 ; 100A0005 v_mac_f32_e32 v5, s9, v1 ; 3E0A0209 v_mul_f32_e32 v6, s6, v0 ; 100C0006 v_mac_f32_e32 v6, s10, v1 ; 3E0C020A v_mul_f32_e32 v0, s7, v0 ; 10000007 v_mac_f32_e32 v0, s11, v1 ; 3E00020B v_mac_f32_e32 v4, s12, v2 ; 3E08040C v_mac_f32_e32 v5, s13, v2 ; 3E0A040D v_mac_f32_e32 v6, s14, v2 ; 3E0C040E v_mac_f32_e32 v0, s15, v2 ; 3E00040F v_mac_f32_e32 v4, s16, v3 ; 3E080610 v_mac_f32_e32 v5, s17, v3 ; 3E0A0611 v_mac_f32_e32 v6, s18, v3 ; 3E0C0612 v_mac_f32_e32 v0, s0, v3 ; 3E000600 exp 15, 12, 0, 1, 0, v4, v5, v6, v0 ; F80008CF 00060504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 172 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL OUT[0], COLOR IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxx 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %23 = bitcast i32 %22 to float %24 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %25 = bitcast i32 %24 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25) ret void } ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080 exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 20 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..5] DCL TEMP[0..2], LOCAL 0: MUL TEMP[0], CONST[2], IN[0].xxxx 1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[5], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xy, IN[1].xyxx 5: ADD TEMP[2].xy, CONST[1].xyyy, -IN[1].xyyy 6: MUL TEMP[2].xy, TEMP[2].xyyy, CONST[0].xyyy 7: MOV TEMP[1].zw, TEMP[2].yyxy 8: MOV OUT[1], TEMP[1] 9: MOV OUT[0], TEMP[0] 10: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %33 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = add i32 %5, %7 %36 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %34, i32 0, i32 %35) %37 = extractelement <4 x float> %36, i32 0 %38 = extractelement <4 x float> %36, i32 1 %39 = extractelement <4 x float> %36, i32 2 %40 = extractelement <4 x float> %36, i32 3 %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = fmul float %17, %37 %48 = fmul float %18, %37 %49 = fmul float %19, %37 %50 = fmul float %20, %37 %51 = fmul float %21, %38 %52 = fadd float %51, %47 %53 = fmul float %22, %38 %54 = fadd float %53, %48 %55 = fmul float %23, %38 %56 = fadd float %55, %49 %57 = fmul float %24, %38 %58 = fadd float %57, %50 %59 = fmul float %25, %39 %60 = fadd float %59, %52 %61 = fmul float %26, %39 %62 = fadd float %61, %54 %63 = fmul float %27, %39 %64 = fadd float %63, %56 %65 = fmul float %28, %39 %66 = fadd float %65, %58 %67 = fmul float %29, %40 %68 = fadd float %67, %60 %69 = fmul float %30, %40 %70 = fadd float %69, %62 %71 = fmul float %31, %40 %72 = fadd float %71, %64 %73 = fmul float %32, %40 %74 = fadd float %73, %66 %75 = fsub float %15, %45 %76 = fsub float %16, %46 %77 = fmul float %75, %13 %78 = fmul float %76, %14 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %45, float %46, float %77, float %78) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %68, float %70, float %72, float %74) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101 s_buffer_load_dword s8, s[0:3], 0x8 ; C2040108 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v0, s4, v5 ; 08000A04 v_sub_f32_e32 v7, s5, v6 ; 080E0C05 v_mul_f32_e32 v0, s6, v0 ; 10000006 v_mul_f32_e32 v7, s7, v7 ; 100E0E07 exp 15, 32, 0, 0, 0, v5, v6, v0, v7 ; F800020F 07000605 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_buffer_load_dword s5, s[0:3], 0xa ; C202810A s_buffer_load_dword s6, s[0:3], 0xb ; C203010B s_buffer_load_dword s7, s[0:3], 0xc ; C203810C s_buffer_load_dword s9, s[0:3], 0xd ; C204810D s_buffer_load_dword s10, s[0:3], 0xe ; C205010E s_buffer_load_dword s11, s[0:3], 0xf ; C205810F s_buffer_load_dword s12, s[0:3], 0x10 ; C2060110 s_buffer_load_dword s13, s[0:3], 0x11 ; C2068111 s_buffer_load_dword s14, s[0:3], 0x12 ; C2070112 s_buffer_load_dword s15, s[0:3], 0x13 ; C2078113 s_buffer_load_dword s16, s[0:3], 0x14 ; C2080114 s_buffer_load_dword s17, s[0:3], 0x15 ; C2088115 s_buffer_load_dword s18, s[0:3], 0x16 ; C2090116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s8, v1 ; 10000208 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s7, v2 ; 3E000407 v_mul_f32_e32 v5, s4, v1 ; 100A0204 v_mac_f32_e32 v5, s9, v2 ; 3E0A0409 v_mul_f32_e32 v6, s5, v1 ; 100C0205 v_mac_f32_e32 v6, s10, v2 ; 3E0C040A v_mul_f32_e32 v1, s6, v1 ; 10020206 v_mac_f32_e32 v1, s11, v2 ; 3E02040B v_mac_f32_e32 v0, s12, v3 ; 3E00060C v_mac_f32_e32 v5, s13, v3 ; 3E0A060D v_mac_f32_e32 v6, s14, v3 ; 3E0C060E v_mac_f32_e32 v1, s15, v3 ; 3E02060F v_mac_f32_e32 v0, s16, v4 ; 3E000810 v_mac_f32_e32 v5, s17, v4 ; 3E0A0811 v_mac_f32_e32 v6, s18, v4 ; 3E0C0812 v_mac_f32_e32 v1, s0, v4 ; 3E020800 exp 15, 12, 0, 1, 0, v0, v5, v6, v1 ; F80008CF 01060500 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 232 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.1667, 0.0000, 0.0000, 0.0000} 0: ADD TEMP[0].xy, IN[0].xyyy, IN[0].zwww 1: MOV TEMP[1].xy, IN[0].xyyy 2: TEX TEMP[1], TEMP[1], SAMP[0], 2D 3: MOV TEMP[2].xy, TEMP[0].xyyy 4: TEX TEMP[2], TEMP[2], SAMP[0], 2D 5: ADD TEMP[1], TEMP[1], TEMP[2] 6: ADD TEMP[0].xy, TEMP[0].xyyy, IN[0].zwww 7: MOV TEMP[2].xy, TEMP[0].xyyy 8: TEX TEMP[2], TEMP[2], SAMP[0], 2D 9: ADD TEMP[1], TEMP[1], TEMP[2] 10: ADD TEMP[0].xy, TEMP[0].xyyy, IN[0].zwww 11: MOV TEMP[2].xy, TEMP[0].xyyy 12: TEX TEMP[2], TEMP[2], SAMP[0], 2D 13: ADD TEMP[1], TEMP[1], TEMP[2] 14: ADD TEMP[0].xy, TEMP[0].xyyy, IN[0].zwww 15: MOV TEMP[2].xy, TEMP[0].xyyy 16: TEX TEMP[2], TEMP[2], SAMP[0], 2D 17: ADD TEMP[1], TEMP[1], TEMP[2] 18: ADD TEMP[0].xy, TEMP[0].xyyy, IN[0].zwww 19: MOV TEMP[0].xy, TEMP[0].xyyy 20: TEX TEMP[0], TEMP[0], SAMP[0], 2D 21: ADD TEMP[1], TEMP[1], TEMP[0] 22: MUL TEMP[0], TEMP[1], IMM[0].xxxx 23: MOV OUT[0], TEMP[0] 24: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %23 = load <8 x i32>, <8 x i32> addrspace(2)* %22, align 32, !tbaa !0 %24 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = fadd float %26, %28 %31 = fadd float %27, %29 %32 = bitcast float %26 to i32 %33 = bitcast float %27 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = bitcast <8 x i32> %23 to <32 x i8> %37 = bitcast <4 x i32> %25 to <16 x i8> %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %36, <16 x i8> %37, i32 2) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = bitcast float %30 to i32 %44 = bitcast float %31 to i32 %45 = insertelement <2 x i32> undef, i32 %43, i32 0 %46 = insertelement <2 x i32> %45, i32 %44, i32 1 %47 = bitcast <8 x i32> %23 to <32 x i8> %48 = bitcast <4 x i32> %25 to <16 x i8> %49 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %46, <32 x i8> %47, <16 x i8> %48, i32 2) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = extractelement <4 x float> %49, i32 3 %54 = fadd float %39, %50 %55 = fadd float %40, %51 %56 = fadd float %41, %52 %57 = fadd float %42, %53 %58 = fadd float %30, %28 %59 = fadd float %31, %29 %60 = bitcast float %58 to i32 %61 = bitcast float %59 to i32 %62 = insertelement <2 x i32> undef, i32 %60, i32 0 %63 = insertelement <2 x i32> %62, i32 %61, i32 1 %64 = bitcast <8 x i32> %23 to <32 x i8> %65 = bitcast <4 x i32> %25 to <16 x i8> %66 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %63, <32 x i8> %64, <16 x i8> %65, i32 2) %67 = extractelement <4 x float> %66, i32 0 %68 = extractelement <4 x float> %66, i32 1 %69 = extractelement <4 x float> %66, i32 2 %70 = extractelement <4 x float> %66, i32 3 %71 = fadd float %54, %67 %72 = fadd float %55, %68 %73 = fadd float %56, %69 %74 = fadd float %57, %70 %75 = fadd float %58, %28 %76 = fadd float %59, %29 %77 = bitcast float %75 to i32 %78 = bitcast float %76 to i32 %79 = insertelement <2 x i32> undef, i32 %77, i32 0 %80 = insertelement <2 x i32> %79, i32 %78, i32 1 %81 = bitcast <8 x i32> %23 to <32 x i8> %82 = bitcast <4 x i32> %25 to <16 x i8> %83 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %80, <32 x i8> %81, <16 x i8> %82, i32 2) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = extractelement <4 x float> %83, i32 3 %88 = fadd float %71, %84 %89 = fadd float %72, %85 %90 = fadd float %73, %86 %91 = fadd float %74, %87 %92 = fadd float %75, %28 %93 = fadd float %76, %29 %94 = bitcast float %92 to i32 %95 = bitcast float %93 to i32 %96 = insertelement <2 x i32> undef, i32 %94, i32 0 %97 = insertelement <2 x i32> %96, i32 %95, i32 1 %98 = bitcast <8 x i32> %23 to <32 x i8> %99 = bitcast <4 x i32> %25 to <16 x i8> %100 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %97, <32 x i8> %98, <16 x i8> %99, i32 2) %101 = extractelement <4 x float> %100, i32 0 %102 = extractelement <4 x float> %100, i32 1 %103 = extractelement <4 x float> %100, i32 2 %104 = extractelement <4 x float> %100, i32 3 %105 = fadd float %88, %101 %106 = fadd float %89, %102 %107 = fadd float %90, %103 %108 = fadd float %91, %104 %109 = fadd float %92, %28 %110 = fadd float %93, %29 %111 = bitcast float %109 to i32 %112 = bitcast float %110 to i32 %113 = insertelement <2 x i32> undef, i32 %111, i32 0 %114 = insertelement <2 x i32> %113, i32 %112, i32 1 %115 = bitcast <8 x i32> %23 to <32 x i8> %116 = bitcast <4 x i32> %25 to <16 x i8> %117 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %114, <32 x i8> %115, <16 x i8> %116, i32 2) %118 = extractelement <4 x float> %117, i32 0 %119 = extractelement <4 x float> %117, i32 1 %120 = extractelement <4 x float> %117, i32 2 %121 = extractelement <4 x float> %117, i32 3 %122 = fadd float %105, %118 %123 = fadd float %106, %119 %124 = fadd float %107, %120 %125 = fadd float %108, %121 %126 = fmul float %122, 0x3FC5555560000000 %127 = fmul float %123, 0x3FC5555560000000 %128 = fmul float %124, 0x3FC5555560000000 %129 = fmul float %125, 0x3FC5555560000000 %130 = call i32 @llvm.SI.packf16(float %126, float %127) %131 = bitcast i32 %130 to float %132 = call i32 @llvm.SI.packf16(float %128, float %129) %133 = bitcast i32 %132 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %131, float %133, float %131, float %133) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v0, v0, 3, 0, [m0] ; C8000300 v_interp_p2_f32 v0, [v0], v1, 3, 0, [m0] ; C8010301 v_add_f32_e32 v5, v4, v2 ; 060A0504 v_add_f32_e32 v6, v0, v3 ; 060C0700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[8:15], s[0:3] ; F0800F00 00020702 image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[8:15], s[0:3] ; F0800F00 00020B05 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v1, v11, v7 ; 06020F0B v_add_f32_e32 v2, v12, v8 ; 0604110C v_add_f32_e32 v3, v13, v9 ; 0606130D v_add_f32_e32 v7, v14, v10 ; 060E150E v_add_f32_e32 v8, v4, v5 ; 06100B04 v_add_f32_e32 v9, v0, v6 ; 06120D00 v_add_f32_e32 v5, v4, v8 ; 060A1104 v_add_f32_e32 v6, v0, v9 ; 060C1300 v_add_f32_e32 v10, v4, v5 ; 06140B04 v_add_f32_e32 v11, v0, v6 ; 06160D00 v_add_f32_e32 v12, v4, v10 ; 06181504 v_add_f32_e32 v13, v0, v11 ; 061A1700 image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[8:15], s[0:3] ; F0800F00 00020E08 image_sample v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[8:15], s[0:3] ; F0800F00 00021205 image_sample v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[8:15], s[0:3] ; F0800F00 0002080A image_sample v[22:25], 15, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[8:15], s[0:3] ; F0800F00 0002160C s_waitcnt vmcnt(3) ; BF8C0773 v_add_f32_e32 v0, v14, v1 ; 0600030E v_add_f32_e32 v1, v15, v2 ; 0602050F v_add_f32_e32 v2, v16, v3 ; 06040710 v_add_f32_e32 v3, v17, v7 ; 06060F11 s_waitcnt vmcnt(2) ; BF8C0772 v_add_f32_e32 v0, v18, v0 ; 06000112 v_add_f32_e32 v1, v19, v1 ; 06020313 v_add_f32_e32 v2, v20, v2 ; 06040514 v_add_f32_e32 v3, v21, v3 ; 06060715 s_waitcnt vmcnt(1) ; BF8C0771 v_add_f32_e32 v0, v8, v0 ; 06000108 v_add_f32_e32 v1, v9, v1 ; 06020309 v_add_f32_e32 v2, v10, v2 ; 0604050A v_add_f32_e32 v3, v11, v3 ; 0606070B s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v22, v0 ; 06000116 v_add_f32_e32 v1, v23, v1 ; 06020317 v_add_f32_e32 v2, v24, v2 ; 06040518 v_add_f32_e32 v3, v25, v3 ; 06060719 v_mov_b32_e32 v4, 0x3e2aaaab ; 7E0802FF 3E2AAAAB v_mul_f32_e32 v0, v4, v0 ; 10000104 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_mul_f32_e32 v3, v4, v3 ; 10060704 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 28 Code Size: 284 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xy, IN[1].xyxx 5: MOV OUT[1], TEMP[1] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = fmul float %13, %33 %44 = fmul float %14, %33 %45 = fmul float %15, %33 %46 = fmul float %16, %33 %47 = fmul float %17, %34 %48 = fadd float %47, %43 %49 = fmul float %18, %34 %50 = fadd float %49, %44 %51 = fmul float %19, %34 %52 = fadd float %51, %45 %53 = fmul float %20, %34 %54 = fadd float %53, %46 %55 = fmul float %21, %35 %56 = fadd float %55, %48 %57 = fmul float %22, %35 %58 = fadd float %57, %50 %59 = fmul float %23, %35 %60 = fadd float %59, %52 %61 = fmul float %24, %35 %62 = fadd float %61, %54 %63 = fmul float %25, %36 %64 = fadd float %63, %56 %65 = fmul float %26, %36 %66 = fadd float %65, %58 %67 = fmul float %27, %36 %68 = fadd float %67, %60 %69 = fmul float %28, %36 %70 = fadd float %69, %62 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v0, s12, v2 ; 1000040C s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, s6, v3 ; 3E000606 v_mul_f32_e32 v8, s13, v2 ; 1010040D v_mac_f32_e32 v8, s7, v3 ; 3E100607 v_mul_f32_e32 v9, s4, v2 ; 10120404 v_mac_f32_e32 v9, s8, v3 ; 3E120608 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mac_f32_e32 v2, s9, v3 ; 3E040609 v_mac_f32_e32 v0, s10, v4 ; 3E00080A v_mac_f32_e32 v8, s11, v4 ; 3E10080B v_mac_f32_e32 v9, s14, v4 ; 3E12080E v_mac_f32_e32 v2, s15, v4 ; 3E04080F v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mac_f32_e32 v8, s17, v5 ; 3E100A11 v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706 exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 196 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[2] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: ADD TEMP[0], IMM[0].xxxx, -TEMP[0] 3: MOV TEMP[1].xy, IN[0].xyyy 4: TEX TEMP[1], TEMP[1], SAMP[1], 2D 5: MUL TEMP[1], TEMP[1], CONST[2] 6: MOV_SAT TEMP[1], TEMP[1] 7: ADD TEMP[1], IMM[0].xxxx, -TEMP[1] 8: MUL TEMP[0], TEMP[0], TEMP[1] 9: ADD TEMP[0], IMM[0].xxxx, -TEMP[0] 10: MOV OUT[0], TEMP[0] 11: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %28 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, align 32, !tbaa !0 %30 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %33 = bitcast <8 x i32> addrspace(2)* %32 to <32 x i8> addrspace(2)* %34 = load <32 x i8>, <32 x i8> addrspace(2)* %33, align 32, !tbaa !0 %35 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %36 = bitcast <4 x i32> addrspace(2)* %35 to <16 x i8> addrspace(2)* %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %40 = bitcast float %38 to i32 %41 = bitcast float %39 to i32 %42 = insertelement <2 x i32> undef, i32 %40, i32 0 %43 = insertelement <2 x i32> %42, i32 %41, i32 1 %44 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %43, <32 x i8> %29, <16 x i8> %31, i32 2) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = extractelement <4 x float> %44, i32 3 %49 = fsub float 1.000000e+00, %45 %50 = fsub float 1.000000e+00, %46 %51 = fsub float 1.000000e+00, %47 %52 = fsub float 1.000000e+00, %48 %53 = bitcast float %38 to i32 %54 = bitcast float %39 to i32 %55 = insertelement <2 x i32> undef, i32 %53, i32 0 %56 = insertelement <2 x i32> %55, i32 %54, i32 1 %57 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %56, <32 x i8> %34, <16 x i8> %37, i32 2) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = extractelement <4 x float> %57, i32 3 %62 = fmul float %58, %24 %63 = fmul float %59, %25 %64 = fmul float %60, %26 %65 = fmul float %61, %27 %66 = call float @llvm.AMDIL.clamp.(float %62, float 0.000000e+00, float 1.000000e+00) %67 = call float @llvm.AMDIL.clamp.(float %63, float 0.000000e+00, float 1.000000e+00) %68 = call float @llvm.AMDIL.clamp.(float %64, float 0.000000e+00, float 1.000000e+00) %69 = call float @llvm.AMDIL.clamp.(float %65, float 0.000000e+00, float 1.000000e+00) %70 = fsub float 1.000000e+00, %66 %71 = fsub float 1.000000e+00, %67 %72 = fsub float 1.000000e+00, %68 %73 = fsub float 1.000000e+00, %69 %74 = fmul float %49, %70 %75 = fmul float %50, %71 %76 = fmul float %51, %72 %77 = fmul float %52, %73 %78 = fsub float 1.000000e+00, %74 %79 = fsub float 1.000000e+00, %75 %80 = fsub float 1.000000e+00, %76 %81 = fsub float 1.000000e+00, %77 %82 = call i32 @llvm.SI.packf16(float %78, float %79) %83 = bitcast i32 %82 to float %84 = call i32 @llvm.SI.packf16(float %80, float %81) %85 = bitcast i32 %84 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %83, float %85, float %83, float %85) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504 s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 s_load_dwordx8 s[4:11], s[6:7], 0x8 ; C0C20708 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s28, s[0:3], 0x8 ; C20E0108 s_buffer_load_dword s29, s[0:3], 0x9 ; C20E8109 s_buffer_load_dword s30, s[0:3], 0xa ; C20F010A s_buffer_load_dword s0, s[0:3], 0xb ; C200010B v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[12:15] ; F0800F00 00650402 image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[16:19] ; F0800F00 00810002 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s28, v0 ; 1000001C v_mul_f32_e32 v1, s29, v1 ; 1002021D v_mul_f32_e32 v2, s30, v2 ; 1004041E v_mul_f32_e32 v3, s0, v3 ; 10060600 v_sub_f32_e32 v4, 1.0, v4 ; 080808F2 v_sub_f32_e32 v5, 1.0, v5 ; 080A0AF2 v_sub_f32_e32 v6, 1.0, v6 ; 080C0CF2 v_sub_f32_e32 v7, 1.0, v7 ; 080E0EF2 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v0, 1.0, v0 ; 080000F2 v_mad_f32 v0, -v4, v0, 1.0 ; D2820000 23CA0104 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_sub_f32_e32 v1, 1.0, v1 ; 080202F2 v_mad_f32 v1, -v5, v1, 1.0 ; D2820001 23CA0305 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_sub_f32_e32 v2, 1.0, v2 ; 080404F2 v_mad_f32 v2, -v6, v2, 1.0 ; D2820002 23CA0506 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_sub_f32_e32 v3, 1.0, v3 ; 080606F2 v_mad_f32 v3, -v7, v3, 1.0 ; D2820003 23CA0707 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 216 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..4] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.5000, 1.0000, -1.0000, 0.0000} 0: MUL TEMP[0].xy, CONST[0].xyyy, IMM[0].xxxx 1: MUL TEMP[1].xy, CONST[0].xyyy, IMM[0].yzzz 2: MUL TEMP[2], CONST[1], IN[0].xxxx 3: MAD TEMP[2], CONST[2], IN[0].yyyy, TEMP[2] 4: MAD TEMP[2], CONST[3], IN[0].zzzz, TEMP[2] 5: MAD TEMP[2], CONST[4], IN[0].wwww, TEMP[2] 6: ADD TEMP[3].xy, IN[1].xyyy, TEMP[0].xyyy 7: ADD TEMP[0].xy, IN[1].xyyy, -TEMP[0].xyyy 8: MOV TEMP[3].zw, TEMP[0].yyxy 9: MUL TEMP[0].xy, TEMP[1].xyyy, IMM[0].xxxx 10: ADD TEMP[0].xy, IN[1].xyyy, -TEMP[0].xyyy 11: MAD TEMP[1].xy, TEMP[1].xyyy, IMM[0].xxxx, IN[1].xyyy 12: MOV TEMP[0].zw, TEMP[1].yyxy 13: MOV TEMP[1].xy, IN[1].xyxx 14: MOV OUT[1], TEMP[3] 15: MOV OUT[2], TEMP[0] 16: MOV OUT[3], TEMP[1] 17: MOV OUT[0], TEMP[2] 18: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 %33 = add i32 %5, %7 %34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %32, i32 0, i32 %33) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 %41 = add i32 %5, %7 %42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %41) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = fmul float %13, 5.000000e-01 %46 = fmul float %14, 5.000000e-01 %47 = fmul float %15, %35 %48 = fmul float %16, %35 %49 = fmul float %17, %35 %50 = fmul float %18, %35 %51 = fmul float %19, %36 %52 = fadd float %51, %47 %53 = fmul float %20, %36 %54 = fadd float %53, %48 %55 = fmul float %21, %36 %56 = fadd float %55, %49 %57 = fmul float %22, %36 %58 = fadd float %57, %50 %59 = fmul float %23, %37 %60 = fadd float %59, %52 %61 = fmul float %24, %37 %62 = fadd float %61, %54 %63 = fmul float %25, %37 %64 = fadd float %63, %56 %65 = fmul float %26, %37 %66 = fadd float %65, %58 %67 = fmul float %27, %38 %68 = fadd float %67, %60 %69 = fmul float %28, %38 %70 = fadd float %69, %62 %71 = fmul float %29, %38 %72 = fadd float %71, %64 %73 = fmul float %30, %38 %74 = fadd float %73, %66 %75 = fadd float %43, %45 %76 = fadd float %44, %46 %77 = fsub float %43, %45 %78 = fsub float %44, %46 %79 = fmul float %13, 5.000000e-01 %80 = fmul float %14, -5.000000e-01 %81 = fsub float %43, %79 %82 = fsub float %44, %80 %83 = fmul float %13, 5.000000e-01 %84 = fadd float %83, %43 %85 = fmul float %14, -5.000000e-01 %86 = fadd float %85, %44 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %75, float %76, float %77, float %78) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %81, float %82, float %84, float %86) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %43, float %44, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %68, float %70, float %72, float %74) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 s_buffer_load_dword s7, s[0:3], 0x7 ; C2038107 s_buffer_load_dword s8, s[0:3], 0x8 ; C2040108 s_buffer_load_dword s9, s[0:3], 0x9 ; C2048109 s_buffer_load_dword s10, s[0:3], 0xa ; C205010A s_buffer_load_dword s11, s[0:3], 0xb ; C205810B s_buffer_load_dword s14, s[0:3], 0xc ; C207010C s_buffer_load_dword s15, s[0:3], 0xd ; C207810D s_buffer_load_dword s16, s[0:3], 0xe ; C208010E s_buffer_load_dword s17, s[0:3], 0xf ; C208810F s_buffer_load_dword s18, s[0:3], 0x10 ; C2090110 s_buffer_load_dword s19, s[0:3], 0x11 ; C2098111 s_buffer_load_dword s20, s[0:3], 0x12 ; C20A0112 s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s4, v2 ; 10000404 v_mad_f32 v8, 0.5, s12, v6 ; D2820008 041818F0 v_mad_f32 v9, 0.5, -s12, v6 ; D2820009 441818F0 v_mad_f32 v10, 0.5, s13, v7 ; D282000A 041C1AF0 v_mad_f32 v11, 0.5, -s13, v7 ; D282000B 441C1AF0 exp 15, 32, 0, 0, 0, v8, v10, v9, v11 ; F800020F 0B090A08 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v10, -0.5, -s13, v7 ; D282000A 441C1AF1 v_mad_f32 v11, -0.5, s13, v7 ; D282000B 041C1AF1 exp 15, 33, 0, 0, 0, v9, v10, v8, v11 ; F800021F 0B080A09 v_mac_f32_e32 v0, s8, v3 ; 3E000608 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v8, s5, v2 ; 10100405 v_mac_f32_e32 v8, s9, v3 ; 3E100609 v_mul_f32_e32 v9, s6, v2 ; 10120406 v_mac_f32_e32 v9, s10, v3 ; 3E12060A v_mul_f32_e32 v2, s7, v2 ; 10040407 v_mac_f32_e32 v2, s11, v3 ; 3E04060B v_mac_f32_e32 v0, s14, v4 ; 3E00080E v_mac_f32_e32 v8, s15, v4 ; 3E10080F v_mac_f32_e32 v9, s16, v4 ; 3E120810 v_mac_f32_e32 v2, s17, v4 ; 3E040811 v_mac_f32_e32 v0, s18, v5 ; 3E000A12 v_mac_f32_e32 v8, s19, v5 ; 3E100A13 v_mac_f32_e32 v9, s20, v5 ; 3E120A14 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 exp 15, 34, 0, 0, 0, v6, v7, v1, v1 ; F800022F 01010706 exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 272 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..2], LOCAL 0: MOV TEMP[0].xy, IN[2].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xy, IN[0].xyyy 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D 4: MAX TEMP[0], TEMP[0], TEMP[1] 5: MOV TEMP[1].xy, IN[0].zwww 6: TEX TEMP[1], TEMP[1], SAMP[0], 2D 7: MOV TEMP[2].xy, IN[1].xyyy 8: TEX TEMP[2], TEMP[2], SAMP[0], 2D 9: MAX TEMP[1], TEMP[1], TEMP[2] 10: MOV TEMP[2].xy, IN[1].zwww 11: TEX TEMP[2], TEMP[2], SAMP[0], 2D 12: MAX TEMP[1], TEMP[1], TEMP[2] 13: MAX TEMP[0], TEMP[0], TEMP[1] 14: MOV OUT[0], TEMP[0] 15: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %23 = load <8 x i32>, <8 x i32> addrspace(2)* %22, align 32, !tbaa !0 %24 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %36 = bitcast float %34 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = bitcast <8 x i32> %23 to <32 x i8> %41 = bitcast <4 x i32> %25 to <16 x i8> %42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %40, <16 x i8> %41, i32 2) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = extractelement <4 x float> %42, i32 3 %47 = bitcast float %26 to i32 %48 = bitcast float %27 to i32 %49 = insertelement <2 x i32> undef, i32 %47, i32 0 %50 = insertelement <2 x i32> %49, i32 %48, i32 1 %51 = bitcast <8 x i32> %23 to <32 x i8> %52 = bitcast <4 x i32> %25 to <16 x i8> %53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %50, <32 x i8> %51, <16 x i8> %52, i32 2) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = extractelement <4 x float> %53, i32 2 %57 = extractelement <4 x float> %53, i32 3 %58 = call float @llvm.maxnum.f32(float %43, float %54) %59 = call float @llvm.maxnum.f32(float %44, float %55) %60 = call float @llvm.maxnum.f32(float %45, float %56) %61 = call float @llvm.maxnum.f32(float %46, float %57) %62 = bitcast float %28 to i32 %63 = bitcast float %29 to i32 %64 = insertelement <2 x i32> undef, i32 %62, i32 0 %65 = insertelement <2 x i32> %64, i32 %63, i32 1 %66 = bitcast <8 x i32> %23 to <32 x i8> %67 = bitcast <4 x i32> %25 to <16 x i8> %68 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %65, <32 x i8> %66, <16 x i8> %67, i32 2) %69 = extractelement <4 x float> %68, i32 0 %70 = extractelement <4 x float> %68, i32 1 %71 = extractelement <4 x float> %68, i32 2 %72 = extractelement <4 x float> %68, i32 3 %73 = bitcast float %30 to i32 %74 = bitcast float %31 to i32 %75 = insertelement <2 x i32> undef, i32 %73, i32 0 %76 = insertelement <2 x i32> %75, i32 %74, i32 1 %77 = bitcast <8 x i32> %23 to <32 x i8> %78 = bitcast <4 x i32> %25 to <16 x i8> %79 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %76, <32 x i8> %77, <16 x i8> %78, i32 2) %80 = extractelement <4 x float> %79, i32 0 %81 = extractelement <4 x float> %79, i32 1 %82 = extractelement <4 x float> %79, i32 2 %83 = extractelement <4 x float> %79, i32 3 %84 = call float @llvm.maxnum.f32(float %69, float %80) %85 = call float @llvm.maxnum.f32(float %70, float %81) %86 = call float @llvm.maxnum.f32(float %71, float %82) %87 = call float @llvm.maxnum.f32(float %72, float %83) %88 = bitcast float %32 to i32 %89 = bitcast float %33 to i32 %90 = insertelement <2 x i32> undef, i32 %88, i32 0 %91 = insertelement <2 x i32> %90, i32 %89, i32 1 %92 = bitcast <8 x i32> %23 to <32 x i8> %93 = bitcast <4 x i32> %25 to <16 x i8> %94 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %91, <32 x i8> %92, <16 x i8> %93, i32 2) %95 = extractelement <4 x float> %94, i32 0 %96 = extractelement <4 x float> %94, i32 1 %97 = extractelement <4 x float> %94, i32 2 %98 = extractelement <4 x float> %94, i32 3 %99 = call float @llvm.maxnum.f32(float %84, float %95) %100 = call float @llvm.maxnum.f32(float %85, float %96) %101 = call float @llvm.maxnum.f32(float %86, float %97) %102 = call float @llvm.maxnum.f32(float %87, float %98) %103 = call float @llvm.maxnum.f32(float %58, float %99) %104 = call float @llvm.maxnum.f32(float %59, float %100) %105 = call float @llvm.maxnum.f32(float %60, float %101) %106 = call float @llvm.maxnum.f32(float %61, float %102) %107 = call i32 @llvm.SI.packf16(float %103, float %104) %108 = bitcast i32 %107 to float %109 = call i32 @llvm.SI.packf16(float %105, float %106) %110 = bitcast i32 %109 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %108, float %110, float %108, float %110) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[4:11], s[0:3] ; F0800F00 00010A0A image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[0:3] ; F0800F00 00010E04 image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[4:11], s[0:3] ; F0800F00 00010406 image_sample v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[4:11], s[0:3] ; F0800F00 00011208 s_waitcnt vmcnt(0) ; BF8C0770 v_max3_f32 v4, v14, v4, v18 ; D2A80004 044A090E v_max3_f32 v5, v15, v5, v19 ; D2A80005 044E0B0F v_max3_f32 v6, v16, v6, v20 ; D2A80006 04520D10 v_max3_f32 v7, v17, v7, v21 ; D2A80007 04560F11 v_max3_f32 v0, v10, v0, v4 ; D2A80000 0412010A v_max3_f32 v1, v11, v1, v5 ; D2A80001 0416030B v_max3_f32 v2, v12, v2, v6 ; D2A80002 041A050C v_max3_f32 v3, v13, v3, v7 ; D2A80003 041E070D v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 24 Code Size: 228 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..4] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.5000, 1.0000, -1.0000, 0.0000} 0: MUL TEMP[0].xy, CONST[0].xyyy, IMM[0].xxxx 1: MUL TEMP[1].xy, CONST[0].xyyy, IMM[0].yzzz 2: MUL TEMP[2], CONST[1], IN[0].xxxx 3: MAD TEMP[2], CONST[2], IN[0].yyyy, TEMP[2] 4: MAD TEMP[2], CONST[3], IN[0].zzzz, TEMP[2] 5: MAD TEMP[2], CONST[4], IN[0].wwww, TEMP[2] 6: ADD TEMP[3].xy, IN[1].xyyy, TEMP[0].xyyy 7: ADD TEMP[0].xy, IN[1].xyyy, -TEMP[0].xyyy 8: MOV TEMP[3].zw, TEMP[0].yyxy 9: MUL TEMP[0].xy, TEMP[1].xyyy, IMM[0].xxxx 10: ADD TEMP[0].xy, IN[1].xyyy, -TEMP[0].xyyy 11: MAD TEMP[1].xy, TEMP[1].xyyy, IMM[0].xxxx, IN[1].xyyy 12: MOV TEMP[0].zw, TEMP[1].yyxy 13: MOV OUT[1], TEMP[3] 14: MOV OUT[2], TEMP[0] 15: MOV OUT[0], TEMP[2] 16: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 %33 = add i32 %5, %7 %34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %32, i32 0, i32 %33) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 %41 = add i32 %5, %7 %42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %41) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = fmul float %13, 5.000000e-01 %46 = fmul float %14, 5.000000e-01 %47 = fmul float %15, %35 %48 = fmul float %16, %35 %49 = fmul float %17, %35 %50 = fmul float %18, %35 %51 = fmul float %19, %36 %52 = fadd float %51, %47 %53 = fmul float %20, %36 %54 = fadd float %53, %48 %55 = fmul float %21, %36 %56 = fadd float %55, %49 %57 = fmul float %22, %36 %58 = fadd float %57, %50 %59 = fmul float %23, %37 %60 = fadd float %59, %52 %61 = fmul float %24, %37 %62 = fadd float %61, %54 %63 = fmul float %25, %37 %64 = fadd float %63, %56 %65 = fmul float %26, %37 %66 = fadd float %65, %58 %67 = fmul float %27, %38 %68 = fadd float %67, %60 %69 = fmul float %28, %38 %70 = fadd float %69, %62 %71 = fmul float %29, %38 %72 = fadd float %71, %64 %73 = fmul float %30, %38 %74 = fadd float %73, %66 %75 = fadd float %43, %45 %76 = fadd float %44, %46 %77 = fsub float %43, %45 %78 = fsub float %44, %46 %79 = fmul float %13, 5.000000e-01 %80 = fmul float %14, -5.000000e-01 %81 = fsub float %43, %79 %82 = fsub float %44, %80 %83 = fmul float %13, 5.000000e-01 %84 = fadd float %83, %43 %85 = fmul float %14, -5.000000e-01 %86 = fadd float %85, %44 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %75, float %76, float %77, float %78) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %81, float %82, float %84, float %86) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %68, float %70, float %72, float %74) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, 0.5, s4, v5 ; D2820000 041408F0 v_mad_f32 v5, 0.5, -s4, v5 ; D2820005 441408F0 v_mad_f32 v7, 0.5, s5, v6 ; D2820007 04180AF0 v_mad_f32 v8, 0.5, -s5, v6 ; D2820008 44180AF0 exp 15, 32, 0, 0, 0, v0, v7, v5, v8 ; F800020F 08050700 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v7, -0.5, -s5, v6 ; D2820007 44180AF1 v_mac_f32_e64 v6, -0.5, s5 ; D23E0006 00000AF1 exp 15, 33, 0, 0, 0, v5, v7, v0, v6 ; F800021F 06000705 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108 s_buffer_load_dword s9, s[0:3], 0x9 ; C2048109 s_buffer_load_dword s10, s[0:3], 0xa ; C205010A s_buffer_load_dword s11, s[0:3], 0xb ; C205810B s_buffer_load_dword s12, s[0:3], 0xc ; C206010C s_buffer_load_dword s13, s[0:3], 0xd ; C206810D s_buffer_load_dword s14, s[0:3], 0xe ; C207010E s_buffer_load_dword s15, s[0:3], 0xf ; C207810F s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110 s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111 s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112 s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s6, v1 ; 10000206 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s5, v2 ; 3E000405 v_mul_f32_e32 v5, s7, v1 ; 100A0207 v_mac_f32_e32 v5, s9, v2 ; 3E0A0409 v_mul_f32_e32 v6, s8, v1 ; 100C0208 v_mac_f32_e32 v6, s10, v2 ; 3E0C040A v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mac_f32_e32 v1, s11, v2 ; 3E02040B v_mac_f32_e32 v0, s12, v3 ; 3E00060C v_mac_f32_e32 v5, s13, v3 ; 3E0A060D v_mac_f32_e32 v6, s14, v3 ; 3E0C060E v_mac_f32_e32 v1, s15, v3 ; 3E02060F v_mac_f32_e32 v0, s16, v4 ; 3E000810 v_mac_f32_e32 v5, s17, v4 ; 3E0A0811 v_mac_f32_e32 v6, s18, v4 ; 3E0C0812 v_mac_f32_e32 v1, s0, v4 ; 3E020800 exp 15, 12, 0, 1, 0, v0, v5, v6, v1 ; F80008CF 01060500 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 268 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.2500, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xy, IN[0].zwww 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D 4: ADD TEMP[0], TEMP[0], TEMP[1] 5: MOV TEMP[1].xy, IN[1].xyyy 6: TEX TEMP[1], TEMP[1], SAMP[0], 2D 7: MOV TEMP[2].xy, IN[1].zwww 8: TEX TEMP[2], TEMP[2], SAMP[0], 2D 9: ADD TEMP[1], TEMP[1], TEMP[2] 10: ADD TEMP[0], TEMP[0], TEMP[1] 11: MUL TEMP[0], TEMP[0], IMM[0].xxxx 12: MOV OUT[0], TEMP[0] 13: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %23 = load <8 x i32>, <8 x i32> addrspace(2)* %22, align 32, !tbaa !0 %24 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %34 = bitcast float %26 to i32 %35 = bitcast float %27 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = bitcast <8 x i32> %23 to <32 x i8> %39 = bitcast <4 x i32> %25 to <16 x i8> %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = bitcast float %28 to i32 %46 = bitcast float %29 to i32 %47 = insertelement <2 x i32> undef, i32 %45, i32 0 %48 = insertelement <2 x i32> %47, i32 %46, i32 1 %49 = bitcast <8 x i32> %23 to <32 x i8> %50 = bitcast <4 x i32> %25 to <16 x i8> %51 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %48, <32 x i8> %49, <16 x i8> %50, i32 2) %52 = extractelement <4 x float> %51, i32 0 %53 = extractelement <4 x float> %51, i32 1 %54 = extractelement <4 x float> %51, i32 2 %55 = extractelement <4 x float> %51, i32 3 %56 = fadd float %41, %52 %57 = fadd float %42, %53 %58 = fadd float %43, %54 %59 = fadd float %44, %55 %60 = bitcast float %30 to i32 %61 = bitcast float %31 to i32 %62 = insertelement <2 x i32> undef, i32 %60, i32 0 %63 = insertelement <2 x i32> %62, i32 %61, i32 1 %64 = bitcast <8 x i32> %23 to <32 x i8> %65 = bitcast <4 x i32> %25 to <16 x i8> %66 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %63, <32 x i8> %64, <16 x i8> %65, i32 2) %67 = extractelement <4 x float> %66, i32 0 %68 = extractelement <4 x float> %66, i32 1 %69 = extractelement <4 x float> %66, i32 2 %70 = extractelement <4 x float> %66, i32 3 %71 = bitcast float %32 to i32 %72 = bitcast float %33 to i32 %73 = insertelement <2 x i32> undef, i32 %71, i32 0 %74 = insertelement <2 x i32> %73, i32 %72, i32 1 %75 = bitcast <8 x i32> %23 to <32 x i8> %76 = bitcast <4 x i32> %25 to <16 x i8> %77 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %74, <32 x i8> %75, <16 x i8> %76, i32 2) %78 = extractelement <4 x float> %77, i32 0 %79 = extractelement <4 x float> %77, i32 1 %80 = extractelement <4 x float> %77, i32 2 %81 = extractelement <4 x float> %77, i32 3 %82 = fadd float %67, %78 %83 = fadd float %68, %79 %84 = fadd float %69, %80 %85 = fadd float %70, %81 %86 = fadd float %56, %82 %87 = fadd float %57, %83 %88 = fadd float %58, %84 %89 = fadd float %59, %85 %90 = fmul float %86, 2.500000e-01 %91 = fmul float %87, 2.500000e-01 %92 = fmul float %88, 2.500000e-01 %93 = fmul float %89, 2.500000e-01 %94 = call i32 @llvm.SI.packf16(float %90, float %91) %95 = bitcast i32 %94 to float %96 = call i32 @llvm.SI.packf16(float %92, float %93) %97 = bitcast i32 %96 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %95, float %97, float %95, float %97) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[8:15], s[0:3] ; F0800F00 00020002 image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[8:15], s[0:3] ; F0800F00 00020A04 image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800F00 00020406 image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[8:15], s[0:3] ; F0800F00 00020E08 s_waitcnt vmcnt(2) ; BF8C0772 v_add_f32_e32 v0, v10, v0 ; 0600010A v_add_f32_e32 v1, v11, v1 ; 0602030B v_add_f32_e32 v2, v12, v2 ; 0604050C v_add_f32_e32 v3, v13, v3 ; 0606070D s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, v14, v4 ; 0608090E v_add_f32_e32 v5, v15, v5 ; 060A0B0F v_add_f32_e32 v6, v16, v6 ; 060C0D10 v_add_f32_e32 v7, v17, v7 ; 060E0F11 v_add_f32_e32 v0, v4, v0 ; 06000104 v_add_f32_e32 v1, v5, v1 ; 06020305 v_add_f32_e32 v2, v6, v2 ; 06040506 v_add_f32_e32 v3, v7, v3 ; 06060707 v_mov_b32_e32 v4, 0x3e800000 ; 7E0802FF 3E800000 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_mul_f32_e32 v3, v4, v3 ; 10060704 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 20 Code Size: 216 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xy, IN[1].xyxx 5: MOV OUT[1], TEMP[1] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = fmul float %13, %33 %44 = fmul float %14, %33 %45 = fmul float %15, %33 %46 = fmul float %16, %33 %47 = fmul float %17, %34 %48 = fadd float %47, %43 %49 = fmul float %18, %34 %50 = fadd float %49, %44 %51 = fmul float %19, %34 %52 = fadd float %51, %45 %53 = fmul float %20, %34 %54 = fadd float %53, %46 %55 = fmul float %21, %35 %56 = fadd float %55, %48 %57 = fmul float %22, %35 %58 = fadd float %57, %50 %59 = fmul float %23, %35 %60 = fadd float %59, %52 %61 = fmul float %24, %35 %62 = fadd float %61, %54 %63 = fmul float %25, %36 %64 = fadd float %63, %56 %65 = fmul float %26, %36 %66 = fadd float %65, %58 %67 = fmul float %27, %36 %68 = fadd float %67, %60 %69 = fmul float %28, %36 %70 = fadd float %69, %62 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v0, s12, v2 ; 1000040C s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, s6, v3 ; 3E000606 v_mul_f32_e32 v8, s13, v2 ; 1010040D v_mac_f32_e32 v8, s7, v3 ; 3E100607 v_mul_f32_e32 v9, s4, v2 ; 10120404 v_mac_f32_e32 v9, s8, v3 ; 3E120608 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mac_f32_e32 v2, s9, v3 ; 3E040609 v_mac_f32_e32 v0, s10, v4 ; 3E00080A v_mac_f32_e32 v8, s11, v4 ; 3E10080B v_mac_f32_e32 v9, s14, v4 ; 3E12080E v_mac_f32_e32 v2, s15, v4 ; 3E04080F v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mac_f32_e32 v8, s17, v5 ; 3E100A11 v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706 exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 196 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].w, TEMP[0].wwww 3: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[1].xyzz 4: MAX TEMP[1].xyz, IMM[0].xxxx, TEMP[0].xyzz 5: MOV OUT[0], TEMP[1] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %27 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %28 = load <32 x i8>, <32 x i8> addrspace(2)* %27, align 32, !tbaa !0 %29 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %33 = bitcast float %31 to i32 %34 = bitcast float %32 to i32 %35 = insertelement <2 x i32> undef, i32 %33, i32 0 %36 = insertelement <2 x i32> %35, i32 %34, i32 1 %37 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %36, <32 x i8> %28, <16 x i8> %30, i32 2) %38 = extractelement <4 x float> %37, i32 0 %39 = extractelement <4 x float> %37, i32 1 %40 = extractelement <4 x float> %37, i32 2 %41 = extractelement <4 x float> %37, i32 3 %42 = fsub float %38, %24 %43 = fsub float %39, %25 %44 = fsub float %40, %26 %45 = call float @llvm.maxnum.f32(float %42, float 0.000000e+00) %46 = call float @llvm.maxnum.f32(float %43, float 0.000000e+00) %47 = call float @llvm.maxnum.f32(float %44, float 0.000000e+00) %48 = call i32 @llvm.SI.packf16(float %45, float %46) %49 = bitcast i32 %48 to float %50 = call i32 @llvm.SI.packf16(float %47, float %41) %51 = bitcast i32 %50 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %49, float %51, float %49, float %51) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s10, s[0:3], 0x5 ; C2050105 s_buffer_load_dword s0, s[0:3], 0x6 ; C2000106 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[4:7] ; F0800F00 00230002 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_subrev_f32_e32 v0, s8, v0 ; 0A000008 v_subrev_f32_e32 v1, s10, v1 ; 0A02020A v_subrev_f32_e32 v2, s0, v2 ; 0A040400 v_max_f32_e32 v2, 0, v2 ; 20040480 v_cvt_pkrtz_f16_f32_e32 v2, v2, v3 ; 5E040702 v_max_f32_e32 v0, 0, v0 ; 20000080 v_max_f32_e32 v1, 0, v1 ; 20020280 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v0, v2, v0, v2 ; F8001C0F 02000200 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 108 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL CONST[0..4] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 2.0000, 3.0000} IMM[1] FLT32 { 5.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[0].xyxy, IMM[0].xxyy 1: MUL TEMP[1], CONST[1], IN[0].xxxx 2: MAD TEMP[1], CONST[2], IN[0].yyyy, TEMP[1] 3: MAD TEMP[1], CONST[3], IN[0].zzzz, TEMP[1] 4: MAD TEMP[1], CONST[4], IN[0].wwww, TEMP[1] 5: ADD TEMP[2], IN[1].xyxy, TEMP[0] 6: MAD TEMP[3], TEMP[0], IMM[0].zzzz, IN[1].xyxy 7: MAD TEMP[4], TEMP[0], IMM[0].wwww, IN[1].xyxy 8: MAD TEMP[0], TEMP[0], IMM[1].xxxx, IN[1].xyxy 9: MOV TEMP[5].xy, IN[1].xyxx 10: MOV OUT[1], TEMP[2] 11: MOV OUT[5], TEMP[5] 12: MOV OUT[2], TEMP[3] 13: MOV OUT[3], TEMP[4] 14: MOV OUT[4], TEMP[0] 15: MOV OUT[0], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 %33 = add i32 %5, %7 %34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %32, i32 0, i32 %33) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 %41 = add i32 %5, %7 %42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %41) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = fmul float %15, %35 %46 = fmul float %16, %35 %47 = fmul float %17, %35 %48 = fmul float %18, %35 %49 = fmul float %19, %36 %50 = fadd float %49, %45 %51 = fmul float %20, %36 %52 = fadd float %51, %46 %53 = fmul float %21, %36 %54 = fadd float %53, %47 %55 = fmul float %22, %36 %56 = fadd float %55, %48 %57 = fmul float %23, %37 %58 = fadd float %57, %50 %59 = fmul float %24, %37 %60 = fadd float %59, %52 %61 = fmul float %25, %37 %62 = fadd float %61, %54 %63 = fmul float %26, %37 %64 = fadd float %63, %56 %65 = fmul float %27, %38 %66 = fadd float %65, %58 %67 = fmul float %28, %38 %68 = fadd float %67, %60 %69 = fmul float %29, %38 %70 = fadd float %69, %62 %71 = fmul float %30, %38 %72 = fadd float %71, %64 %73 = fadd float %43, %13 %74 = fadd float %44, %14 %75 = fsub float %43, %13 %76 = fsub float %44, %14 %77 = fmul float %13, 2.000000e+00 %78 = fadd float %77, %43 %79 = fmul float %14, 2.000000e+00 %80 = fadd float %79, %44 %81 = fmul float %13, -2.000000e+00 %82 = fadd float %81, %43 %83 = fmul float %14, -2.000000e+00 %84 = fadd float %83, %44 %85 = fmul float %13, 3.000000e+00 %86 = fadd float %85, %43 %87 = fmul float %14, 3.000000e+00 %88 = fadd float %87, %44 %89 = fmul float %13, -3.000000e+00 %90 = fadd float %89, %43 %91 = fmul float %14, -3.000000e+00 %92 = fadd float %91, %44 %93 = fmul float %13, 5.000000e+00 %94 = fadd float %93, %43 %95 = fmul float %14, 5.000000e+00 %96 = fadd float %95, %44 %97 = fmul float %13, -5.000000e+00 %98 = fadd float %97, %43 %99 = fmul float %14, -5.000000e+00 %100 = fadd float %99, %44 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %73, float %74, float %75, float %76) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %78, float %80, float %82, float %84) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %86, float %88, float %90, float %92) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %94, float %96, float %98, float %100) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %43, float %44, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %66, float %68, float %70, float %72) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_buffer_load_dword s10, s[0:3], 0x4 ; C2050104 s_buffer_load_dword s11, s[0:3], 0x5 ; C2058105 s_buffer_load_dword s16, s[0:3], 0x6 ; C2080106 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_add_f32_e32 v0, s8, v5 ; 06000A08 v_add_f32_e32 v7, s9, v6 ; 060E0C09 v_subrev_f32_e32 v8, s8, v5 ; 0A100A08 v_subrev_f32_e32 v9, s9, v6 ; 0A120C09 exp 15, 32, 0, 0, 0, v0, v7, v8, v9 ; F800020F 09080700 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, 2.0, s8, v5 ; D2820000 041410F4 v_mad_f32 v7, 2.0, s9, v6 ; D2820007 041812F4 v_mad_f32 v8, -2.0, s8, v5 ; D2820008 041410F5 v_mad_f32 v9, -2.0, s9, v6 ; D2820009 041812F5 exp 15, 33, 0, 0, 0, v0, v7, v8, v9 ; F800021F 09080700 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0x40400000 ; 7E0002FF 40400000 v_mad_f32 v7, s8, v0, v5 ; D2820007 04160008 v_mad_f32 v0, s9, v0, v6 ; D2820000 041A0009 v_mov_b32_e32 v8, 0xc0400000 ; 7E1002FF C0400000 v_mad_f32 v9, s8, v8, v5 ; D2820009 04161008 v_mad_f32 v8, s9, v8, v6 ; D2820008 041A1009 exp 15, 34, 0, 0, 0, v7, v0, v9, v8 ; F800022F 08090007 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0x40a00000 ; 7E0002FF 40A00000 v_mad_f32 v7, s8, v0, v5 ; D2820007 04160008 v_mad_f32 v0, s9, v0, v6 ; D2820000 041A0009 v_mov_b32_e32 v8, 0xc0a00000 ; 7E1002FF C0A00000 v_mad_f32 v9, s8, v8, v5 ; D2820009 04161008 v_mad_f32 v8, s9, v8, v6 ; D2820008 041A1009 exp 15, 35, 0, 0, 0, v7, v0, v9, v8 ; F800023F 08090007 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 36, 0, 0, 0, v5, v6, v0, v0 ; F800024F 00000605 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108 s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109 s_buffer_load_dword s7, s[0:3], 0xa ; C203810A s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_buffer_load_dword s9, s[0:3], 0xc ; C204810C s_buffer_load_dword s12, s[0:3], 0xd ; C206010D s_buffer_load_dword s13, s[0:3], 0xe ; C206810E s_buffer_load_dword s14, s[0:3], 0xf ; C207010F s_buffer_load_dword s15, s[0:3], 0x10 ; C2078110 s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111 s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112 s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s10, v1 ; 1000020A s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s5, v2 ; 3E000405 v_mul_f32_e32 v5, s11, v1 ; 100A020B v_mac_f32_e32 v5, s6, v2 ; 3E0A0406 v_mul_f32_e32 v6, s16, v1 ; 100C0210 v_mac_f32_e32 v6, s7, v2 ; 3E0C0407 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mac_f32_e32 v1, s8, v2 ; 3E020408 v_mac_f32_e32 v0, s9, v3 ; 3E000609 v_mac_f32_e32 v5, s12, v3 ; 3E0A060C v_mac_f32_e32 v6, s13, v3 ; 3E0C060D v_mac_f32_e32 v1, s14, v3 ; 3E02060E v_mac_f32_e32 v0, s15, v4 ; 3E00080F v_mac_f32_e32 v5, s17, v4 ; 3E0A0811 v_mac_f32_e32 v6, s18, v4 ; 3E0C0812 v_mac_f32_e32 v1, s0, v4 ; 3E020800 exp 15, 12, 0, 1, 0, v0, v5, v6, v1 ; F80008CF 01060500 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 400 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 0.0525, 0.0750, 0.1100, 0.1500} IMM[1] FLT32 { 0.2250, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[3].zwww 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xy, IN[3].xyyy 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D 4: MOV TEMP[2].xy, IN[2].zwww 5: TEX TEMP[2], TEMP[2], SAMP[0], 2D 6: MOV TEMP[3].xy, IN[2].xyyy 7: TEX TEMP[3], TEMP[3], SAMP[0], 2D 8: MOV TEMP[4].xy, IN[1].zwww 9: TEX TEMP[4], TEMP[4], SAMP[0], 2D 10: MOV TEMP[5].xy, IN[1].xyyy 11: TEX TEMP[5], TEMP[5], SAMP[0], 2D 12: MOV TEMP[6].xy, IN[0].zwww 13: TEX TEMP[6], TEMP[6], SAMP[0], 2D 14: MOV TEMP[7].xy, IN[0].xyyy 15: TEX TEMP[7], TEMP[7], SAMP[0], 2D 16: MOV TEMP[8].xy, IN[4].xyyy 17: TEX TEMP[8], TEMP[8], SAMP[0], 2D 18: MUL TEMP[8], IMM[1].xxxx, TEMP[8] 19: MAD TEMP[7], IMM[0].wwww, TEMP[7], TEMP[8] 20: MAD TEMP[6], IMM[0].wwww, TEMP[6], TEMP[7] 21: MAD TEMP[5], IMM[0].zzzz, TEMP[5], TEMP[6] 22: MAD TEMP[4], IMM[0].zzzz, TEMP[4], TEMP[5] 23: MAD TEMP[3], IMM[0].yyyy, TEMP[3], TEMP[4] 24: MAD TEMP[2], IMM[0].yyyy, TEMP[2], TEMP[3] 25: MAD TEMP[1], IMM[0].xxxx, TEMP[1], TEMP[2] 26: MAD TEMP[0], IMM[0].xxxx, TEMP[0], TEMP[1] 27: MOV OUT[0], TEMP[0] 28: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %23 = load <8 x i32>, <8 x i32> addrspace(2)* %22, align 32, !tbaa !0 %24 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %44 = bitcast float %40 to i32 %45 = bitcast float %41 to i32 %46 = insertelement <2 x i32> undef, i32 %44, i32 0 %47 = insertelement <2 x i32> %46, i32 %45, i32 1 %48 = bitcast <8 x i32> %23 to <32 x i8> %49 = bitcast <4 x i32> %25 to <16 x i8> %50 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %47, <32 x i8> %48, <16 x i8> %49, i32 2) %51 = extractelement <4 x float> %50, i32 0 %52 = extractelement <4 x float> %50, i32 1 %53 = extractelement <4 x float> %50, i32 2 %54 = extractelement <4 x float> %50, i32 3 %55 = bitcast float %38 to i32 %56 = bitcast float %39 to i32 %57 = insertelement <2 x i32> undef, i32 %55, i32 0 %58 = insertelement <2 x i32> %57, i32 %56, i32 1 %59 = bitcast <8 x i32> %23 to <32 x i8> %60 = bitcast <4 x i32> %25 to <16 x i8> %61 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %58, <32 x i8> %59, <16 x i8> %60, i32 2) %62 = extractelement <4 x float> %61, i32 0 %63 = extractelement <4 x float> %61, i32 1 %64 = extractelement <4 x float> %61, i32 2 %65 = extractelement <4 x float> %61, i32 3 %66 = bitcast float %36 to i32 %67 = bitcast float %37 to i32 %68 = insertelement <2 x i32> undef, i32 %66, i32 0 %69 = insertelement <2 x i32> %68, i32 %67, i32 1 %70 = bitcast <8 x i32> %23 to <32 x i8> %71 = bitcast <4 x i32> %25 to <16 x i8> %72 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %69, <32 x i8> %70, <16 x i8> %71, i32 2) %73 = extractelement <4 x float> %72, i32 0 %74 = extractelement <4 x float> %72, i32 1 %75 = extractelement <4 x float> %72, i32 2 %76 = extractelement <4 x float> %72, i32 3 %77 = bitcast float %34 to i32 %78 = bitcast float %35 to i32 %79 = insertelement <2 x i32> undef, i32 %77, i32 0 %80 = insertelement <2 x i32> %79, i32 %78, i32 1 %81 = bitcast <8 x i32> %23 to <32 x i8> %82 = bitcast <4 x i32> %25 to <16 x i8> %83 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %80, <32 x i8> %81, <16 x i8> %82, i32 2) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = extractelement <4 x float> %83, i32 3 %88 = bitcast float %32 to i32 %89 = bitcast float %33 to i32 %90 = insertelement <2 x i32> undef, i32 %88, i32 0 %91 = insertelement <2 x i32> %90, i32 %89, i32 1 %92 = bitcast <8 x i32> %23 to <32 x i8> %93 = bitcast <4 x i32> %25 to <16 x i8> %94 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %91, <32 x i8> %92, <16 x i8> %93, i32 2) %95 = extractelement <4 x float> %94, i32 0 %96 = extractelement <4 x float> %94, i32 1 %97 = extractelement <4 x float> %94, i32 2 %98 = extractelement <4 x float> %94, i32 3 %99 = bitcast float %30 to i32 %100 = bitcast float %31 to i32 %101 = insertelement <2 x i32> undef, i32 %99, i32 0 %102 = insertelement <2 x i32> %101, i32 %100, i32 1 %103 = bitcast <8 x i32> %23 to <32 x i8> %104 = bitcast <4 x i32> %25 to <16 x i8> %105 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %102, <32 x i8> %103, <16 x i8> %104, i32 2) %106 = extractelement <4 x float> %105, i32 0 %107 = extractelement <4 x float> %105, i32 1 %108 = extractelement <4 x float> %105, i32 2 %109 = extractelement <4 x float> %105, i32 3 %110 = bitcast float %28 to i32 %111 = bitcast float %29 to i32 %112 = insertelement <2 x i32> undef, i32 %110, i32 0 %113 = insertelement <2 x i32> %112, i32 %111, i32 1 %114 = bitcast <8 x i32> %23 to <32 x i8> %115 = bitcast <4 x i32> %25 to <16 x i8> %116 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %113, <32 x i8> %114, <16 x i8> %115, i32 2) %117 = extractelement <4 x float> %116, i32 0 %118 = extractelement <4 x float> %116, i32 1 %119 = extractelement <4 x float> %116, i32 2 %120 = extractelement <4 x float> %116, i32 3 %121 = bitcast float %26 to i32 %122 = bitcast float %27 to i32 %123 = insertelement <2 x i32> undef, i32 %121, i32 0 %124 = insertelement <2 x i32> %123, i32 %122, i32 1 %125 = bitcast <8 x i32> %23 to <32 x i8> %126 = bitcast <4 x i32> %25 to <16 x i8> %127 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %124, <32 x i8> %125, <16 x i8> %126, i32 2) %128 = extractelement <4 x float> %127, i32 0 %129 = extractelement <4 x float> %127, i32 1 %130 = extractelement <4 x float> %127, i32 2 %131 = extractelement <4 x float> %127, i32 3 %132 = bitcast float %42 to i32 %133 = bitcast float %43 to i32 %134 = insertelement <2 x i32> undef, i32 %132, i32 0 %135 = insertelement <2 x i32> %134, i32 %133, i32 1 %136 = bitcast <8 x i32> %23 to <32 x i8> %137 = bitcast <4 x i32> %25 to <16 x i8> %138 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %135, <32 x i8> %136, <16 x i8> %137, i32 2) %139 = extractelement <4 x float> %138, i32 0 %140 = extractelement <4 x float> %138, i32 1 %141 = extractelement <4 x float> %138, i32 2 %142 = extractelement <4 x float> %138, i32 3 %143 = fmul float %139, 0x3FCCCCCCC0000000 %144 = fmul float %140, 0x3FCCCCCCC0000000 %145 = fmul float %141, 0x3FCCCCCCC0000000 %146 = fmul float %142, 0x3FCCCCCCC0000000 %147 = fmul float %128, 0x3FC3333340000000 %148 = fadd float %147, %143 %149 = fmul float %129, 0x3FC3333340000000 %150 = fadd float %149, %144 %151 = fmul float %130, 0x3FC3333340000000 %152 = fadd float %151, %145 %153 = fmul float %131, 0x3FC3333340000000 %154 = fadd float %153, %146 %155 = fmul float %117, 0x3FC3333340000000 %156 = fadd float %155, %148 %157 = fmul float %118, 0x3FC3333340000000 %158 = fadd float %157, %150 %159 = fmul float %119, 0x3FC3333340000000 %160 = fadd float %159, %152 %161 = fmul float %120, 0x3FC3333340000000 %162 = fadd float %161, %154 %163 = fmul float %106, 0x3FBC28F5C0000000 %164 = fadd float %163, %156 %165 = fmul float %107, 0x3FBC28F5C0000000 %166 = fadd float %165, %158 %167 = fmul float %108, 0x3FBC28F5C0000000 %168 = fadd float %167, %160 %169 = fmul float %109, 0x3FBC28F5C0000000 %170 = fadd float %169, %162 %171 = fmul float %95, 0x3FBC28F5C0000000 %172 = fadd float %171, %164 %173 = fmul float %96, 0x3FBC28F5C0000000 %174 = fadd float %173, %166 %175 = fmul float %97, 0x3FBC28F5C0000000 %176 = fadd float %175, %168 %177 = fmul float %98, 0x3FBC28F5C0000000 %178 = fadd float %177, %170 %179 = fmul float %84, 0x3FB3333340000000 %180 = fadd float %179, %172 %181 = fmul float %85, 0x3FB3333340000000 %182 = fadd float %181, %174 %183 = fmul float %86, 0x3FB3333340000000 %184 = fadd float %183, %176 %185 = fmul float %87, 0x3FB3333340000000 %186 = fadd float %185, %178 %187 = fmul float %73, 0x3FB3333340000000 %188 = fadd float %187, %180 %189 = fmul float %74, 0x3FB3333340000000 %190 = fadd float %189, %182 %191 = fmul float %75, 0x3FB3333340000000 %192 = fadd float %191, %184 %193 = fmul float %76, 0x3FB3333340000000 %194 = fadd float %193, %186 %195 = fmul float %62, 0x3FAAE147A0000000 %196 = fadd float %195, %188 %197 = fmul float %63, 0x3FAAE147A0000000 %198 = fadd float %197, %190 %199 = fmul float %64, 0x3FAAE147A0000000 %200 = fadd float %199, %192 %201 = fmul float %65, 0x3FAAE147A0000000 %202 = fadd float %201, %194 %203 = fmul float %51, 0x3FAAE147A0000000 %204 = fadd float %203, %196 %205 = fmul float %52, 0x3FAAE147A0000000 %206 = fadd float %205, %198 %207 = fmul float %53, 0x3FAAE147A0000000 %208 = fadd float %207, %200 %209 = fmul float %54, 0x3FAAE147A0000000 %210 = fadd float %209, %202 %211 = call i32 @llvm.SI.packf16(float %204, float %206) %212 = bitcast i32 %211 to float %213 = call i32 @llvm.SI.packf16(float %208, float %210) %214 = bitcast i32 %213 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %212, float %214, float %212, float %214) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_mov_b32_e32 v2, 0x3e666666 ; 7E0402FF 3E666666 v_mov_b32_e32 v3, 0x3e19999a ; 7E0602FF 3E19999A v_mov_b32_e32 v4, 0x3de147ae ; 7E0802FF 3DE147AE v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000 v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001 v_interp_p1_f32 v6, v0, 1, 0, [m0] ; C8180100 v_interp_p2_f32 v6, [v6], v1, 1, 0, [m0] ; C8190101 v_interp_p1_f32 v7, v0, 2, 0, [m0] ; C81C0200 v_interp_p2_f32 v7, [v7], v1, 2, 0, [m0] ; C81D0201 v_interp_p1_f32 v8, v0, 3, 0, [m0] ; C8200300 v_interp_p2_f32 v8, [v8], v1, 3, 0, [m0] ; C8210301 v_interp_p1_f32 v9, v0, 0, 1, [m0] ; C8240400 v_interp_p2_f32 v9, [v9], v1, 0, 1, [m0] ; C8250401 v_interp_p1_f32 v10, v0, 1, 1, [m0] ; C8280500 v_interp_p2_f32 v10, [v10], v1, 1, 1, [m0] ; C8290501 v_interp_p1_f32 v11, v0, 2, 1, [m0] ; C82C0600 v_interp_p2_f32 v11, [v11], v1, 2, 1, [m0] ; C82D0601 v_interp_p1_f32 v12, v0, 3, 1, [m0] ; C8300700 v_interp_p2_f32 v12, [v12], v1, 3, 1, [m0] ; C8310701 v_interp_p1_f32 v13, v0, 0, 2, [m0] ; C8340800 v_interp_p2_f32 v13, [v13], v1, 0, 2, [m0] ; C8350801 v_interp_p1_f32 v14, v0, 1, 2, [m0] ; C8380900 v_interp_p2_f32 v14, [v14], v1, 1, 2, [m0] ; C8390901 v_interp_p1_f32 v15, v0, 2, 2, [m0] ; C83C0A00 v_interp_p2_f32 v15, [v15], v1, 2, 2, [m0] ; C83D0A01 v_interp_p1_f32 v16, v0, 3, 2, [m0] ; C8400B00 v_interp_p2_f32 v16, [v16], v1, 3, 2, [m0] ; C8410B01 v_interp_p1_f32 v17, v0, 0, 3, [m0] ; C8440C00 v_interp_p2_f32 v17, [v17], v1, 0, 3, [m0] ; C8450C01 v_interp_p1_f32 v18, v0, 1, 3, [m0] ; C8480D00 v_interp_p2_f32 v18, [v18], v1, 1, 3, [m0] ; C8490D01 v_interp_p1_f32 v19, v0, 2, 3, [m0] ; C84C0E00 v_interp_p2_f32 v19, [v19], v1, 2, 3, [m0] ; C84D0E01 v_interp_p1_f32 v20, v0, 3, 3, [m0] ; C8500F00 v_interp_p2_f32 v20, [v20], v1, 3, 3, [m0] ; C8510F01 v_interp_p1_f32 v21, v0, 0, 4, [m0] ; C8541000 v_interp_p2_f32 v21, [v21], v1, 0, 4, [m0] ; C8551001 v_interp_p1_f32 v22, v0, 1, 4, [m0] ; C8581100 v_interp_p2_f32 v22, [v22], v1, 1, 4, [m0] ; C8591101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[4:11], s[0:3] ; F0800F00 00011713 image_sample v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[4:11], s[0:3] ; F0800F00 00011111 image_sample v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[4:11], s[0:3] ; F0800F00 00011B0F image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[4:11], s[0:3] ; F0800F00 00010D0D image_sample v[31:34], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[4:11], s[0:3] ; F0800F00 00011F0B image_sample v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[4:11], s[0:3] ; F0800F00 00010909 image_sample v[35:38], 15, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[4:11], s[0:3] ; F0800F00 00012307 image_sample v[5:8], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[4:11], s[0:3] ; F0800F00 00010505 image_sample v[39:42], 15, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[4:11], s[0:3] ; F0800F00 00012715 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v2, v39 ; 10004F02 v_mul_f32_e32 v1, v2, v40 ; 10025102 v_mul_f32_e32 v21, v2, v41 ; 102A5302 v_mul_f32_e32 v2, v2, v42 ; 10045502 v_mac_f32_e32 v0, v3, v5 ; 3E000B03 v_mac_f32_e32 v1, v3, v6 ; 3E020D03 v_mac_f32_e32 v21, v3, v7 ; 3E2A0F03 v_mac_f32_e32 v2, v3, v8 ; 3E041103 v_mac_f32_e32 v0, v3, v35 ; 3E004703 v_mac_f32_e32 v1, v3, v36 ; 3E024903 v_mac_f32_e32 v21, v3, v37 ; 3E2A4B03 v_mac_f32_e32 v2, v3, v38 ; 3E044D03 v_mac_f32_e32 v0, v4, v9 ; 3E001304 v_mac_f32_e32 v1, v4, v10 ; 3E021504 v_mac_f32_e32 v21, v4, v11 ; 3E2A1704 v_mac_f32_e32 v2, v4, v12 ; 3E041904 v_mac_f32_e32 v0, v4, v31 ; 3E003F04 v_mac_f32_e32 v1, v4, v32 ; 3E024104 v_mac_f32_e32 v21, v4, v33 ; 3E2A4304 v_mac_f32_e32 v2, v4, v34 ; 3E044504 v_mov_b32_e32 v3, 0x3d99999a ; 7E0602FF 3D99999A v_mac_f32_e32 v0, v3, v13 ; 3E001B03 v_mac_f32_e32 v1, v3, v14 ; 3E021D03 v_mac_f32_e32 v21, v3, v15 ; 3E2A1F03 v_mac_f32_e32 v2, v3, v16 ; 3E042103 v_mac_f32_e32 v0, v3, v27 ; 3E003703 v_mac_f32_e32 v1, v3, v28 ; 3E023903 v_mac_f32_e32 v21, v3, v29 ; 3E2A3B03 v_mac_f32_e32 v2, v3, v30 ; 3E043D03 v_mov_b32_e32 v3, 0x3d570a3d ; 7E0602FF 3D570A3D v_mac_f32_e32 v0, v3, v17 ; 3E002303 v_mac_f32_e32 v1, v3, v18 ; 3E022503 v_mac_f32_e32 v21, v3, v19 ; 3E2A2703 v_mac_f32_e32 v2, v3, v20 ; 3E042903 v_mac_f32_e32 v0, v3, v23 ; 3E002F03 v_mac_f32_e32 v1, v3, v24 ; 3E023103 v_mac_f32_e32 v21, v3, v25 ; 3E2A3303 v_mac_f32_e32 v2, v3, v26 ; 3E043503 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v21, v2 ; 5E020515 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 44 Code Size: 444 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xy, IN[1].xyxx 5: MOV OUT[1], TEMP[1] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = fmul float %13, %33 %44 = fmul float %14, %33 %45 = fmul float %15, %33 %46 = fmul float %16, %33 %47 = fmul float %17, %34 %48 = fadd float %47, %43 %49 = fmul float %18, %34 %50 = fadd float %49, %44 %51 = fmul float %19, %34 %52 = fadd float %51, %45 %53 = fmul float %20, %34 %54 = fadd float %53, %46 %55 = fmul float %21, %35 %56 = fadd float %55, %48 %57 = fmul float %22, %35 %58 = fadd float %57, %50 %59 = fmul float %23, %35 %60 = fadd float %59, %52 %61 = fmul float %24, %35 %62 = fadd float %61, %54 %63 = fmul float %25, %36 %64 = fadd float %63, %56 %65 = fmul float %26, %36 %66 = fadd float %65, %58 %67 = fmul float %27, %36 %68 = fadd float %67, %60 %69 = fmul float %28, %36 %70 = fadd float %69, %62 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v0, s12, v2 ; 1000040C s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, s6, v3 ; 3E000606 v_mul_f32_e32 v8, s13, v2 ; 1010040D v_mac_f32_e32 v8, s7, v3 ; 3E100607 v_mul_f32_e32 v9, s4, v2 ; 10120404 v_mac_f32_e32 v9, s8, v3 ; 3E120608 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mac_f32_e32 v2, s9, v3 ; 3E040609 v_mac_f32_e32 v0, s10, v4 ; 3E00080A v_mac_f32_e32 v8, s11, v4 ; 3E10080B v_mac_f32_e32 v9, s14, v4 ; 3E12080E v_mac_f32_e32 v2, s15, v4 ; 3E04080F v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mac_f32_e32 v8, s17, v5 ; 3E100A11 v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706 exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 196 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = call i32 @llvm.SI.packf16(float %33, float %34) %38 = bitcast i32 %37 to float %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %38, float %40, float %38, float %40) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xy, IN[1].xyxx 5: MOV OUT[1], TEMP[1] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = fmul float %13, %33 %44 = fmul float %14, %33 %45 = fmul float %15, %33 %46 = fmul float %16, %33 %47 = fmul float %17, %34 %48 = fadd float %47, %43 %49 = fmul float %18, %34 %50 = fadd float %49, %44 %51 = fmul float %19, %34 %52 = fadd float %51, %45 %53 = fmul float %20, %34 %54 = fadd float %53, %46 %55 = fmul float %21, %35 %56 = fadd float %55, %48 %57 = fmul float %22, %35 %58 = fadd float %57, %50 %59 = fmul float %23, %35 %60 = fadd float %59, %52 %61 = fmul float %24, %35 %62 = fadd float %61, %54 %63 = fmul float %25, %36 %64 = fadd float %63, %56 %65 = fmul float %26, %36 %66 = fadd float %65, %58 %67 = fmul float %27, %36 %68 = fadd float %67, %60 %69 = fmul float %28, %36 %70 = fadd float %69, %62 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v0, s12, v2 ; 1000040C s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, s6, v3 ; 3E000606 v_mul_f32_e32 v8, s13, v2 ; 1010040D v_mac_f32_e32 v8, s7, v3 ; 3E100607 v_mul_f32_e32 v9, s4, v2 ; 10120404 v_mac_f32_e32 v9, s8, v3 ; 3E120608 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mac_f32_e32 v2, s9, v3 ; 3E040609 v_mac_f32_e32 v0, s10, v4 ; 3E00080A v_mac_f32_e32 v8, s11, v4 ; 3E10080B v_mac_f32_e32 v9, s14, v4 ; 3E12080E v_mac_f32_e32 v2, s15, v4 ; 3E04080F v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mac_f32_e32 v8, s17, v5 ; 3E100A11 v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706 exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 196 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = call i32 @llvm.SI.packf16(float %33, float %34) %38 = bitcast i32 %37 to float %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %38, float %40, float %38, float %40) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV OUT[1], IN[1].xyxy 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = fmul float %13, %33 %44 = fmul float %14, %33 %45 = fmul float %15, %33 %46 = fmul float %16, %33 %47 = fmul float %17, %34 %48 = fadd float %47, %43 %49 = fmul float %18, %34 %50 = fadd float %49, %44 %51 = fmul float %19, %34 %52 = fadd float %51, %45 %53 = fmul float %20, %34 %54 = fadd float %53, %46 %55 = fmul float %21, %35 %56 = fadd float %55, %48 %57 = fmul float %22, %35 %58 = fadd float %57, %50 %59 = fmul float %23, %35 %60 = fadd float %59, %52 %61 = fmul float %24, %35 %62 = fadd float %61, %54 %63 = fmul float %25, %36 %64 = fadd float %63, %56 %65 = fmul float %26, %36 %66 = fadd float %65, %58 %67 = fmul float %27, %36 %68 = fadd float %67, %60 %69 = fmul float %28, %36 %70 = fadd float %69, %62 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %41, float %42) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v1 ; 10000204 v_mac_f32_e32 v0, s8, v2 ; 3E000408 v_mul_f32_e32 v7, s5, v1 ; 100E0205 v_mac_f32_e32 v7, s9, v2 ; 3E0E0409 v_mul_f32_e32 v8, s6, v1 ; 10100206 v_mac_f32_e32 v8, s10, v2 ; 3E10040A v_mul_f32_e32 v1, s7, v1 ; 10020207 v_mac_f32_e32 v1, s11, v2 ; 3E02040B v_mac_f32_e32 v0, s12, v3 ; 3E00060C v_mac_f32_e32 v7, s13, v3 ; 3E0E060D v_mac_f32_e32 v8, s14, v3 ; 3E10060E v_mac_f32_e32 v1, s15, v3 ; 3E02060F v_mac_f32_e32 v0, s16, v4 ; 3E000810 v_mac_f32_e32 v7, s17, v4 ; 3E0E0811 v_mac_f32_e32 v8, s18, v4 ; 3E100812 v_mac_f32_e32 v1, s0, v4 ; 3E020800 exp 15, 32, 0, 0, 0, v5, v6, v5, v6 ; F800020F 06050605 exp 15, 12, 0, 1, 0, v0, v7, v8, v1 ; F80008CF 01080700 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 192 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[2] DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[1], 2D 2: MOV TEMP[1].xy, IN[0].zwww 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D 4: MAD TEMP[0], CONST[2].xxxx, TEMP[0], TEMP[1] 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %25 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %26 = load <32 x i8>, <32 x i8> addrspace(2)* %25, align 32, !tbaa !0 %27 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %28 = load <16 x i8>, <16 x i8> addrspace(2)* %27, align 16, !tbaa !0 %29 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %30 = bitcast <8 x i32> addrspace(2)* %29 to <32 x i8> addrspace(2)* %31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0 %32 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %33 = bitcast <4 x i32> addrspace(2)* %32 to <16 x i8> addrspace(2)* %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %39 = bitcast float %35 to i32 %40 = bitcast float %36 to i32 %41 = insertelement <2 x i32> undef, i32 %39, i32 0 %42 = insertelement <2 x i32> %41, i32 %40, i32 1 %43 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %42, <32 x i8> %31, <16 x i8> %34, i32 2) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = extractelement <4 x float> %43, i32 2 %47 = extractelement <4 x float> %43, i32 3 %48 = bitcast float %37 to i32 %49 = bitcast float %38 to i32 %50 = insertelement <2 x i32> undef, i32 %48, i32 0 %51 = insertelement <2 x i32> %50, i32 %49, i32 1 %52 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %51, <32 x i8> %26, <16 x i8> %28, i32 2) %53 = extractelement <4 x float> %52, i32 0 %54 = extractelement <4 x float> %52, i32 1 %55 = extractelement <4 x float> %52, i32 2 %56 = extractelement <4 x float> %52, i32 3 %57 = fmul float %24, %44 %58 = fadd float %57, %53 %59 = fmul float %24, %45 %60 = fadd float %59, %54 %61 = fmul float %24, %46 %62 = fadd float %61, %55 %63 = fmul float %24, %47 %64 = fadd float %63, %56 %65 = call i32 @llvm.SI.packf16(float %58, float %60) %66 = bitcast i32 %65 to float %67 = call i32 @llvm.SI.packf16(float %62, float %64) %68 = bitcast i32 %67 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %66, float %68, float %66, float %68) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504 s_mov_b32 m0, s9 ; BEFC0309 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[0:3], 0x8 ; C2000108 s_load_dwordx8 s[20:27], s[6:7], 0x8 ; C0CA0708 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[16:19] ; F0800F00 00850002 image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[12:15] ; F0800F00 00610404 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, s0, v0, v4 ; D2820000 04120000 v_mad_f32 v1, s0, v1, v5 ; D2820001 04160200 v_mad_f32 v2, s0, v2, v6 ; D2820002 041A0400 v_mac_f32_e32 v7, s0, v3 ; 3E0E0600 v_cvt_pkrtz_f16_f32_e32 v2, v2, v7 ; 5E040F02 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v0, v2, v0, v2 ; F8001C0F 02000200 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 140 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..4] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { 0.5000, 4.0000, 0.0000, 0.0000} 0: MUL TEMP[0].xy, CONST[0].xyyy, IMM[0].xxxx 1: ADD TEMP[1].xy, IN[1].xyyy, -TEMP[0].xyyy 2: ADD TEMP[2].xy, IN[1].xyyy, TEMP[0].xyyy 3: MOV TEMP[1].zw, TEMP[2].yyxy 4: MUL TEMP[2].xy, -CONST[0].xyyy, IMM[0].xxxx 5: MOV TEMP[2].zw, TEMP[0].yyxy 6: MUL TEMP[3].xy, TEMP[2].xyyy, IMM[0].yyyy 7: MUL TEMP[0].xy, TEMP[0].xyyy, IMM[0].yyyy 8: MOV TEMP[3].zw, TEMP[0].yyxy 9: MUL TEMP[0], CONST[1], IN[0].xxxx 10: MAD TEMP[0], CONST[2], IN[0].yyyy, TEMP[0] 11: MAD TEMP[0], CONST[3], IN[0].zzzz, TEMP[0] 12: MAD TEMP[0], CONST[4], IN[0].wwww, TEMP[0] 13: MOV TEMP[4].xy, IN[1].xyxx 14: MOV OUT[1], TEMP[1] 15: MOV OUT[4], TEMP[4] 16: MOV OUT[2], TEMP[2] 17: MOV OUT[3], TEMP[3] 18: MOV OUT[0], TEMP[0] 19: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 %33 = add i32 %5, %7 %34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %32, i32 0, i32 %33) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 %41 = add i32 %5, %7 %42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %41) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = fmul float %13, 5.000000e-01 %46 = fmul float %14, 5.000000e-01 %47 = fsub float %43, %45 %48 = fsub float %44, %46 %49 = fadd float %43, %45 %50 = fadd float %44, %46 %51 = fmul float %13, -5.000000e-01 %52 = fmul float %14, -5.000000e-01 %53 = fmul float %51, 4.000000e+00 %54 = fmul float %52, 4.000000e+00 %55 = fmul float %45, 4.000000e+00 %56 = fmul float %46, 4.000000e+00 %57 = fmul float %15, %35 %58 = fmul float %16, %35 %59 = fmul float %17, %35 %60 = fmul float %18, %35 %61 = fmul float %19, %36 %62 = fadd float %61, %57 %63 = fmul float %20, %36 %64 = fadd float %63, %58 %65 = fmul float %21, %36 %66 = fadd float %65, %59 %67 = fmul float %22, %36 %68 = fadd float %67, %60 %69 = fmul float %23, %37 %70 = fadd float %69, %62 %71 = fmul float %24, %37 %72 = fadd float %71, %64 %73 = fmul float %25, %37 %74 = fadd float %73, %66 %75 = fmul float %26, %37 %76 = fadd float %75, %68 %77 = fmul float %27, %38 %78 = fadd float %77, %70 %79 = fmul float %28, %38 %80 = fadd float %79, %72 %81 = fmul float %29, %38 %82 = fadd float %81, %74 %83 = fmul float %30, %38 %84 = fadd float %83, %76 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %47, float %48, float %49, float %50) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %51, float %52, float %45, float %46) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %53, float %54, float %55, float %56) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %43, float %44, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %78, float %80, float %82, float %84) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_buffer_load_dword s10, s[0:3], 0x4 ; C2050104 s_buffer_load_dword s11, s[0:3], 0x5 ; C2058105 s_buffer_load_dword s16, s[0:3], 0x6 ; C2080106 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v0, 0.5, -s8, v5 ; D2820000 441410F0 v_mad_f32 v7, 0.5, -s9, v6 ; D2820007 441812F0 v_mad_f32 v8, 0.5, s8, v5 ; D2820008 041410F0 v_mad_f32 v9, 0.5, s9, v6 ; D2820009 041812F0 exp 15, 32, 0, 0, 0, v0, v7, v8, v9 ; F800020F 09080700 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e64 v0, 0.5, s8 ; D2100000 000010F0 v_mul_f32_e64 v7, -0.5, s8 ; D2100007 000010F1 v_mul_f32_e64 v8, 0.5, s9 ; D2100008 000012F0 v_mul_f32_e64 v9, -0.5, s9 ; D2100009 000012F1 exp 15, 33, 0, 0, 0, v7, v9, v0, v8 ; F800021F 08000907 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v7, 4.0, v7 ; 100E0EF6 v_mul_f32_e32 v9, 4.0, v9 ; 101212F6 v_mul_f32_e32 v0, 4.0, v0 ; 100000F6 v_mul_f32_e32 v8, 4.0, v8 ; 101010F6 exp 15, 34, 0, 0, 0, v7, v9, v0, v8 ; F800022F 08000907 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 35, 0, 0, 0, v5, v6, v0, v0 ; F800023F 00000605 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108 s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109 s_buffer_load_dword s7, s[0:3], 0xa ; C203810A s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_buffer_load_dword s9, s[0:3], 0xc ; C204810C s_buffer_load_dword s12, s[0:3], 0xd ; C206010D s_buffer_load_dword s13, s[0:3], 0xe ; C206810E s_buffer_load_dword s14, s[0:3], 0xf ; C207010F s_buffer_load_dword s15, s[0:3], 0x10 ; C2078110 s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111 s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112 s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s10, v1 ; 1000020A s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s5, v2 ; 3E000405 v_mul_f32_e32 v5, s11, v1 ; 100A020B v_mac_f32_e32 v5, s6, v2 ; 3E0A0406 v_mul_f32_e32 v6, s16, v1 ; 100C0210 v_mac_f32_e32 v6, s7, v2 ; 3E0C0407 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mac_f32_e32 v1, s8, v2 ; 3E020408 v_mac_f32_e32 v0, s9, v3 ; 3E000609 v_mac_f32_e32 v5, s12, v3 ; 3E0A060C v_mac_f32_e32 v6, s13, v3 ; 3E0C060D v_mac_f32_e32 v1, s14, v3 ; 3E02060E v_mac_f32_e32 v0, s15, v4 ; 3E00080F v_mac_f32_e32 v5, s17, v4 ; 3E0A0811 v_mac_f32_e32 v6, s18, v4 ; 3E0C0812 v_mac_f32_e32 v1, s0, v4 ; 3E020800 exp 15, 12, 0, 1, 0, v0, v5, v6, v1 ; F80008CF 01060500 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 324 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0] DCL CONST[2..4] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 0.0000, 2.0000, 0.0026, -2.0000} IMM[1] FLT32 { 0.2500, 0.5000, 1.0000, 0.0000} IMM[2] UINT32 {4294967295, 0, 0, 0} 0: MOV TEMP[0].xy, IN[0].xyyy 1: MOV TEMP[0].w, IMM[0].xxxx 2: TXL TEMP[0].xyz, TEMP[0], SAMP[0], 2D 3: MUL TEMP[0].xyz, TEMP[0].xyzz, CONST[0].xyzz 4: ADD TEMP[1].x, TEMP[0].xxxx, TEMP[0].zzzz 5: MUL TEMP[1].x, TEMP[0].yyyy, TEMP[1].xxxx 6: SQRT TEMP[1].x, TEMP[1].xxxx 7: MUL TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx 8: ADD TEMP[2].x, TEMP[0].xxxx, TEMP[0].yyyy 9: ADD TEMP[0].x, TEMP[2].xxxx, TEMP[0].zzzz 10: MAD TEMP[0].x, TEMP[1].xxxx, CONST[0].wwww, TEMP[0].xxxx 11: MOV TEMP[1].xy, IN[0].xwww 12: MOV TEMP[1].w, IMM[0].xxxx 13: TXL TEMP[1].xyz, TEMP[1], SAMP[0], 2D 14: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[0].xyzz 15: ADD TEMP[2].x, TEMP[1].xxxx, TEMP[1].zzzz 16: MUL TEMP[2].x, TEMP[1].yyyy, TEMP[2].xxxx 17: SQRT TEMP[2].x, TEMP[2].xxxx 18: MUL TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 19: ADD TEMP[3].x, TEMP[1].xxxx, TEMP[1].yyyy 20: ADD TEMP[1].x, TEMP[3].xxxx, TEMP[1].zzzz 21: MAD TEMP[1].x, TEMP[2].xxxx, CONST[0].wwww, TEMP[1].xxxx 22: MOV TEMP[2].xy, IN[0].zyyy 23: MOV TEMP[2].w, IMM[0].xxxx 24: TXL TEMP[2].xyz, TEMP[2], SAMP[0], 2D 25: MUL TEMP[2].xyz, TEMP[2].xyzz, CONST[0].xyzz 26: MOV TEMP[3].xy, IN[0].zwww 27: MOV TEMP[3].w, IMM[0].xxxx 28: TXL TEMP[3].xyz, TEMP[3], SAMP[0], 2D 29: MUL TEMP[3].xyz, TEMP[3].xyzz, CONST[0].xyzz 30: ADD TEMP[4].x, TEMP[3].xxxx, TEMP[3].zzzz 31: MUL TEMP[4].x, TEMP[3].yyyy, TEMP[4].xxxx 32: SQRT TEMP[4].x, TEMP[4].xxxx 33: MUL TEMP[4].x, IMM[0].yyyy, TEMP[4].xxxx 34: ADD TEMP[5].x, TEMP[3].xxxx, TEMP[3].yyyy 35: ADD TEMP[3].x, TEMP[5].xxxx, TEMP[3].zzzz 36: MAD TEMP[3].x, TEMP[4].xxxx, CONST[0].wwww, TEMP[3].xxxx 37: MOV TEMP[4].xy, IN[3].xyyy 38: MOV TEMP[4].w, IMM[0].xxxx 39: TXL TEMP[4].xyz, TEMP[4], SAMP[0], 2D 40: MUL TEMP[5].xyz, TEMP[4].xyzz, CONST[0].xyzz 41: ADD TEMP[6].x, TEMP[5].xxxx, TEMP[5].zzzz 42: MUL TEMP[6].x, TEMP[5].yyyy, TEMP[6].xxxx 43: SQRT TEMP[6].x, TEMP[6].xxxx 44: MUL TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx 45: ADD TEMP[7].x, TEMP[5].xxxx, TEMP[5].yyyy 46: ADD TEMP[5].x, TEMP[7].xxxx, TEMP[5].zzzz 47: MAD TEMP[5].x, TEMP[6].xxxx, CONST[0].wwww, TEMP[5].xxxx 48: ADD TEMP[6].x, TEMP[2].xxxx, TEMP[2].zzzz 49: MUL TEMP[6].x, TEMP[2].yyyy, TEMP[6].xxxx 50: SQRT TEMP[6].x, TEMP[6].xxxx 51: MUL TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx 52: ADD TEMP[7].x, TEMP[2].xxxx, TEMP[2].yyyy 53: ADD TEMP[2].x, TEMP[7].xxxx, TEMP[2].zzzz 54: MAD TEMP[2].x, TEMP[6].xxxx, CONST[0].wwww, TEMP[2].xxxx 55: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].zzzz 56: MAX TEMP[6].x, TEMP[2].xxxx, TEMP[3].xxxx 57: MAX TEMP[7].x, TEMP[0].xxxx, TEMP[1].xxxx 58: MAX TEMP[6].x, TEMP[6].xxxx, TEMP[7].xxxx 59: MIN TEMP[7].x, TEMP[2].xxxx, TEMP[3].xxxx 60: MIN TEMP[8].x, TEMP[0].xxxx, TEMP[1].xxxx 61: MIN TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx 62: MUL TEMP[8].x, TEMP[6].xxxx, CONST[3].xxxx 63: MAX TEMP[8].x, CONST[2].xxxx, TEMP[8].xxxx 64: ADD TEMP[1].x, TEMP[1].xxxx, -TEMP[2].xxxx 65: MAX TEMP[2].x, TEMP[6].xxxx, TEMP[5].xxxx 66: MIN TEMP[5].x, TEMP[7].xxxx, TEMP[5].xxxx 67: ADD TEMP[2].x, TEMP[2].xxxx, -TEMP[5].xxxx 68: ADD TEMP[0].x, TEMP[3].xxxx, -TEMP[0].xxxx 69: FSLT TEMP[2].x, TEMP[2].xxxx, TEMP[8].xxxx 70: UIF TEMP[2].xxxx :0 71: MOV TEMP[2].xyz, TEMP[4].xyzx 72: ELSE :0 73: ADD TEMP[3].x, TEMP[1].xxxx, TEMP[0].xxxx 74: ADD TEMP[0].x, TEMP[1].xxxx, -TEMP[0].xxxx 75: MOV TEMP[3].y, TEMP[0].xxxx 76: DP2 TEMP[0].x, TEMP[3].xyyy, TEMP[3].xyyy 77: RSQ TEMP[0].x, TEMP[0].xxxx 78: MUL TEMP[0].xy, TEMP[3].xyyy, TEMP[0].xxxx 79: MUL TEMP[3].xy, TEMP[0].xyyy, IN[1].zwww 80: ADD TEMP[1].xy, IN[3].xyyy, -TEMP[3].xyyy 81: MAD TEMP[3].xy, TEMP[0].xyyy, IN[1].zwww, IN[3].xyyy 82: ABS TEMP[4].x, TEMP[0].xxxx 83: ABS TEMP[5].x, TEMP[0].yyyy 84: MIN TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 85: MUL TEMP[4].x, TEMP[4].xxxx, CONST[4].xxxx 86: RCP TEMP[4].x, TEMP[4].xxxx 87: MUL TEMP[0].xy, TEMP[0].xyyy, TEMP[4].xxxx 88: MAX TEMP[0].xy, TEMP[0].xyyy, IMM[0].wwww 89: MIN TEMP[0].xy, TEMP[0].xyyy, IMM[0].yyyy 90: MUL TEMP[5].xy, TEMP[0].xyyy, IN[2].zwww 91: ADD TEMP[4].xy, IN[3].xyyy, -TEMP[5].xyyy 92: MAD TEMP[0].xy, TEMP[0].xyyy, IN[2].zwww, IN[3].xyyy 93: MOV TEMP[1].xy, TEMP[1].xyyy 94: MOV TEMP[1].w, IMM[0].xxxx 95: TXL TEMP[1].xyz, TEMP[1], SAMP[0], 2D 96: MOV TEMP[3].xy, TEMP[3].xyyy 97: MOV TEMP[3].w, IMM[0].xxxx 98: TXL TEMP[3].xyz, TEMP[3], SAMP[0], 2D 99: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz 100: MOV TEMP[3].xy, TEMP[4].xyyy 101: MOV TEMP[3].w, IMM[0].xxxx 102: TXL TEMP[3].xyz, TEMP[3], SAMP[0], 2D 103: MOV TEMP[0].xy, TEMP[0].xyyy 104: MOV TEMP[0].w, IMM[0].xxxx 105: TXL TEMP[0].xyz, TEMP[0], SAMP[0], 2D 106: ADD TEMP[0].xyz, TEMP[3].xyzz, TEMP[0].xyzz 107: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[1].xxxx 108: MAD TEMP[0].xyz, TEMP[1].xyzz, IMM[1].xxxx, TEMP[0].xyzz 109: MUL TEMP[3].xyz, TEMP[1].xyzz, CONST[0].xyzz 110: ADD TEMP[4].x, TEMP[3].xxxx, TEMP[3].zzzz 111: MUL TEMP[4].x, TEMP[3].yyyy, TEMP[4].xxxx 112: SQRT TEMP[4].x, TEMP[4].xxxx 113: MUL TEMP[4].x, IMM[0].yyyy, TEMP[4].xxxx 114: ADD TEMP[5].x, TEMP[3].xxxx, TEMP[3].yyyy 115: ADD TEMP[3].x, TEMP[5].xxxx, TEMP[3].zzzz 116: MAD TEMP[3].x, TEMP[4].xxxx, CONST[0].wwww, TEMP[3].xxxx 117: FSLT TEMP[3].x, TEMP[3].xxxx, TEMP[7].xxxx 118: UIF TEMP[3].xxxx :0 119: MOV TEMP[3].x, IMM[2].xxxx 120: ELSE :0 121: MUL TEMP[4].xyz, TEMP[0].xyzz, CONST[0].xyzz 122: ADD TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 123: MUL TEMP[5].x, TEMP[4].yyyy, TEMP[5].xxxx 124: SQRT TEMP[5].x, TEMP[5].xxxx 125: MUL TEMP[5].x, IMM[0].yyyy, TEMP[5].xxxx 126: ADD TEMP[7].x, TEMP[4].xxxx, TEMP[4].yyyy 127: ADD TEMP[4].x, TEMP[7].xxxx, TEMP[4].zzzz 128: MAD TEMP[4].x, TEMP[5].xxxx, CONST[0].wwww, TEMP[4].xxxx 129: FSLT TEMP[3].x, TEMP[6].xxxx, TEMP[4].xxxx 130: ENDIF 131: UIF TEMP[3].xxxx :0 132: MUL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].yyyy 133: ELSE :0 134: MOV TEMP[2].xyz, TEMP[0].xyzx 135: ENDIF 136: ENDIF 137: MOV TEMP[0].w, IMM[1].zzzz 138: MOV TEMP[0].xyz, TEMP[2].xyzx 139: MOV OUT[0], TEMP[0] 140: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %30 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %31 = load <8 x i32>, <8 x i32> addrspace(2)* %30, align 32, !tbaa !0 %32 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !tbaa !0 %34 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %44 = bitcast float %34 to i32 %45 = bitcast float %35 to i32 %46 = insertelement <4 x i32> undef, i32 %44, i32 0 %47 = insertelement <4 x i32> %46, i32 %45, i32 1 %48 = insertelement <4 x i32> %47, i32 0, i32 2 %49 = bitcast <8 x i32> %31 to <32 x i8> %50 = bitcast <4 x i32> %33 to <16 x i8> %51 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %48, <32 x i8> %49, <16 x i8> %50, i32 2) %52 = extractelement <4 x float> %51, i32 0 %53 = extractelement <4 x float> %51, i32 1 %54 = extractelement <4 x float> %51, i32 2 %55 = fmul float %52, %24 %56 = fmul float %53, %25 %57 = fmul float %54, %26 %58 = fadd float %55, %57 %59 = fmul float %56, %58 %60 = call float @llvm.sqrt.f32(float %59) %61 = fmul float %60, 2.000000e+00 %62 = fadd float %55, %56 %63 = fadd float %62, %57 %64 = fmul float %61, %27 %65 = fadd float %64, %63 %66 = bitcast float %34 to i32 %67 = bitcast float %37 to i32 %68 = insertelement <4 x i32> undef, i32 %66, i32 0 %69 = insertelement <4 x i32> %68, i32 %67, i32 1 %70 = insertelement <4 x i32> %69, i32 0, i32 2 %71 = bitcast <8 x i32> %31 to <32 x i8> %72 = bitcast <4 x i32> %33 to <16 x i8> %73 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %70, <32 x i8> %71, <16 x i8> %72, i32 2) %74 = extractelement <4 x float> %73, i32 0 %75 = extractelement <4 x float> %73, i32 1 %76 = extractelement <4 x float> %73, i32 2 %77 = fmul float %74, %24 %78 = fmul float %75, %25 %79 = fmul float %76, %26 %80 = fadd float %77, %79 %81 = fmul float %78, %80 %82 = call float @llvm.sqrt.f32(float %81) %83 = fmul float %82, 2.000000e+00 %84 = fadd float %77, %78 %85 = fadd float %84, %79 %86 = fmul float %83, %27 %87 = fadd float %86, %85 %88 = bitcast float %36 to i32 %89 = bitcast float %35 to i32 %90 = insertelement <4 x i32> undef, i32 %88, i32 0 %91 = insertelement <4 x i32> %90, i32 %89, i32 1 %92 = insertelement <4 x i32> %91, i32 0, i32 2 %93 = bitcast <8 x i32> %31 to <32 x i8> %94 = bitcast <4 x i32> %33 to <16 x i8> %95 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %92, <32 x i8> %93, <16 x i8> %94, i32 2) %96 = extractelement <4 x float> %95, i32 0 %97 = extractelement <4 x float> %95, i32 1 %98 = extractelement <4 x float> %95, i32 2 %99 = fmul float %96, %24 %100 = fmul float %97, %25 %101 = fmul float %98, %26 %102 = bitcast float %36 to i32 %103 = bitcast float %37 to i32 %104 = insertelement <4 x i32> undef, i32 %102, i32 0 %105 = insertelement <4 x i32> %104, i32 %103, i32 1 %106 = insertelement <4 x i32> %105, i32 0, i32 2 %107 = bitcast <8 x i32> %31 to <32 x i8> %108 = bitcast <4 x i32> %33 to <16 x i8> %109 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %106, <32 x i8> %107, <16 x i8> %108, i32 2) %110 = extractelement <4 x float> %109, i32 0 %111 = extractelement <4 x float> %109, i32 1 %112 = extractelement <4 x float> %109, i32 2 %113 = fmul float %110, %24 %114 = fmul float %111, %25 %115 = fmul float %112, %26 %116 = fadd float %113, %115 %117 = fmul float %114, %116 %118 = call float @llvm.sqrt.f32(float %117) %119 = fmul float %118, 2.000000e+00 %120 = fadd float %113, %114 %121 = fadd float %120, %115 %122 = fmul float %119, %27 %123 = fadd float %122, %121 %124 = bitcast float %42 to i32 %125 = bitcast float %43 to i32 %126 = insertelement <4 x i32> undef, i32 %124, i32 0 %127 = insertelement <4 x i32> %126, i32 %125, i32 1 %128 = insertelement <4 x i32> %127, i32 0, i32 2 %129 = bitcast <8 x i32> %31 to <32 x i8> %130 = bitcast <4 x i32> %33 to <16 x i8> %131 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %128, <32 x i8> %129, <16 x i8> %130, i32 2) %132 = extractelement <4 x float> %131, i32 0 %133 = extractelement <4 x float> %131, i32 1 %134 = extractelement <4 x float> %131, i32 2 %135 = fmul float %132, %24 %136 = fmul float %133, %25 %137 = fmul float %134, %26 %138 = fadd float %135, %137 %139 = fmul float %136, %138 %140 = call float @llvm.sqrt.f32(float %139) %141 = fmul float %140, 2.000000e+00 %142 = fadd float %135, %136 %143 = fadd float %142, %137 %144 = fmul float %141, %27 %145 = fadd float %144, %143 %146 = fadd float %99, %101 %147 = fmul float %100, %146 %148 = call float @llvm.sqrt.f32(float %147) %149 = fmul float %148, 2.000000e+00 %150 = fadd float %99, %100 %151 = fadd float %150, %101 %152 = fmul float %149, %27 %153 = fadd float %152, %151 %154 = fadd float %153, 0x3F65555580000000 %155 = call float @llvm.maxnum.f32(float %154, float %123) %156 = call float @llvm.maxnum.f32(float %65, float %87) %157 = call float @llvm.maxnum.f32(float %155, float %156) %158 = call float @llvm.minnum.f32(float %154, float %123) %159 = call float @llvm.minnum.f32(float %65, float %87) %160 = call float @llvm.minnum.f32(float %158, float %159) %161 = fmul float %157, %29 %162 = call float @llvm.maxnum.f32(float %28, float %161) %163 = fsub float %87, %154 %164 = call float @llvm.maxnum.f32(float %157, float %145) %165 = call float @llvm.minnum.f32(float %160, float %145) %166 = fsub float %164, %165 %167 = fsub float %123, %65 %168 = fcmp olt float %166, %162 br i1 %168, label %ENDIF, label %ELSE ELSE: ; preds = %main_body %169 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %170 = fadd float %163, %167 %171 = fsub float %163, %167 %172 = fmul float %170, %170 %173 = fmul float %171, %171 %174 = fadd float %172, %173 %175 = call float @llvm.AMDGPU.rsq.clamped.f32(float %174) %176 = fmul float %170, %175 %177 = fmul float %171, %175 %178 = fmul float %176, %38 %179 = fmul float %177, %39 %180 = fsub float %42, %178 %181 = fsub float %43, %179 %182 = fmul float %176, %38 %183 = fadd float %182, %42 %184 = fmul float %177, %39 %185 = fadd float %184, %43 %186 = call float @llvm.fabs.f32(float %176) %187 = call float @llvm.fabs.f32(float %177) %188 = call float @llvm.minnum.f32(float %186, float %187) %189 = fmul float %188, %169 %190 = fdiv float 1.000000e+00, %189 %191 = fmul float %176, %190 %192 = fmul float %177, %190 %193 = call float @llvm.maxnum.f32(float %191, float -2.000000e+00) %194 = call float @llvm.maxnum.f32(float %192, float -2.000000e+00) %195 = call float @llvm.minnum.f32(float %193, float 2.000000e+00) %196 = call float @llvm.minnum.f32(float %194, float 2.000000e+00) %197 = fmul float %195, %40 %198 = fmul float %196, %41 %199 = fsub float %42, %197 %200 = fsub float %43, %198 %201 = fmul float %195, %40 %202 = fadd float %201, %42 %203 = fmul float %196, %41 %204 = fadd float %203, %43 %205 = bitcast float %180 to i32 %206 = bitcast float %181 to i32 %207 = insertelement <4 x i32> undef, i32 %205, i32 0 %208 = insertelement <4 x i32> %207, i32 %206, i32 1 %209 = insertelement <4 x i32> %208, i32 0, i32 2 %210 = bitcast <8 x i32> %31 to <32 x i8> %211 = bitcast <4 x i32> %33 to <16 x i8> %212 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %209, <32 x i8> %210, <16 x i8> %211, i32 2) %213 = extractelement <4 x float> %212, i32 0 %214 = extractelement <4 x float> %212, i32 1 %215 = extractelement <4 x float> %212, i32 2 %216 = bitcast float %183 to i32 %217 = bitcast float %185 to i32 %218 = insertelement <4 x i32> undef, i32 %216, i32 0 %219 = insertelement <4 x i32> %218, i32 %217, i32 1 %220 = insertelement <4 x i32> %219, i32 0, i32 2 %221 = bitcast <8 x i32> %31 to <32 x i8> %222 = bitcast <4 x i32> %33 to <16 x i8> %223 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %220, <32 x i8> %221, <16 x i8> %222, i32 2) %224 = extractelement <4 x float> %223, i32 0 %225 = extractelement <4 x float> %223, i32 1 %226 = extractelement <4 x float> %223, i32 2 %227 = fadd float %213, %224 %228 = fadd float %214, %225 %229 = fadd float %215, %226 %230 = bitcast float %199 to i32 %231 = bitcast float %200 to i32 %232 = insertelement <4 x i32> undef, i32 %230, i32 0 %233 = insertelement <4 x i32> %232, i32 %231, i32 1 %234 = insertelement <4 x i32> %233, i32 0, i32 2 %235 = bitcast <8 x i32> %31 to <32 x i8> %236 = bitcast <4 x i32> %33 to <16 x i8> %237 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %234, <32 x i8> %235, <16 x i8> %236, i32 2) %238 = extractelement <4 x float> %237, i32 0 %239 = extractelement <4 x float> %237, i32 1 %240 = extractelement <4 x float> %237, i32 2 %241 = bitcast float %202 to i32 %242 = bitcast float %204 to i32 %243 = insertelement <4 x i32> undef, i32 %241, i32 0 %244 = insertelement <4 x i32> %243, i32 %242, i32 1 %245 = insertelement <4 x i32> %244, i32 0, i32 2 %246 = bitcast <8 x i32> %31 to <32 x i8> %247 = bitcast <4 x i32> %33 to <16 x i8> %248 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %245, <32 x i8> %246, <16 x i8> %247, i32 2) %249 = extractelement <4 x float> %248, i32 0 %250 = extractelement <4 x float> %248, i32 1 %251 = extractelement <4 x float> %248, i32 2 %252 = fadd float %238, %249 %253 = fadd float %239, %250 %254 = fadd float %240, %251 %255 = fmul float %252, 2.500000e-01 %256 = fmul float %253, 2.500000e-01 %257 = fmul float %254, 2.500000e-01 %258 = fmul float %227, 2.500000e-01 %259 = fadd float %258, %255 %260 = fmul float %228, 2.500000e-01 %261 = fadd float %260, %256 %262 = fmul float %229, 2.500000e-01 %263 = fadd float %262, %257 %264 = fmul float %227, %24 %265 = fmul float %228, %25 %266 = fmul float %229, %26 %267 = fadd float %264, %266 %268 = fmul float %265, %267 %269 = call float @llvm.sqrt.f32(float %268) %270 = fmul float %269, 2.000000e+00 %271 = fadd float %264, %265 %272 = fadd float %271, %266 %273 = fmul float %270, %27 %274 = fadd float %273, %272 %275 = fcmp olt float %274, %160 br i1 %275, label %ENDIF36, label %ELSE38 ENDIF: ; preds = %IF40, %ENDIF36, %main_body %temp10.0 = phi float [ %134, %main_body ], [ %294, %IF40 ], [ %263, %ENDIF36 ] %temp9.0 = phi float [ %133, %main_body ], [ %293, %IF40 ], [ %261, %ENDIF36 ] %temp8.0 = phi float [ %132, %main_body ], [ %292, %IF40 ], [ %259, %ENDIF36 ] %276 = call i32 @llvm.SI.packf16(float %temp8.0, float %temp9.0) %277 = bitcast i32 %276 to float %278 = call i32 @llvm.SI.packf16(float %temp10.0, float 1.000000e+00) %279 = bitcast i32 %278 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %277, float %279, float %277, float %279) ret void ELSE38: ; preds = %ELSE %280 = fmul float %259, %24 %281 = fmul float %261, %25 %282 = fmul float %263, %26 %283 = fadd float %280, %282 %284 = fmul float %281, %283 %285 = call float @llvm.sqrt.f32(float %284) %286 = fmul float %285, 2.000000e+00 %287 = fadd float %280, %281 %288 = fadd float %287, %282 %289 = fmul float %286, %27 %290 = fadd float %289, %288 %291 = fcmp olt float %157, %290 br label %ENDIF36 ENDIF36: ; preds = %ELSE, %ELSE38 %temp12.0 = phi i1 [ %291, %ELSE38 ], [ true, %ELSE ] br i1 %temp12.0, label %IF40, label %ENDIF IF40: ; preds = %ENDIF36 %292 = fmul float %227, 5.000000e-01 %293 = fmul float %228, 5.000000e-01 %294 = fmul float %229, 5.000000e-01 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100 v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101 v_interp_p1_f32 v13, v0, 2, 0, [m0] ; C8340200 v_interp_p2_f32 v13, [v13], v1, 2, 0, [m0] ; C8350201 v_interp_p1_f32 v7, v0, 3, 0, [m0] ; C81C0300 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_interp_p2_f32 v7, [v7], v1, 3, 0, [m0] ; C81D0301 v_mov_b32_e32 v6, 0 ; 7E0C0280 v_interp_p1_f32 v2, v0, 0, 3, [m0] ; C8080C00 v_interp_p2_f32 v2, [v2], v1, 0, 3, [m0] ; C8090C01 v_mov_b32_e32 v8, v4 ; 7E100304 v_mov_b32_e32 v9, v5 ; 7E120305 v_mov_b32_e32 v10, v6 ; 7E140306 v_mov_b32_e32 v11, v7 ; 7E160307 v_interp_p1_f32 v3, v0, 1, 3, [m0] ; C80C0D00 v_mov_b32_e32 v9, v7 ; 7E120307 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s18, s[0:3], 0x2 ; C2090102 s_buffer_load_dword s16, s[0:3], 0x0 ; C2080100 s_buffer_load_dword s17, s[0:3], 0x1 ; C2088101 v_interp_p2_f32 v3, [v3], v1, 1, 3, [m0] ; C80D0D01 v_mov_b32_e32 v10, v6 ; 7E140306 v_mov_b32_e32 v14, v5 ; 7E1C0305 image_sample_l v[16:18], 7, 0, 0, 0, 0, 0, 0, 0, v[4:7], s[4:11], s[12:15] ; F0900700 00611004 image_sample_l v[19:21], 7, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[4:11], s[12:15] ; F0900700 00611308 v_mov_b32_e32 v15, v6 ; 7E1E0306 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 image_sample_l v[22:24], 7, 0, 0, 0, 0, 0, 0, 0, v[13:16], s[4:11], s[12:15] ; F0900700 0061160D v_mov_b32_e32 v14, v7 ; 7E1C0307 v_mov_b32_e32 v15, v6 ; 7E1E0306 v_mov_b32_e32 v4, v6 ; 7E080306 s_buffer_load_dword s19, s[0:3], 0x3 ; C2098103 s_buffer_load_dword s20, s[0:3], 0x8 ; C20A0108 v_mul_f32_e32 v5, s18, v18 ; 100A2412 v_mac_f32_e32 v5, s16, v16 ; 3E0A2010 v_mul_f32_e32 v10, s17, v17 ; 10142211 v_mul_f32_e32 v5, v5, v10 ; 100A1505 v_mac_f32_e32 v10, s16, v16 ; 3E142010 v_mac_f32_e32 v10, s18, v18 ; 3E142412 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v6, s18, v21 ; 100C2A12 v_mac_f32_e32 v6, s16, v19 ; 3E0C2610 v_mul_f32_e32 v12, s17, v20 ; 10182811 v_mul_f32_e32 v6, v6, v12 ; 100C1906 v_mac_f32_e32 v12, s16, v19 ; 3E182610 v_mac_f32_e32 v12, s18, v21 ; 3E182A12 image_sample_l v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[13:16], s[4:11], s[12:15] ; F0900700 0061070D s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v13, s18, v9 ; 101A1212 v_mac_f32_e32 v13, s16, v7 ; 3E1A0E10 v_mul_f32_e32 v11, s17, v8 ; 10161011 v_mul_f32_e32 v8, v13, v11 ; 1010170D v_mac_f32_e32 v11, s16, v7 ; 3E160E10 v_mac_f32_e32 v11, s18, v9 ; 3E161212 v_mul_f32_e32 v7, s18, v24 ; 100E3012 v_mac_f32_e32 v7, s16, v22 ; 3E0E2C10 v_mul_f32_e32 v13, s17, v23 ; 101A2E11 v_mul_f32_e32 v14, v7, v13 ; 101C1B07 v_mac_f32_e32 v13, s16, v22 ; 3E1A2C10 v_mac_f32_e32 v13, s18, v24 ; 3E1A3012 v_sqrt_f32_e32 v5, v5 ; 7E0A6705 v_add_f32_e32 v5, v5, v5 ; 060A0B05 v_mac_f32_e32 v10, s19, v5 ; 3E140A13 v_sqrt_f32_e32 v5, v6 ; 7E0A6706 v_add_f32_e32 v5, v5, v5 ; 060A0B05 v_mac_f32_e32 v12, s19, v5 ; 3E180A13 v_sqrt_f32_e32 v5, v8 ; 7E0A6708 v_add_f32_e32 v5, v5, v5 ; 060A0B05 v_mac_f32_e32 v11, s19, v5 ; 3E160A13 image_sample_l v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[2:5], s[4:11], s[12:15] ; F0900700 00610702 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v4, s18, v9 ; 10081212 v_mac_f32_e32 v4, s16, v7 ; 3E080E10 v_mul_f32_e32 v15, s17, v8 ; 101E1011 v_mul_f32_e32 v4, v4, v15 ; 10081F04 v_sqrt_f32_e32 v4, v4 ; 7E086704 v_add_f32_e32 v4, v4, v4 ; 06080904 s_buffer_load_dword s21, s[0:3], 0xc ; C20A810C v_mac_f32_e32 v15, s16, v7 ; 3E1E0E10 v_mac_f32_e32 v15, s18, v9 ; 3E1E1212 v_mac_f32_e32 v15, s19, v4 ; 3E1E0813 v_sqrt_f32_e32 v4, v14 ; 7E08670E v_add_f32_e32 v4, v4, v4 ; 06080904 v_mac_f32_e32 v13, s19, v4 ; 3E1A0813 v_add_f32_e32 v4, 0x3b2aaaac, v13 ; 06081AFF 3B2AAAAC v_max_f32_e32 v5, v12, v10 ; 200A150C v_max3_f32 v5, v4, v11, v5 ; D2A80005 04161704 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s21, v5 ; 100C0A15 v_max_f32_e32 v13, s20, v6 ; 201A0C14 v_min_f32_e32 v6, v12, v10 ; 1E0C150C v_min3_f32 v6, v4, v11, v6 ; D2A20006 041A1704 v_max_f32_e32 v14, v15, v5 ; 201C0B0F v_min_f32_e32 v15, v15, v6 ; 1E1E0D0F v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_cmp_nlt_f32_e32 vcc, v14, v13 ; 7C1C1B0E s_and_saveexec_b64 s[20:21], vcc ; BE94246A s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E s_cbranch_execz BB0_5 ; BF880000 v_subrev_f32_e32 v4, v4, v12 ; 0A081904 v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600 v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601 v_interp_p1_f32 v8, v0, 3, 1, [m0] ; C8200700 v_interp_p2_f32 v8, [v8], v1, 3, 1, [m0] ; C8210701 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_subrev_f32_e32 v10, v10, v11 ; 0A14170A s_buffer_load_dword s22, s[0:3], 0x10 ; C20B0110 v_add_f32_e32 v11, v10, v4 ; 0616090A v_subrev_f32_e32 v4, v10, v4 ; 0A08090A v_mul_f32_e32 v10, v4, v4 ; 10140904 v_mac_f32_e32 v10, v11, v11 ; 3E14170B v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v0, v0, 3, 2, [m0] ; C8000B00 v_interp_p2_f32 v0, [v0], v1, 3, 2, [m0] ; C8010B01 v_mul_f32_e32 v1, v10, v11 ; 1002170A v_mul_f32_e32 v4, v10, v4 ; 1008090A v_mad_f32 v11, -v1, v7, v2 ; D282000B 240A0F01 v_min_f32_e64 v10, |v1|, |v4| ; D21E030A 00020901 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s22, v10 ; 10141416 v_rcp_f32_e32 v10, v10 ; 7E14550A v_mad_f32 v12, -v4, v8, v3 ; D282000C 240E1104 v_mad_f32 v14, v7, v1, v2 ; D282000E 040A0307 v_mad_f32 v15, v8, v4, v3 ; D282000F 040E0908 v_mul_f32_e32 v1, v10, v1 ; 1002030A v_mul_f32_e32 v4, v10, v4 ; 1008090A v_max_f32_e32 v1, -2.0, v1 ; 200202F5 v_max_f32_e32 v4, -2.0, v4 ; 200808F5 v_min_f32_e32 v1, 2.0, v1 ; 1E0202F4 v_min_f32_e32 v4, 2.0, v4 ; 1E0808F4 v_mad_f32 v17, -v1, v9, v2 ; D2820011 240A1301 v_mad_f32 v18, -v4, v0, v3 ; D2820012 240E0104 v_mac_f32_e32 v2, v9, v1 ; 3E040309 v_mov_b32_e32 v13, 0 ; 7E1A0280 v_mac_f32_e32 v3, v0, v4 ; 3E060900 v_mov_b32_e32 v16, v13 ; 7E20030D image_sample_l v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[11:14], s[4:11], s[12:15] ; F0900700 0061070B image_sample_l v[10:12], 7, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[4:11], s[12:15] ; F0900700 00610A0E s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v10, v7 ; 06000F0A v_add_f32_e32 v1, v11, v8 ; 0602110B v_add_f32_e32 v10, v12, v9 ; 0614130C v_mov_b32_e32 v19, v13 ; 7E26030D v_mov_b32_e32 v4, v13 ; 7E08030D image_sample_l v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[4:11], s[12:15] ; F0900700 00610711 image_sample_l v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[2:5], s[4:11], s[12:15] ; F0900700 00610202 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v2, v2, v7 ; 06040F02 v_add_f32_e32 v3, v3, v8 ; 06061103 v_add_f32_e32 v4, v4, v9 ; 06081304 v_mov_b32_e32 v11, 0x3e800000 ; 7E1602FF 3E800000 v_mul_f32_e32 v7, v11, v2 ; 100E050B v_mul_f32_e32 v8, v11, v3 ; 1010070B v_mul_f32_e32 v9, v11, v4 ; 1012090B v_mac_f32_e32 v7, v11, v0 ; 3E0E010B v_mac_f32_e32 v8, v11, v1 ; 3E10030B v_mac_f32_e32 v9, v11, v10 ; 3E12150B v_mul_f32_e32 v2, s17, v1 ; 10040211 v_mul_f32_e32 v3, s18, v10 ; 10061412 v_mac_f32_e32 v3, s16, v0 ; 3E060010 v_mul_f32_e32 v3, v3, v2 ; 10060503 v_sqrt_f32_e32 v3, v3 ; 7E066703 v_add_f32_e32 v3, v3, v3 ; 06060703 v_mac_f32_e32 v2, s16, v0 ; 3E040010 v_mac_f32_e32 v2, s18, v10 ; 3E041412 v_mac_f32_e32 v2, s19, v3 ; 3E040613 v_cmp_nlt_f32_e32 vcc, v2, v6 ; 7C1C0D02 v_mov_b32_e32 v2, -1 ; 7E0402C1 s_and_saveexec_b64 s[22:23], vcc ; BE96246A s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E s_cbranch_execz BB0_6 ; BF880000 v_mul_f32_e32 v2, s17, v8 ; 10041011 v_mul_f32_e32 v3, s18, v9 ; 10061212 v_mac_f32_e32 v3, s16, v7 ; 3E060E10 v_mul_f32_e32 v3, v3, v2 ; 10060503 v_sqrt_f32_e32 v3, v3 ; 7E066703 v_add_f32_e32 v3, v3, v3 ; 06060703 v_mac_f32_e32 v2, s16, v7 ; 3E040E10 v_mac_f32_e32 v2, s18, v9 ; 3E041212 v_mac_f32_e32 v2, s19, v3 ; 3E040613 v_cmp_lt_f32_e32 vcc, v5, v2 ; 7C020505 v_cndmask_b32_e64 v2, 0, -1, vcc ; D2000002 01A98280 s_or_b64 exec, exec, s[22:23] ; 88FE167E v_cmp_ne_i32_e32 vcc, 0, v2 ; 7D0A0480 s_and_saveexec_b64 s[22:23], vcc ; BE96246A s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E v_mul_f32_e32 v7, 0.5, v0 ; 100E00F0 v_mul_f32_e32 v8, 0.5, v1 ; 101002F0 v_mul_f32_e32 v9, 0.5, v10 ; 101214F0 s_or_b64 exec, exec, s[22:23] ; 88FE167E s_or_b64 exec, exec, s[20:21] ; 88FE147E v_cvt_pkrtz_f16_f32_e32 v0, v7, v8 ; 5E001107 v_cvt_pkrtz_f16_f32_e64 v1, v9, 1.0 ; D25E0001 0001E509 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 28 Code Size: 896 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xy, IN[1].xyxx 5: MOV OUT[1], TEMP[1] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = fmul float %13, %33 %44 = fmul float %14, %33 %45 = fmul float %15, %33 %46 = fmul float %16, %33 %47 = fmul float %17, %34 %48 = fadd float %47, %43 %49 = fmul float %18, %34 %50 = fadd float %49, %44 %51 = fmul float %19, %34 %52 = fadd float %51, %45 %53 = fmul float %20, %34 %54 = fadd float %53, %46 %55 = fmul float %21, %35 %56 = fadd float %55, %48 %57 = fmul float %22, %35 %58 = fadd float %57, %50 %59 = fmul float %23, %35 %60 = fadd float %59, %52 %61 = fmul float %24, %35 %62 = fadd float %61, %54 %63 = fmul float %25, %36 %64 = fadd float %63, %56 %65 = fmul float %26, %36 %66 = fadd float %65, %58 %67 = fmul float %27, %36 %68 = fadd float %67, %60 %69 = fmul float %28, %36 %70 = fadd float %69, %62 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v0, s12, v2 ; 1000040C s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, s6, v3 ; 3E000606 v_mul_f32_e32 v8, s13, v2 ; 1010040D v_mac_f32_e32 v8, s7, v3 ; 3E100607 v_mul_f32_e32 v9, s4, v2 ; 10120404 v_mac_f32_e32 v9, s8, v3 ; 3E120608 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mac_f32_e32 v2, s9, v3 ; 3E040609 v_mac_f32_e32 v0, s10, v4 ; 3E00080A v_mac_f32_e32 v8, s11, v4 ; 3E10080B v_mac_f32_e32 v9, s14, v4 ; 3E12080E v_mac_f32_e32 v2, s15, v4 ; 3E04080F v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mac_f32_e32 v8, s17, v5 ; 3E100A11 v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706 exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 196 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0] DCL CONST[3..4] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { 0.5000, 2.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[1].xyz, TEMP[0].xyzz, CONST[0].xyzz 3: MOV TEMP[2].y, IMM[0].xxxx 4: ADD TEMP[3].x, TEMP[1].xxxx, TEMP[1].zzzz 5: MUL TEMP[3].x, TEMP[1].yyyy, TEMP[3].xxxx 6: SQRT TEMP[3].x, TEMP[3].xxxx 7: MUL TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx 8: ADD TEMP[4].x, TEMP[1].xxxx, TEMP[1].yyyy 9: ADD TEMP[1].x, TEMP[4].xxxx, TEMP[1].zzzz 10: MAD TEMP[1].x, TEMP[3].xxxx, CONST[0].wwww, TEMP[1].xxxx 11: ADD TEMP[2].x, TEMP[1].xxxx, CONST[3].xxxx 12: MOV TEMP[1].xy, TEMP[2].xyyy 13: TEX TEMP[1].xyz, TEMP[1], SAMP[1], 2D 14: MOV TEMP[2].w, TEMP[0].wwww 15: LRP TEMP[2].xyz, CONST[4].xxxx, TEMP[0].xyzz, TEMP[1].xyzz 16: MOV OUT[0], TEMP[2] 17: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %30 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0 %32 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 %34 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %35 = bitcast <8 x i32> addrspace(2)* %34 to <32 x i8> addrspace(2)* %36 = load <32 x i8>, <32 x i8> addrspace(2)* %35, align 32, !tbaa !0 %37 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %38 = bitcast <4 x i32> addrspace(2)* %37 to <16 x i8> addrspace(2)* %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %42 = bitcast float %40 to i32 %43 = bitcast float %41 to i32 %44 = insertelement <2 x i32> undef, i32 %42, i32 0 %45 = insertelement <2 x i32> %44, i32 %43, i32 1 %46 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %45, <32 x i8> %31, <16 x i8> %33, i32 2) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = extractelement <4 x float> %46, i32 3 %51 = fmul float %47, %24 %52 = fmul float %48, %25 %53 = fmul float %49, %26 %54 = fadd float %51, %53 %55 = fmul float %52, %54 %56 = call float @llvm.sqrt.f32(float %55) %57 = fmul float %56, 2.000000e+00 %58 = fadd float %51, %52 %59 = fadd float %58, %53 %60 = fmul float %57, %27 %61 = fadd float %60, %59 %62 = fadd float %61, %28 %63 = bitcast float %62 to i32 %64 = insertelement <2 x i32> undef, i32 %63, i32 0 %65 = insertelement <2 x i32> %64, i32 1056964608, i32 1 %66 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %65, <32 x i8> %36, <16 x i8> %39, i32 2) %67 = extractelement <4 x float> %66, i32 0 %68 = extractelement <4 x float> %66, i32 1 %69 = extractelement <4 x float> %66, i32 2 %70 = call float @llvm.AMDGPU.lrp(float %29, float %47, float %67) %71 = call float @llvm.AMDGPU.lrp(float %29, float %48, float %68) %72 = call float @llvm.AMDGPU.lrp(float %29, float %49, float %69) %73 = call i32 @llvm.SI.packf16(float %70, float %71) %74 = bitcast i32 %73 to float %75 = call i32 @llvm.SI.packf16(float %72, float %50) %76 = bitcast i32 %75 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %74, float %76, float %74, float %76) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_buffer_load_dword s9, s[0:3], 0x2 ; C2048102 s_buffer_load_dword s10, s[0:3], 0x0 ; C2050100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_load_dwordx4 s[24:27], s[4:5], 0x4 ; C08C0504 s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[12:15] ; F0800F00 00640002 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0xc ; C202810C s_buffer_load_dword s0, s[0:3], 0x10 ; C2000110 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s8, v1 ; 10080208 v_mul_f32_e32 v5, s9, v2 ; 100A0409 v_mac_f32_e32 v5, s10, v0 ; 3E0A000A v_mul_f32_e32 v5, v5, v4 ; 100A0905 v_mac_f32_e32 v4, s10, v0 ; 3E08000A v_mac_f32_e32 v4, s9, v2 ; 3E080409 v_sqrt_f32_e32 v5, v5 ; 7E0A6705 v_add_f32_e32 v5, v5, v5 ; 060A0B05 v_mac_f32_e32 v4, s4, v5 ; 3E080A04 v_add_f32_e32 v4, s5, v4 ; 06080805 v_mov_b32_e32 v5, 0.5 ; 7E0A02F0 image_sample v[4:6], 7, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[28:35], s[24:27] ; F0800700 00C70404 v_sub_f32_e64 v7, 1.0, s0 ; D2080007 000000F2 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v4, v4, v7 ; 10080F04 v_mul_f32_e32 v5, v5, v7 ; 100A0F05 v_mul_f32_e32 v6, v6, v7 ; 100C0F06 v_mac_f32_e32 v4, s0, v0 ; 3E080000 v_mac_f32_e32 v5, s0, v1 ; 3E0A0200 v_mac_f32_e32 v6, s0, v2 ; 3E0C0400 v_cvt_pkrtz_f16_f32_e32 v0, v6, v3 ; 5E000706 v_cvt_pkrtz_f16_f32_e32 v1, v4, v5 ; 5E020B04 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 8 Code Size: 192 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0..14] DCL CONST[16..19] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.2500, -1.0000, 10.0000, 0.4999} IMM[1] INT32 {256, 0, 1, 2} IMM[2] FLT32 { 1.0000, 0.0000, 0.1000, 0.0039} IMM[3] FLT32 { 16.0000, -8.0000, 4.0000, -2.0000} IMM[4] INT32 {4, 0, 0, 0} 0: MUL TEMP[0].x, IN[2].xxxx, IMM[0].xxxx 1: F2I TEMP[0].x, TEMP[0].xxxx 2: F2I TEMP[1].x, IN[2].yyyy 3: IDIV TEMP[2].x, TEMP[1].xxxx, IMM[1].xxxx 4: I2F TEMP[3].x, TEMP[0].xxxx 5: I2F TEMP[4].x, TEMP[2].xxxx 6: MOV TEMP[3].y, TEMP[4].xxxx 7: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx 8: INEG TEMP[2].x, TEMP[2].xxxx 9: UADD TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx 10: I2F TEMP[2].x, TEMP[2].xxxx 11: MOV TEMP[3].z, TEMP[2].xxxx 12: ADD TEMP[2].xyz, TEMP[3].xyzz, IMM[0].yyyy 13: I2F TEMP[1].x, TEMP[1].xxxx 14: ADD TEMP[1].x, IN[2].yyyy, -TEMP[1].xxxx 15: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].wwww 16: F2I TEMP[1].x, TEMP[1].xxxx 17: USEQ TEMP[4].x, TEMP[1].xxxx, IMM[1].yyyy 18: AND TEMP[4].x, TEMP[4].xxxx, IMM[2].xxxx 19: USEQ TEMP[5].x, TEMP[1].xxxx, IMM[1].zzzz 20: AND TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx 21: MOV TEMP[4].y, TEMP[5].xxxx 22: USEQ TEMP[1].x, TEMP[1].xxxx, IMM[1].wwww 23: AND TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx 24: MOV TEMP[4].z, TEMP[1].xxxx 25: MOV TEMP[1].xyz, TEMP[4].xyzx 26: MOV TEMP[4].w, IMM[2].yyyy 27: MOV TEMP[4].xyz, TEMP[3].xyzx 28: MOV TEMP[3].y, IMM[2].yzyy 29: DP4 TEMP[4].x, TEMP[1], TEMP[4] 30: MUL TEMP[3].x, TEMP[4].xxxx, IMM[2].wwww 31: MOV TEMP[3].xy, TEMP[3].xyyy 32: MOV TEMP[3].w, IMM[2].yyyy 33: TXL TEMP[3].xy, TEMP[3], SAMP[0], 2D 34: MAD TEMP[4].x, TEMP[3].xxxx, IMM[3].xxxx, IMM[3].yyyy 35: MOV TEMP[2].w, TEMP[4].xxxx 36: MUL TEMP[3].x, TEMP[3].yyyy, IMM[3].zzzz 37: MOV TEMP[1].w, TEMP[3].xxxx 38: UMUL TEMP[0].x, IMM[4].xxxx, TEMP[0].xxxx 39: I2F TEMP[0].x, TEMP[0].xxxx 40: ADD TEMP[0].x, IN[2].xxxx, -TEMP[0].xxxx 41: ADD TEMP[0].x, TEMP[0].xxxx, IMM[3].wwww 42: MOV TEMP[3].x, CONST[11].xxxx 43: MOV TEMP[3].y, CONST[12].xxxx 44: MOV TEMP[3].z, CONST[13].xxxx 45: MOV TEMP[4].x, CONST[11].yyyy 46: MOV TEMP[4].y, CONST[12].yyyy 47: MOV TEMP[4].z, CONST[13].yyyy 48: MOV TEMP[5].x, CONST[11].zzzz 49: MOV TEMP[5].y, CONST[12].zzzz 50: MOV TEMP[5].z, CONST[13].zzzz 51: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 52: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 53: MAD TEMP[0].xyz, TEMP[5].xyzz, TEMP[0].xxxx, TEMP[3].xyzz 54: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[0].xyzz 55: RSQ TEMP[3].x, TEMP[3].xxxx 56: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xxxx 57: MOV TEMP[3].w, IMM[2].xxxx 58: MOV TEMP[3].xyz, TEMP[0].xyzx 59: DP4 TEMP[4].x, CONST[0], TEMP[3] 60: DP4 TEMP[5].x, CONST[1], TEMP[3] 61: MOV TEMP[4].y, TEMP[5].xxxx 62: DP4 TEMP[3].x, CONST[2], TEMP[3] 63: MOV TEMP[4].z, TEMP[3].xxxx 64: MUL TEMP[3], TEMP[0].xyzz, TEMP[0].yzzx 65: DP4 TEMP[5].x, CONST[3], TEMP[3] 66: DP4 TEMP[6].x, CONST[4], TEMP[3] 67: MOV TEMP[5].y, TEMP[6].xxxx 68: DP4 TEMP[3].x, CONST[5], TEMP[3] 69: MOV TEMP[5].z, TEMP[3].xxxx 70: MUL TEMP[3], CONST[16], IN[0].xxxx 71: MAD TEMP[3], CONST[17], IN[0].yyyy, TEMP[3] 72: MAD TEMP[3], CONST[18], IN[0].zzzz, TEMP[3] 73: MAD TEMP[3], CONST[19], IN[0].wwww, TEMP[3] 74: MUL TEMP[6], CONST[7], IN[0].xxxx 75: MAD TEMP[6], CONST[8], IN[0].yyyy, TEMP[6] 76: MAD TEMP[6], CONST[9], IN[0].zzzz, TEMP[6] 77: MAD TEMP[6].xyz, CONST[10], IN[0].wwww, TEMP[6] 78: MUL TEMP[7].x, TEMP[0].yyyy, TEMP[0].yyyy 79: MAD TEMP[7].x, TEMP[0].xxxx, TEMP[0].xxxx, -TEMP[7].xxxx 80: MAD TEMP[5].xyz, CONST[6].xyzz, TEMP[7].xxxx, TEMP[5].xyzz 81: ADD TEMP[4].xyz, TEMP[5].xyzz, TEMP[4].xyzz 82: MOV TEMP[0].xyz, TEMP[0].xyzx 83: MOV TEMP[0].w, TEMP[6].xxxx 84: MOV TEMP[5].xy, TEMP[6].yzyy 85: MOV TEMP[5].zw, TEMP[4].yyxy 86: MOV TEMP[4].x, TEMP[4].zzzz 87: MOV OUT[5], TEMP[4] 88: MOV OUT[1], TEMP[2] 89: MOV OUT[2], TEMP[1] 90: MOV OUT[4], TEMP[5] 91: MOV OUT[3], TEMP[0] 92: MOV OUT[0], TEMP[3] 93: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %77 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %78 = load <32 x i8>, <32 x i8> addrspace(2)* %77, align 32, !tbaa !0 %79 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 %83 = add i32 %5, %7 %84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83) %85 = extractelement <4 x float> %84, i32 0 %86 = extractelement <4 x float> %84, i32 1 %87 = extractelement <4 x float> %84, i32 2 %88 = extractelement <4 x float> %84, i32 3 %89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0 %91 = add i32 %5, %7 %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91) %93 = extractelement <4 x float> %92, i32 0 %94 = extractelement <4 x float> %92, i32 1 %95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0 %97 = add i32 %5, %7 %98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97) %99 = extractelement <4 x float> %98, i32 0 %100 = extractelement <4 x float> %98, i32 1 %101 = fmul float %99, 2.500000e-01 %102 = fptosi float %101 to i32 %103 = fptosi float %100 to i32 %104 = sdiv i32 %103, 256 %105 = sitofp i32 %102 to float %106 = sitofp i32 %104 to float %107 = shl nsw i32 %104, 8 %108 = sub i32 %103, %107 %109 = sitofp i32 %108 to float %110 = fadd float %105, -1.000000e+00 %111 = fadd float %106, -1.000000e+00 %112 = fadd float %109, -1.000000e+00 %113 = sitofp i32 %103 to float %114 = fsub float %100, %113 %115 = fmul float %114, 1.000000e+01 %116 = fadd float %115, 0x3FDFFE5CA0000000 %117 = fptosi float %116 to i32 %118 = icmp eq i32 %117, 0 %119 = select i1 %118, float 1.000000e+00, float 0.000000e+00 %120 = icmp eq i32 %117, 1 %121 = select i1 %120, float 1.000000e+00, float 0.000000e+00 %122 = icmp eq i32 %117, 2 %123 = select i1 %122, float 1.000000e+00, float 0.000000e+00 %124 = fmul float %119, %105 %125 = fmul float %121, %106 %126 = fadd float %124, %125 %127 = fmul float %123, %109 %128 = fadd float %126, %127 %129 = fadd float %128, 0.000000e+00 %130 = fmul float %129, 0x3F70101020000000 %131 = bitcast float %130 to i32 %132 = insertelement <4 x i32> undef, i32 %131, i32 0 %133 = insertelement <4 x i32> %132, i32 1036831949, i32 1 %134 = insertelement <4 x i32> %133, i32 0, i32 2 %135 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %134, <32 x i8> %78, <16 x i8> %80, i32 2) %136 = extractelement <4 x float> %135, i32 0 %137 = extractelement <4 x float> %135, i32 1 %138 = fmul float %136, 1.600000e+01 %139 = fadd float %138, -8.000000e+00 %140 = fmul float %137, 4.000000e+00 %141 = shl i32 %102, 2 %142 = sitofp i32 %141 to float %143 = fsub float %99, %142 %144 = fadd float %143, -2.000000e+00 %145 = fmul float %52, %93 %146 = fmul float %55, %93 %147 = fmul float %58, %93 %148 = fmul float %53, %94 %149 = fadd float %148, %145 %150 = fmul float %56, %94 %151 = fadd float %150, %146 %152 = fmul float %59, %94 %153 = fadd float %152, %147 %154 = fmul float %54, %144 %155 = fadd float %154, %149 %156 = fmul float %57, %144 %157 = fadd float %156, %151 %158 = fmul float %60, %144 %159 = fadd float %158, %153 %160 = fmul float %155, %155 %161 = fmul float %157, %157 %162 = fadd float %161, %160 %163 = fmul float %159, %159 %164 = fadd float %162, %163 %165 = call float @llvm.AMDGPU.rsq.clamped.f32(float %164) %166 = fmul float %155, %165 %167 = fmul float %157, %165 %168 = fmul float %159, %165 %169 = fmul float %13, %166 %170 = fmul float %14, %167 %171 = fadd float %169, %170 %172 = fmul float %15, %168 %173 = fadd float %171, %172 %174 = fadd float %173, %16 %175 = fmul float %17, %166 %176 = fmul float %18, %167 %177 = fadd float %175, %176 %178 = fmul float %19, %168 %179 = fadd float %177, %178 %180 = fadd float %179, %20 %181 = fmul float %21, %166 %182 = fmul float %22, %167 %183 = fadd float %181, %182 %184 = fmul float %23, %168 %185 = fadd float %183, %184 %186 = fadd float %185, %24 %187 = fmul float %166, %167 %188 = fmul float %167, %168 %189 = fmul float %168, %168 %190 = fmul float %168, %166 %191 = fmul float %25, %187 %192 = fmul float %26, %188 %193 = fadd float %191, %192 %194 = fmul float %27, %189 %195 = fadd float %193, %194 %196 = fmul float %28, %190 %197 = fadd float %195, %196 %198 = fmul float %29, %187 %199 = fmul float %30, %188 %200 = fadd float %198, %199 %201 = fmul float %31, %189 %202 = fadd float %200, %201 %203 = fmul float %32, %190 %204 = fadd float %202, %203 %205 = fmul float %33, %187 %206 = fmul float %34, %188 %207 = fadd float %205, %206 %208 = fmul float %35, %189 %209 = fadd float %207, %208 %210 = fmul float %36, %190 %211 = fadd float %209, %210 %212 = fmul float %61, %85 %213 = fmul float %62, %85 %214 = fmul float %63, %85 %215 = fmul float %64, %85 %216 = fmul float %65, %86 %217 = fadd float %216, %212 %218 = fmul float %66, %86 %219 = fadd float %218, %213 %220 = fmul float %67, %86 %221 = fadd float %220, %214 %222 = fmul float %68, %86 %223 = fadd float %222, %215 %224 = fmul float %69, %87 %225 = fadd float %224, %217 %226 = fmul float %70, %87 %227 = fadd float %226, %219 %228 = fmul float %71, %87 %229 = fadd float %228, %221 %230 = fmul float %72, %87 %231 = fadd float %230, %223 %232 = fmul float %73, %88 %233 = fadd float %232, %225 %234 = fmul float %74, %88 %235 = fadd float %234, %227 %236 = fmul float %75, %88 %237 = fadd float %236, %229 %238 = fmul float %76, %88 %239 = fadd float %238, %231 %240 = fmul float %40, %85 %241 = fmul float %41, %85 %242 = fmul float %42, %85 %243 = fmul float %43, %86 %244 = fadd float %243, %240 %245 = fmul float %44, %86 %246 = fadd float %245, %241 %247 = fmul float %45, %86 %248 = fadd float %247, %242 %249 = fmul float %46, %87 %250 = fadd float %249, %244 %251 = fmul float %47, %87 %252 = fadd float %251, %246 %253 = fmul float %48, %87 %254 = fadd float %253, %248 %255 = fmul float %49, %88 %256 = fadd float %255, %250 %257 = fmul float %50, %88 %258 = fadd float %257, %252 %259 = fmul float %51, %88 %260 = fadd float %259, %254 %261 = fmul float %167, %167 %262 = fmul float %166, %166 %263 = fsub float %262, %261 %264 = fmul float %37, %263 %265 = fadd float %264, %197 %266 = fmul float %38, %263 %267 = fadd float %266, %204 %268 = fmul float %39, %263 %269 = fadd float %268, %211 %270 = fadd float %265, %174 %271 = fadd float %267, %180 %272 = fadd float %269, %186 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %110, float %111, float %112, float %139) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %119, float %121, float %123, float %140) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %166, float %167, float %168, float %256) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %258, float %260, float %270, float %271) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %272, float %271, float %272, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %233, float %235, float %237, float %239) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 0x3efff2e5 ; 7E0202FF 3EFFF2E5 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s24, s[0:3], 0x2d ; C20C012D buffer_load_format_xyzw v[2:5], v0, s[12:15], 0 idxen ; E00C2000 80030200 s_buffer_load_dword s10, s[0:3], 0x2e ; C205012E s_buffer_load_dword s13, s[0:3], 0x30 ; C2068130 buffer_load_format_xyzw v[6:9], v0, s[16:19], 0 idxen ; E00C2000 80040600 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[8:11], v0, s[20:23], 0 idxen ; E00C2000 80050800 s_buffer_load_dword s14, s[0:3], 0x31 ; C2070131 s_buffer_load_dword s11, s[0:3], 0x32 ; C2058132 s_buffer_load_dword s15, s[0:3], 0x34 ; C2078134 s_buffer_load_dword s16, s[0:3], 0x35 ; C2080135 s_buffer_load_dword s12, s[0:3], 0x36 ; C2060136 s_buffer_load_dword s9, s[0:3], 0x40 ; C2048140 s_buffer_load_dword s8, s[0:3], 0x41 ; C2040141 s_buffer_load_dword s17, s[0:3], 0x2c ; C208812C s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s17, v6 ; 10000C11 v_mac_f32_e32 v0, s24, v7 ; 3E000E18 v_mul_f32_e32 v10, 0x3e800000, v8 ; 101410FF 3E800000 v_cvt_i32_f32_e32 v11, v9 ; 7E161109 v_cvt_i32_f32_e32 v10, v10 ; 7E14110A v_mul_f32_e32 v12, s13, v6 ; 10180C0D v_mac_f32_e32 v12, s14, v7 ; 3E180E0E v_cvt_f32_i32_e32 v13, v11 ; 7E1A0B0B v_lshlrev_b32_e32 v14, 2, v10 ; 341C1482 v_cvt_f32_i32_e32 v14, v14 ; 7E1C0B0E v_mul_f32_e32 v6, s15, v6 ; 100C0C0F v_mac_f32_e32 v6, s16, v7 ; 3E0C0E10 v_subrev_f32_e32 v7, v13, v9 ; 0A0E130D v_subrev_f32_e32 v8, v14, v8 ; 0A10110E v_madmk_f32_e32 v1, v7, v1, 0x41200000 ; 40020307 41200000 v_ashrrev_i32_e32 v7, 31, v11 ; 300E169F v_lshrrev_b32_e32 v7, 24, v7 ; 2C0E0E98 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_add_i32_e32 v7, v11, v7 ; 4A0E0F0B v_and_b32_e32 v9, 0xffffff00, v7 ; 36120EFF FFFFFF00 v_sub_i32_e32 v9, v11, v9 ; 4C12130B v_cmp_eq_i32_e32 vcc, 0, v1 ; 7D040280 v_cndmask_b32_e64 v11, 0, 1.0, vcc ; D200000B 01A9E480 v_cmp_eq_i32_e32 vcc, 1, v1 ; 7D040281 v_cndmask_b32_e64 v13, 0, 1.0, vcc ; D200000D 01A9E480 v_cvt_f32_i32_e32 v10, v10 ; 7E140B0A v_ashrrev_i32_e32 v7, 8, v7 ; 300E0E88 v_cvt_f32_i32_e32 v7, v7 ; 7E0E0B07 v_cvt_f32_i32_e32 v9, v9 ; 7E120B09 s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500 s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 v_cmp_eq_i32_e32 vcc, 2, v1 ; 7D040282 v_cndmask_b32_e64 v1, 0, 1.0, vcc ; D2000001 01A9E480 v_mul_f32_e32 v14, v7, v13 ; 101C1B07 v_mac_f32_e32 v14, v10, v11 ; 3E1C170A v_mac_f32_e32 v14, v9, v1 ; 3E1C0309 v_add_f32_e32 v14, 0, v14 ; 061C1C80 v_mov_b32_e32 v17, 0 ; 7E220280 v_mul_f32_e32 v15, 0x3b808081, v14 ; 101E1CFF 3B808081 v_mov_b32_e32 v16, 0x3dcccccd ; 7E2002FF 3DCCCCCD s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[14:15], 3, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[20:27], s[16:19] ; F0900300 00850E0F v_add_f32_e32 v10, -1.0, v10 ; 061414F3 v_add_f32_e32 v7, -1.0, v7 ; 060E0EF3 v_add_f32_e32 v9, -1.0, v9 ; 061212F3 v_mov_b32_e32 v16, 0xc1000000 ; 7E2002FF C1000000 s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v14, v14, v16, 0x41800000 ; 401C210E 41800000 exp 15, 32, 0, 0, 0, v10, v7, v9, v14 ; F800020F 0E09070A s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v7, 4.0, v15 ; 100E1EF6 exp 15, 33, 0, 0, 0, v11, v13, v1, v7 ; F800021F 07010D0B s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v1, -2.0, v8 ; 060210F5 v_mac_f32_e32 v0, s10, v1 ; 3E00020A s_buffer_load_dword s5, s[0:3], 0x20 ; C2028120 s_buffer_load_dword s6, s[0:3], 0x1d ; C203011D s_buffer_load_dword s7, s[0:3], 0x21 ; C2038121 v_mac_f32_e32 v12, s11, v1 ; 3E18020B s_buffer_load_dword s10, s[0:3], 0x24 ; C2050124 v_mac_f32_e32 v6, s12, v1 ; 3E0C020C s_buffer_load_dword s11, s[0:3], 0x1e ; C205811E s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v2 ; 10020404 s_buffer_load_dword s4, s[0:3], 0x22 ; C2020122 s_buffer_load_dword s12, s[0:3], 0x25 ; C2060125 v_mac_f32_e32 v1, s5, v3 ; 3E020605 v_mul_f32_e32 v7, s6, v2 ; 100E0406 v_mac_f32_e32 v7, s7, v3 ; 3E0E0607 s_buffer_load_dword s5, s[0:3], 0x28 ; C2028128 v_mac_f32_e32 v1, s10, v4 ; 3E02080A v_mul_f32_e32 v8, v0, v0 ; 10100100 v_mac_f32_e32 v8, v12, v12 ; 3E10190C v_mac_f32_e32 v8, v6, v6 ; 3E100D06 v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101 s_buffer_load_dword s10, s[0:3], 0x29 ; C2050129 s_buffer_load_dword s13, s[0:3], 0x2a ; C206812A s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v1, s5, v5 ; 3E020A05 v_mul_f32_e32 v0, v8, v0 ; 10000108 v_mul_f32_e32 v9, v8, v12 ; 10121908 v_mul_f32_e32 v6, v8, v6 ; 100C0D08 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s14, s[0:3], 0x2 ; C2070102 s_buffer_load_dword s15, s[0:3], 0x3 ; C2078103 s_buffer_load_dword s16, s[0:3], 0x4 ; C2080104 v_mul_f32_e32 v8, s7, v9 ; 10101207 v_mac_f32_e32 v8, s6, v0 ; 3E100006 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 s_buffer_load_dword s7, s[0:3], 0xc ; C203810C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0x7 ; C2090107 s_buffer_load_dword s19, s[0:3], 0x8 ; C2098108 s_buffer_load_dword s20, s[0:3], 0x9 ; C20A0109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s5, v9 ; 10141205 v_mac_f32_e32 v10, s16, v0 ; 3E140010 v_mac_f32_e32 v8, s14, v6 ; 3E100C0E s_buffer_load_dword s5, s[0:3], 0xe ; C202810E v_mac_f32_e32 v10, s6, v6 ; 3E140C06 v_mul_f32_e32 v11, v6, v9 ; 10161306 v_mul_f32_e32 v12, s17, v11 ; 10181611 v_mul_f32_e32 v13, v9, v0 ; 101A0109 v_mac_f32_e32 v12, s7, v13 ; 3E181A07 s_buffer_load_dword s6, s[0:3], 0xf ; C203010F v_add_f32_e32 v8, s15, v8 ; 0610100F s_buffer_load_dword s7, s[0:3], 0x18 ; C2038118 v_mul_f32_e32 v14, v6, v6 ; 101C0D06 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v12, s5, v14 ; 3E181C05 s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_buffer_load_dword s14, s[0:3], 0x11 ; C2070111 s_buffer_load_dword s15, s[0:3], 0x12 ; C2078112 s_buffer_load_dword s16, s[0:3], 0x13 ; C2080113 v_mul_f32_e32 v15, v0, v6 ; 101E0D00 v_mac_f32_e32 v12, s6, v15 ; 3E181E06 v_mul_f32_e32 v16, v9, v9 ; 10201309 v_mad_f32 v16, v0, v0, -v16 ; D2820010 84420100 v_mac_f32_e32 v12, s7, v16 ; 3E182007 s_buffer_load_dword s6, s[0:3], 0x19 ; C2030119 exp 15, 34, 0, 0, 0, v0, v9, v6, v1 ; F800022F 01060900 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_add_f32_e32 v1, v8, v12 ; 06021908 v_mul_f32_e32 v8, s14, v11 ; 1010160E v_mac_f32_e32 v8, s5, v13 ; 3E101A05 s_buffer_load_dword s5, s[0:3], 0x26 ; C2028126 v_add_f32_e32 v10, s18, v10 ; 06141412 v_mac_f32_e32 v8, s15, v14 ; 3E101C0F v_mac_f32_e32 v8, s16, v15 ; 3E101E10 s_buffer_load_dword s7, s[0:3], 0x1a ; C203811A v_mac_f32_e32 v8, s6, v16 ; 3E102006 v_add_f32_e32 v8, v10, v8 ; 0610110A v_mul_f32_e32 v10, s11, v2 ; 1014040B v_mac_f32_e32 v10, s4, v3 ; 3E140604 v_mac_f32_e32 v7, s12, v4 ; 3E0E080C s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v10, s5, v4 ; 3E140805 v_mac_f32_e32 v7, s10, v5 ; 3E0E0A0A s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_buffer_load_dword s5, s[0:3], 0x15 ; C2028115 s_buffer_load_dword s6, s[0:3], 0x14 ; C2030114 s_buffer_load_dword s10, s[0:3], 0x16 ; C2050116 s_buffer_load_dword s11, s[0:3], 0x17 ; C2058117 v_mac_f32_e32 v10, s13, v5 ; 3E140A0D s_buffer_load_dword s12, s[0:3], 0xb ; C206010B exp 15, 35, 0, 0, 0, v7, v10, v1, v8 ; F800023F 08010A07 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v1, s20, v9 ; 10021214 v_mac_f32_e32 v1, s19, v0 ; 3E020013 v_mac_f32_e32 v1, s4, v6 ; 3E020C04 v_mul_f32_e32 v0, s5, v11 ; 10001605 v_mac_f32_e32 v0, s6, v13 ; 3E001A06 v_mac_f32_e32 v0, s10, v14 ; 3E001C0A v_mac_f32_e32 v0, s11, v15 ; 3E001E0B v_mac_f32_e32 v0, s7, v16 ; 3E002007 v_add_f32_e32 v1, s12, v1 ; 0602020C v_add_f32_e32 v0, v1, v0 ; 06000101 exp 15, 36, 0, 0, 0, v0, v8, v0, v17 ; F800024F 11000800 s_buffer_load_dword s4, s[0:3], 0x42 ; C2020142 s_buffer_load_dword s5, s[0:3], 0x43 ; C2028143 s_buffer_load_dword s6, s[0:3], 0x44 ; C2030144 s_buffer_load_dword s7, s[0:3], 0x45 ; C2038145 s_buffer_load_dword s10, s[0:3], 0x46 ; C2050146 s_buffer_load_dword s11, s[0:3], 0x47 ; C2058147 s_buffer_load_dword s12, s[0:3], 0x48 ; C2060148 s_buffer_load_dword s13, s[0:3], 0x49 ; C2068149 s_buffer_load_dword s14, s[0:3], 0x4a ; C207014A s_buffer_load_dword s15, s[0:3], 0x4b ; C207814B s_buffer_load_dword s16, s[0:3], 0x4c ; C208014C s_buffer_load_dword s17, s[0:3], 0x4d ; C208814D s_buffer_load_dword s18, s[0:3], 0x4e ; C209014E s_buffer_load_dword s0, s[0:3], 0x4f ; C200014F s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s9, v2 ; 10000409 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s6, v3 ; 3E000606 v_mul_f32_e32 v1, s8, v2 ; 10020408 v_mac_f32_e32 v1, s7, v3 ; 3E020607 v_mul_f32_e32 v6, s4, v2 ; 100C0404 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mac_f32_e32 v6, s10, v3 ; 3E0C060A v_mac_f32_e32 v2, s11, v3 ; 3E04060B v_mac_f32_e32 v0, s12, v4 ; 3E00080C v_mac_f32_e32 v1, s13, v4 ; 3E02080D v_mac_f32_e32 v6, s14, v4 ; 3E0C080E v_mac_f32_e32 v2, s15, v4 ; 3E04080F v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mac_f32_e32 v1, s17, v5 ; 3E020A11 v_mac_f32_e32 v6, s18, v5 ; 3E0C0A12 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 exp 15, 12, 0, 1, 0, v0, v1, v6, v2 ; F80008CF 02060100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 20 Code Size: 960 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL SVIEW[6], 2D, FLOAT DCL SVIEW[7], 2D, FLOAT DCL SVIEW[8], 2D, FLOAT DCL SVIEW[9], 2D, FLOAT DCL CONST[0..2] DCL CONST[13..21] DCL TEMP[0..38], LOCAL IMM[0] FLT32 { -0.2000, 7.0000, 0.0100, 0.5000} IMM[1] FLT32 { 64.0000, -64.0000, 4.0000, 0.6931} IMM[2] FLT32 { 0.0039, 0.0020, 1.0000, 2.0000} IMM[3] FLT32 { 3.0000, 0.0000, -1.0000, 0.0001} IMM[4] FLT32 { 32.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].x, IN[2].wwww 1: MOV TEMP[0].yz, IN[3].yxyy 2: MOV TEMP[1].xy, IN[3].zwzz 3: MOV TEMP[1].z, IN[4].xxxx 4: DP3 TEMP[2].x, CONST[1].xyzz, CONST[1].xyzz 5: RSQ TEMP[2].x, TEMP[2].xxxx 6: MUL TEMP[2].xyz, CONST[1].xyzz, TEMP[2].xxxx 7: ADD TEMP[3].xyz, CONST[0].xyzz, -TEMP[0].xyzz 8: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 9: RSQ TEMP[4].x, TEMP[4].xxxx 10: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 11: ABS TEMP[4].xyz, IN[2].xyzz 12: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 13: RSQ TEMP[5].x, TEMP[5].xxxx 14: MAD TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx, IMM[0].xxxx 15: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[0].yyyy 16: MAX TEMP[4].xyz, TEMP[4].xyzz, IMM[0].zzzz 17: ADD TEMP[5].x, TEMP[4].xxxx, TEMP[4].yyyy 18: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[4].zzzz 19: RCP TEMP[5].xyz, TEMP[5].xxxx 20: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xyzz 21: ADD TEMP[5], IN[0], IMM[0].wwww 22: FLR TEMP[5].xyz, TEMP[5] 23: MOV TEMP[6].x, CONST[13].xxxx 24: MUL TEMP[7].x, TEMP[5].xxxx, CONST[13].xxxx 25: MOV TEMP[8].x, TEMP[7].xxxx 26: FLR TEMP[7].x, TEMP[7].xxxx 27: MUL TEMP[7].x, TEMP[7].xxxx, CONST[13].xxxx 28: FSGE TEMP[9].x, TEMP[5].xxxx, IMM[1].xxxx 29: UIF TEMP[9].xxxx :0 30: MOV TEMP[6].x, CONST[14].xxxx 31: ADD TEMP[9].x, TEMP[5].xxxx, IMM[1].yyyy 32: MUL TEMP[9].x, TEMP[9].xxxx, CONST[14].xxxx 33: MOV TEMP[8].x, TEMP[9].xxxx 34: FLR TEMP[10].x, TEMP[9].xxxx 35: MUL TEMP[10].x, TEMP[10].xxxx, CONST[14].xxxx 36: MOV TEMP[7].x, TEMP[10].xxxx 37: FRC TEMP[9].x, TEMP[9].xxxx 38: FRC TEMP[11].x, TEMP[10].xxxx 39: MOV TEMP[9].y, TEMP[11].xxxx 40: FLR TEMP[10].x, TEMP[10].xxxx 41: ADD TEMP[10].x, TEMP[10].xxxx, IMM[1].zzzz 42: MOV TEMP[9].z, TEMP[10].xxxx 43: MOV TEMP[9].xyz, TEMP[9].xyzx 44: ELSE :0 45: FRC TEMP[8].x, TEMP[8].xxxx 46: FRC TEMP[10].x, TEMP[7].xxxx 47: MOV TEMP[8].y, TEMP[10].xxxx 48: FLR TEMP[7].x, TEMP[7].xxxx 49: MOV TEMP[8].z, TEMP[7].xxxx 50: MOV TEMP[9].xyz, TEMP[8].xyzx 51: ENDIF 52: MOV TEMP[7].x, CONST[13].xxxx 53: MUL TEMP[8].x, TEMP[5].yyyy, CONST[13].xxxx 54: MOV TEMP[10].x, TEMP[8].xxxx 55: FLR TEMP[8].x, TEMP[8].xxxx 56: MUL TEMP[8].x, TEMP[8].xxxx, CONST[13].xxxx 57: FSGE TEMP[11].x, TEMP[5].yyyy, IMM[1].xxxx 58: UIF TEMP[11].xxxx :0 59: MOV TEMP[7].x, CONST[14].xxxx 60: ADD TEMP[11].x, TEMP[5].yyyy, IMM[1].yyyy 61: MUL TEMP[11].x, TEMP[11].xxxx, CONST[14].xxxx 62: MOV TEMP[10].x, TEMP[11].xxxx 63: FLR TEMP[12].x, TEMP[11].xxxx 64: MUL TEMP[12].x, TEMP[12].xxxx, CONST[14].xxxx 65: MOV TEMP[8].x, TEMP[12].xxxx 66: FRC TEMP[11].x, TEMP[11].xxxx 67: FRC TEMP[13].x, TEMP[12].xxxx 68: MOV TEMP[11].y, TEMP[13].xxxx 69: FLR TEMP[12].x, TEMP[12].xxxx 70: ADD TEMP[12].x, TEMP[12].xxxx, IMM[1].zzzz 71: MOV TEMP[11].z, TEMP[12].xxxx 72: MOV TEMP[11].xyz, TEMP[11].xyzx 73: ELSE :0 74: FRC TEMP[10].x, TEMP[10].xxxx 75: FRC TEMP[12].x, TEMP[8].xxxx 76: MOV TEMP[10].y, TEMP[12].xxxx 77: FLR TEMP[8].x, TEMP[8].xxxx 78: MOV TEMP[10].z, TEMP[8].xxxx 79: MOV TEMP[11].xyz, TEMP[10].xyzx 80: ENDIF 81: MOV TEMP[8].x, CONST[13].xxxx 82: MUL TEMP[10].x, TEMP[5].zzzz, CONST[13].xxxx 83: MOV TEMP[12].x, TEMP[10].xxxx 84: FLR TEMP[10].x, TEMP[10].xxxx 85: MUL TEMP[10].x, TEMP[10].xxxx, CONST[13].xxxx 86: FSGE TEMP[13].x, TEMP[5].zzzz, IMM[1].xxxx 87: UIF TEMP[13].xxxx :0 88: MOV TEMP[8].x, CONST[14].xxxx 89: ADD TEMP[5].x, TEMP[5].zzzz, IMM[1].yyyy 90: MUL TEMP[5].x, TEMP[5].xxxx, CONST[14].xxxx 91: MOV TEMP[12].x, TEMP[5].xxxx 92: FLR TEMP[13].x, TEMP[5].xxxx 93: MUL TEMP[13].x, TEMP[13].xxxx, CONST[14].xxxx 94: MOV TEMP[10].x, TEMP[13].xxxx 95: FRC TEMP[5].x, TEMP[5].xxxx 96: FRC TEMP[14].x, TEMP[13].xxxx 97: MOV TEMP[5].y, TEMP[14].xxxx 98: FLR TEMP[13].x, TEMP[13].xxxx 99: ADD TEMP[13].x, TEMP[13].xxxx, IMM[1].zzzz 100: MOV TEMP[5].z, TEMP[13].xxxx 101: MOV TEMP[5].xyz, TEMP[5].xyzx 102: ELSE :0 103: FRC TEMP[12].x, TEMP[12].xxxx 104: FRC TEMP[13].x, TEMP[10].xxxx 105: MOV TEMP[12].y, TEMP[13].xxxx 106: FLR TEMP[10].x, TEMP[10].xxxx 107: MOV TEMP[12].z, TEMP[10].xxxx 108: MOV TEMP[5].xyz, TEMP[12].xyzx 109: ENDIF 110: ADD TEMP[10].xyz, TEMP[0].xyzz, -CONST[0].xyzz 111: DP3 TEMP[10].x, TEMP[10].xyzz, TEMP[10].xyzz 112: MUL TEMP[10].x, CONST[19].xxxx, TEMP[10].xxxx 113: LG2 TEMP[10].x, TEMP[10].xxxx 114: MUL TEMP[10].x, TEMP[10].xxxx, IMM[1].wwww 115: MUL TEMP[10].x, TEMP[10].xxxx, CONST[18].xxxx 116: MOV TEMP[12].xy, TEMP[0].xyxx 117: MOV TEMP[13].x, IMM[2].xxxx 118: FSNE TEMP[14].x, CONST[13].xxxx, TEMP[6].xxxx 119: UIF TEMP[14].xxxx :0 120: MOV TEMP[13].x, IMM[2].yyyy 121: RCP TEMP[14].x, CONST[16].xxxx 122: MUL TEMP[12].xy, TEMP[0].xyyy, TEMP[14].xxxx 123: ELSE :0 124: RCP TEMP[14].x, CONST[15].xxxx 125: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx 126: ENDIF 127: FRC TEMP[12].xy, TEMP[12].xyyy 128: MUL TEMP[14].x, CONST[17].xxxx, IMM[2].wwww 129: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx 130: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx 131: MUL TEMP[13].x, TEMP[13].xxxx, CONST[17].xxxx 132: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx 133: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[6].xxxx, TEMP[9].xyyy 134: MOV TEMP[13].xy, TEMP[12].xyyy 135: MOV TEMP[13].w, TEMP[10].xxxx 136: TXL TEMP[13], TEMP[13], SAMP[8], 2D 137: FSEQ TEMP[14].x, TEMP[9].zzzz, IMM[1].zzzz 138: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 139: MOV TEMP[15].xy, TEMP[12].xyyy 140: MOV TEMP[15].w, TEMP[10].xxxx 141: TXL TEMP[15], TEMP[15], SAMP[6], 2D 142: FSEQ TEMP[16].x, TEMP[9].zzzz, IMM[3].xxxx 143: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 144: MOV TEMP[17].xy, TEMP[12].xyyy 145: MOV TEMP[17].w, TEMP[10].xxxx 146: TXL TEMP[17], TEMP[17], SAMP[4], 2D 147: FSEQ TEMP[18].x, TEMP[9].zzzz, IMM[2].wwww 148: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 149: MOV TEMP[19].xy, TEMP[12].xyyy 150: MOV TEMP[19].w, TEMP[10].xxxx 151: TXL TEMP[19], TEMP[19], SAMP[2], 2D 152: FSEQ TEMP[20].x, TEMP[9].zzzz, IMM[2].zzzz 153: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 154: MOV TEMP[12].xy, TEMP[12].xyyy 155: MOV TEMP[12].w, TEMP[10].xxxx 156: TXL TEMP[12], TEMP[12], SAMP[0], 2D 157: FSEQ TEMP[21].x, TEMP[9].zzzz, IMM[3].yyyy 158: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 159: MUL TEMP[12], TEMP[12], TEMP[21].xxxx 160: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12] 161: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12] 162: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12] 163: MAD TEMP[12], TEMP[13], TEMP[14].xxxx, TEMP[12] 164: MOV TEMP[13].xy, IN[3].yxyy 165: MOV TEMP[14].x, IMM[2].xxxx 166: FSNE TEMP[15].x, CONST[13].xxxx, TEMP[6].xxxx 167: UIF TEMP[15].xxxx :0 168: MOV TEMP[14].x, IMM[2].yyyy 169: RCP TEMP[15].x, CONST[16].xxxx 170: MUL TEMP[13].xy, IN[3].yxxx, TEMP[15].xxxx 171: ELSE :0 172: RCP TEMP[15].x, CONST[15].xxxx 173: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx 174: ENDIF 175: FRC TEMP[13].xy, TEMP[13].xyyy 176: MUL TEMP[15].x, CONST[17].xxxx, IMM[2].wwww 177: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx 178: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx 179: MUL TEMP[14].x, TEMP[14].xxxx, CONST[17].xxxx 180: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx 181: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[6].xxxx, TEMP[9].xyyy 182: MOV TEMP[14].xy, TEMP[13].xyyy 183: MOV TEMP[14].w, TEMP[10].xxxx 184: TXL TEMP[14], TEMP[14], SAMP[8], 2D 185: FSEQ TEMP[15].x, TEMP[9].zzzz, IMM[1].zzzz 186: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 187: MOV TEMP[16].xy, TEMP[13].xyyy 188: MOV TEMP[16].w, TEMP[10].xxxx 189: TXL TEMP[16], TEMP[16], SAMP[6], 2D 190: FSEQ TEMP[17].x, TEMP[9].zzzz, IMM[3].xxxx 191: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 192: MOV TEMP[18].xy, TEMP[13].xyyy 193: MOV TEMP[18].w, TEMP[10].xxxx 194: TXL TEMP[18], TEMP[18], SAMP[4], 2D 195: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[2].wwww 196: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 197: MOV TEMP[20].xy, TEMP[13].xyyy 198: MOV TEMP[20].w, TEMP[10].xxxx 199: TXL TEMP[20], TEMP[20], SAMP[2], 2D 200: FSEQ TEMP[21].x, TEMP[9].zzzz, IMM[2].zzzz 201: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 202: MOV TEMP[13].xy, TEMP[13].xyyy 203: MOV TEMP[13].w, TEMP[10].xxxx 204: TXL TEMP[13], TEMP[13], SAMP[0], 2D 205: FSEQ TEMP[22].x, TEMP[9].zzzz, IMM[3].yyyy 206: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 207: MUL TEMP[13], TEMP[13], TEMP[22].xxxx 208: MAD TEMP[13], TEMP[20], TEMP[21].xxxx, TEMP[13] 209: MAD TEMP[13], TEMP[18], TEMP[19].xxxx, TEMP[13] 210: MAD TEMP[13], TEMP[16], TEMP[17].xxxx, TEMP[13] 211: MAD TEMP[13], TEMP[14], TEMP[15].xxxx, TEMP[13] 212: MOV TEMP[14].xy, TEMP[0].zxzz 213: MOV TEMP[15].x, IMM[2].xxxx 214: FSNE TEMP[16].x, CONST[13].xxxx, TEMP[6].xxxx 215: UIF TEMP[16].xxxx :0 216: MOV TEMP[15].x, IMM[2].yyyy 217: RCP TEMP[16].x, CONST[16].xxxx 218: MUL TEMP[14].xy, TEMP[0].zxxx, TEMP[16].xxxx 219: ELSE :0 220: RCP TEMP[16].x, CONST[15].xxxx 221: MUL TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx 222: ENDIF 223: FRC TEMP[14].xy, TEMP[14].xyyy 224: MUL TEMP[16].x, CONST[17].xxxx, IMM[2].wwww 225: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[15].xxxx 226: ADD TEMP[16].x, IMM[2].zzzz, -TEMP[16].xxxx 227: MUL TEMP[15].x, TEMP[15].xxxx, CONST[17].xxxx 228: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx, TEMP[15].xxxx 229: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[6].xxxx, TEMP[9].xyyy 230: MOV TEMP[15].xy, TEMP[14].xyyy 231: MOV TEMP[15].w, TEMP[10].xxxx 232: TXL TEMP[15], TEMP[15], SAMP[8], 2D 233: FSEQ TEMP[16].x, TEMP[9].zzzz, IMM[1].zzzz 234: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 235: MOV TEMP[17].xy, TEMP[14].xyyy 236: MOV TEMP[17].w, TEMP[10].xxxx 237: TXL TEMP[17], TEMP[17], SAMP[6], 2D 238: FSEQ TEMP[18].x, TEMP[9].zzzz, IMM[3].xxxx 239: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 240: MOV TEMP[19].xy, TEMP[14].xyyy 241: MOV TEMP[19].w, TEMP[10].xxxx 242: TXL TEMP[19], TEMP[19], SAMP[4], 2D 243: FSEQ TEMP[20].x, TEMP[9].zzzz, IMM[2].wwww 244: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 245: MOV TEMP[21].xy, TEMP[14].xyyy 246: MOV TEMP[21].w, TEMP[10].xxxx 247: TXL TEMP[21], TEMP[21], SAMP[2], 2D 248: FSEQ TEMP[22].x, TEMP[9].zzzz, IMM[2].zzzz 249: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 250: MOV TEMP[14].xy, TEMP[14].xyyy 251: MOV TEMP[14].w, TEMP[10].xxxx 252: TXL TEMP[14], TEMP[14], SAMP[0], 2D 253: FSEQ TEMP[23].x, TEMP[9].zzzz, IMM[3].yyyy 254: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz 255: MUL TEMP[14], TEMP[14], TEMP[23].xxxx 256: MAD TEMP[14], TEMP[21], TEMP[22].xxxx, TEMP[14] 257: MAD TEMP[14], TEMP[19], TEMP[20].xxxx, TEMP[14] 258: MAD TEMP[14], TEMP[17], TEMP[18].xxxx, TEMP[14] 259: MAD TEMP[14], TEMP[15], TEMP[16].xxxx, TEMP[14] 260: MOV TEMP[15].xy, TEMP[0].xyxx 261: MOV TEMP[16].x, IMM[2].xxxx 262: FSNE TEMP[17].x, CONST[13].xxxx, TEMP[7].xxxx 263: UIF TEMP[17].xxxx :0 264: MOV TEMP[16].x, IMM[2].yyyy 265: RCP TEMP[17].x, CONST[16].xxxx 266: MUL TEMP[15].xy, TEMP[0].xyyy, TEMP[17].xxxx 267: ELSE :0 268: RCP TEMP[17].x, CONST[15].xxxx 269: MUL TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx 270: ENDIF 271: FRC TEMP[15].xy, TEMP[15].xyyy 272: MUL TEMP[17].x, CONST[17].xxxx, IMM[2].wwww 273: MUL TEMP[17].x, TEMP[17].xxxx, TEMP[16].xxxx 274: ADD TEMP[17].x, IMM[2].zzzz, -TEMP[17].xxxx 275: MUL TEMP[16].x, TEMP[16].xxxx, CONST[17].xxxx 276: MAD TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx, TEMP[16].xxxx 277: MAD TEMP[15].xy, TEMP[15].xyyy, TEMP[7].xxxx, TEMP[11].xyyy 278: MOV TEMP[16].xy, TEMP[15].xyyy 279: MOV TEMP[16].w, TEMP[10].xxxx 280: TXL TEMP[16], TEMP[16], SAMP[8], 2D 281: FSEQ TEMP[17].x, TEMP[11].zzzz, IMM[1].zzzz 282: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 283: MOV TEMP[18].xy, TEMP[15].xyyy 284: MOV TEMP[18].w, TEMP[10].xxxx 285: TXL TEMP[18], TEMP[18], SAMP[6], 2D 286: FSEQ TEMP[19].x, TEMP[11].zzzz, IMM[3].xxxx 287: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 288: MOV TEMP[20].xy, TEMP[15].xyyy 289: MOV TEMP[20].w, TEMP[10].xxxx 290: TXL TEMP[20], TEMP[20], SAMP[4], 2D 291: FSEQ TEMP[21].x, TEMP[11].zzzz, IMM[2].wwww 292: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 293: MOV TEMP[22].xy, TEMP[15].xyyy 294: MOV TEMP[22].w, TEMP[10].xxxx 295: TXL TEMP[22], TEMP[22], SAMP[2], 2D 296: FSEQ TEMP[23].x, TEMP[11].zzzz, IMM[2].zzzz 297: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz 298: MOV TEMP[15].xy, TEMP[15].xyyy 299: MOV TEMP[15].w, TEMP[10].xxxx 300: TXL TEMP[15], TEMP[15], SAMP[0], 2D 301: FSEQ TEMP[24].x, TEMP[11].zzzz, IMM[3].yyyy 302: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz 303: MUL TEMP[15], TEMP[15], TEMP[24].xxxx 304: MAD TEMP[15], TEMP[22], TEMP[23].xxxx, TEMP[15] 305: MAD TEMP[15], TEMP[20], TEMP[21].xxxx, TEMP[15] 306: MAD TEMP[15], TEMP[18], TEMP[19].xxxx, TEMP[15] 307: MAD TEMP[15], TEMP[16], TEMP[17].xxxx, TEMP[15] 308: MOV TEMP[16].xy, IN[3].yxyy 309: MOV TEMP[17].x, IMM[2].xxxx 310: FSNE TEMP[18].x, CONST[13].xxxx, TEMP[7].xxxx 311: UIF TEMP[18].xxxx :0 312: MOV TEMP[17].x, IMM[2].yyyy 313: RCP TEMP[18].x, CONST[16].xxxx 314: MUL TEMP[16].xy, IN[3].yxxx, TEMP[18].xxxx 315: ELSE :0 316: RCP TEMP[18].x, CONST[15].xxxx 317: MUL TEMP[16].xy, TEMP[16].xyyy, TEMP[18].xxxx 318: ENDIF 319: FRC TEMP[16].xy, TEMP[16].xyyy 320: MUL TEMP[18].x, CONST[17].xxxx, IMM[2].wwww 321: MUL TEMP[18].x, TEMP[18].xxxx, TEMP[17].xxxx 322: ADD TEMP[18].x, IMM[2].zzzz, -TEMP[18].xxxx 323: MUL TEMP[17].x, TEMP[17].xxxx, CONST[17].xxxx 324: MAD TEMP[16].xy, TEMP[16].xyyy, TEMP[18].xxxx, TEMP[17].xxxx 325: MAD TEMP[16].xy, TEMP[16].xyyy, TEMP[7].xxxx, TEMP[11].xyyy 326: MOV TEMP[17].xy, TEMP[16].xyyy 327: MOV TEMP[17].w, TEMP[10].xxxx 328: TXL TEMP[17], TEMP[17], SAMP[8], 2D 329: FSEQ TEMP[18].x, TEMP[11].zzzz, IMM[1].zzzz 330: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 331: MOV TEMP[19].xy, TEMP[16].xyyy 332: MOV TEMP[19].w, TEMP[10].xxxx 333: TXL TEMP[19], TEMP[19], SAMP[6], 2D 334: FSEQ TEMP[20].x, TEMP[11].zzzz, IMM[3].xxxx 335: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 336: MOV TEMP[21].xy, TEMP[16].xyyy 337: MOV TEMP[21].w, TEMP[10].xxxx 338: TXL TEMP[21], TEMP[21], SAMP[4], 2D 339: FSEQ TEMP[22].x, TEMP[11].zzzz, IMM[2].wwww 340: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 341: MOV TEMP[23].xy, TEMP[16].xyyy 342: MOV TEMP[23].w, TEMP[10].xxxx 343: TXL TEMP[23], TEMP[23], SAMP[2], 2D 344: FSEQ TEMP[24].x, TEMP[11].zzzz, IMM[2].zzzz 345: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz 346: MOV TEMP[16].xy, TEMP[16].xyyy 347: MOV TEMP[16].w, TEMP[10].xxxx 348: TXL TEMP[16], TEMP[16], SAMP[0], 2D 349: FSEQ TEMP[25].x, TEMP[11].zzzz, IMM[3].yyyy 350: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz 351: MUL TEMP[16], TEMP[16], TEMP[25].xxxx 352: MAD TEMP[16], TEMP[23], TEMP[24].xxxx, TEMP[16] 353: MAD TEMP[16], TEMP[21], TEMP[22].xxxx, TEMP[16] 354: MAD TEMP[16], TEMP[19], TEMP[20].xxxx, TEMP[16] 355: MAD TEMP[16], TEMP[17], TEMP[18].xxxx, TEMP[16] 356: MOV TEMP[17].xy, TEMP[0].zxzz 357: MOV TEMP[18].x, IMM[2].xxxx 358: FSNE TEMP[19].x, CONST[13].xxxx, TEMP[7].xxxx 359: UIF TEMP[19].xxxx :0 360: MOV TEMP[18].x, IMM[2].yyyy 361: RCP TEMP[19].x, CONST[16].xxxx 362: MUL TEMP[17].xy, TEMP[0].zxxx, TEMP[19].xxxx 363: ELSE :0 364: RCP TEMP[19].x, CONST[15].xxxx 365: MUL TEMP[17].xy, TEMP[17].xyyy, TEMP[19].xxxx 366: ENDIF 367: FRC TEMP[17].xy, TEMP[17].xyyy 368: MUL TEMP[19].x, CONST[17].xxxx, IMM[2].wwww 369: MUL TEMP[19].x, TEMP[19].xxxx, TEMP[18].xxxx 370: ADD TEMP[19].x, IMM[2].zzzz, -TEMP[19].xxxx 371: MUL TEMP[18].x, TEMP[18].xxxx, CONST[17].xxxx 372: MAD TEMP[17].xy, TEMP[17].xyyy, TEMP[19].xxxx, TEMP[18].xxxx 373: MAD TEMP[17].xy, TEMP[17].xyyy, TEMP[7].xxxx, TEMP[11].xyyy 374: MOV TEMP[18].xy, TEMP[17].xyyy 375: MOV TEMP[18].w, TEMP[10].xxxx 376: TXL TEMP[18], TEMP[18], SAMP[8], 2D 377: FSEQ TEMP[19].x, TEMP[11].zzzz, IMM[1].zzzz 378: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 379: MOV TEMP[20].xy, TEMP[17].xyyy 380: MOV TEMP[20].w, TEMP[10].xxxx 381: TXL TEMP[20], TEMP[20], SAMP[6], 2D 382: FSEQ TEMP[21].x, TEMP[11].zzzz, IMM[3].xxxx 383: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 384: MOV TEMP[22].xy, TEMP[17].xyyy 385: MOV TEMP[22].w, TEMP[10].xxxx 386: TXL TEMP[22], TEMP[22], SAMP[4], 2D 387: FSEQ TEMP[23].x, TEMP[11].zzzz, IMM[2].wwww 388: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz 389: MOV TEMP[24].xy, TEMP[17].xyyy 390: MOV TEMP[24].w, TEMP[10].xxxx 391: TXL TEMP[24], TEMP[24], SAMP[2], 2D 392: FSEQ TEMP[25].x, TEMP[11].zzzz, IMM[2].zzzz 393: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz 394: MOV TEMP[17].xy, TEMP[17].xyyy 395: MOV TEMP[17].w, TEMP[10].xxxx 396: TXL TEMP[17], TEMP[17], SAMP[0], 2D 397: FSEQ TEMP[26].x, TEMP[11].zzzz, IMM[3].yyyy 398: AND TEMP[26].x, TEMP[26].xxxx, IMM[2].zzzz 399: MUL TEMP[17], TEMP[17], TEMP[26].xxxx 400: MAD TEMP[17], TEMP[24], TEMP[25].xxxx, TEMP[17] 401: MAD TEMP[17], TEMP[22], TEMP[23].xxxx, TEMP[17] 402: MAD TEMP[17], TEMP[20], TEMP[21].xxxx, TEMP[17] 403: MAD TEMP[17], TEMP[18], TEMP[19].xxxx, TEMP[17] 404: MOV TEMP[18].xy, TEMP[0].xyxx 405: MOV TEMP[19].x, IMM[2].xxxx 406: FSNE TEMP[20].x, CONST[13].xxxx, TEMP[8].xxxx 407: UIF TEMP[20].xxxx :0 408: MOV TEMP[19].x, IMM[2].yyyy 409: RCP TEMP[20].x, CONST[16].xxxx 410: MUL TEMP[18].xy, TEMP[0].xyyy, TEMP[20].xxxx 411: ELSE :0 412: RCP TEMP[20].x, CONST[15].xxxx 413: MUL TEMP[18].xy, TEMP[18].xyyy, TEMP[20].xxxx 414: ENDIF 415: FRC TEMP[18].xy, TEMP[18].xyyy 416: MUL TEMP[20].x, CONST[17].xxxx, IMM[2].wwww 417: MUL TEMP[20].x, TEMP[20].xxxx, TEMP[19].xxxx 418: ADD TEMP[20].x, IMM[2].zzzz, -TEMP[20].xxxx 419: MUL TEMP[19].x, TEMP[19].xxxx, CONST[17].xxxx 420: MAD TEMP[18].xy, TEMP[18].xyyy, TEMP[20].xxxx, TEMP[19].xxxx 421: MAD TEMP[18].xy, TEMP[18].xyyy, TEMP[8].xxxx, TEMP[5].xyyy 422: MOV TEMP[19].xy, TEMP[18].xyyy 423: MOV TEMP[19].w, TEMP[10].xxxx 424: TXL TEMP[19], TEMP[19], SAMP[8], 2D 425: FSEQ TEMP[20].x, TEMP[5].zzzz, IMM[1].zzzz 426: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 427: MOV TEMP[21].xy, TEMP[18].xyyy 428: MOV TEMP[21].w, TEMP[10].xxxx 429: TXL TEMP[21], TEMP[21], SAMP[6], 2D 430: FSEQ TEMP[22].x, TEMP[5].zzzz, IMM[3].xxxx 431: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 432: MOV TEMP[23].xy, TEMP[18].xyyy 433: MOV TEMP[23].w, TEMP[10].xxxx 434: TXL TEMP[23], TEMP[23], SAMP[4], 2D 435: FSEQ TEMP[24].x, TEMP[5].zzzz, IMM[2].wwww 436: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz 437: MOV TEMP[25].xy, TEMP[18].xyyy 438: MOV TEMP[25].w, TEMP[10].xxxx 439: TXL TEMP[25], TEMP[25], SAMP[2], 2D 440: FSEQ TEMP[26].x, TEMP[5].zzzz, IMM[2].zzzz 441: AND TEMP[26].x, TEMP[26].xxxx, IMM[2].zzzz 442: MOV TEMP[18].xy, TEMP[18].xyyy 443: MOV TEMP[18].w, TEMP[10].xxxx 444: TXL TEMP[18], TEMP[18], SAMP[0], 2D 445: FSEQ TEMP[27].x, TEMP[5].zzzz, IMM[3].yyyy 446: AND TEMP[27].x, TEMP[27].xxxx, IMM[2].zzzz 447: MUL TEMP[18], TEMP[18], TEMP[27].xxxx 448: MAD TEMP[18], TEMP[25], TEMP[26].xxxx, TEMP[18] 449: MAD TEMP[18], TEMP[23], TEMP[24].xxxx, TEMP[18] 450: MAD TEMP[18], TEMP[21], TEMP[22].xxxx, TEMP[18] 451: MAD TEMP[18], TEMP[19], TEMP[20].xxxx, TEMP[18] 452: MOV TEMP[19].xy, IN[3].yxyy 453: MOV TEMP[20].x, IMM[2].xxxx 454: FSNE TEMP[21].x, CONST[13].xxxx, TEMP[8].xxxx 455: UIF TEMP[21].xxxx :0 456: MOV TEMP[20].x, IMM[2].yyyy 457: RCP TEMP[21].x, CONST[16].xxxx 458: MUL TEMP[19].xy, IN[3].yxxx, TEMP[21].xxxx 459: ELSE :0 460: RCP TEMP[21].x, CONST[15].xxxx 461: MUL TEMP[19].xy, TEMP[19].xyyy, TEMP[21].xxxx 462: ENDIF 463: FRC TEMP[19].xy, TEMP[19].xyyy 464: MUL TEMP[21].x, CONST[17].xxxx, IMM[2].wwww 465: MUL TEMP[21].x, TEMP[21].xxxx, TEMP[20].xxxx 466: ADD TEMP[21].x, IMM[2].zzzz, -TEMP[21].xxxx 467: MUL TEMP[20].x, TEMP[20].xxxx, CONST[17].xxxx 468: MAD TEMP[19].xy, TEMP[19].xyyy, TEMP[21].xxxx, TEMP[20].xxxx 469: MAD TEMP[19].xy, TEMP[19].xyyy, TEMP[8].xxxx, TEMP[5].xyyy 470: MOV TEMP[20].xy, TEMP[19].xyyy 471: MOV TEMP[20].w, TEMP[10].xxxx 472: TXL TEMP[20], TEMP[20], SAMP[8], 2D 473: FSEQ TEMP[21].x, TEMP[5].zzzz, IMM[1].zzzz 474: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 475: MOV TEMP[22].xy, TEMP[19].xyyy 476: MOV TEMP[22].w, TEMP[10].xxxx 477: TXL TEMP[22], TEMP[22], SAMP[6], 2D 478: FSEQ TEMP[23].x, TEMP[5].zzzz, IMM[3].xxxx 479: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz 480: MOV TEMP[24].xy, TEMP[19].xyyy 481: MOV TEMP[24].w, TEMP[10].xxxx 482: TXL TEMP[24], TEMP[24], SAMP[4], 2D 483: FSEQ TEMP[25].x, TEMP[5].zzzz, IMM[2].wwww 484: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz 485: MOV TEMP[26].xy, TEMP[19].xyyy 486: MOV TEMP[26].w, TEMP[10].xxxx 487: TXL TEMP[26], TEMP[26], SAMP[2], 2D 488: FSEQ TEMP[27].x, TEMP[5].zzzz, IMM[2].zzzz 489: AND TEMP[27].x, TEMP[27].xxxx, IMM[2].zzzz 490: MOV TEMP[19].xy, TEMP[19].xyyy 491: MOV TEMP[19].w, TEMP[10].xxxx 492: TXL TEMP[19], TEMP[19], SAMP[0], 2D 493: FSEQ TEMP[28].x, TEMP[5].zzzz, IMM[3].yyyy 494: AND TEMP[28].x, TEMP[28].xxxx, IMM[2].zzzz 495: MUL TEMP[19], TEMP[19], TEMP[28].xxxx 496: MAD TEMP[19], TEMP[26], TEMP[27].xxxx, TEMP[19] 497: MAD TEMP[19], TEMP[24], TEMP[25].xxxx, TEMP[19] 498: MAD TEMP[19], TEMP[22], TEMP[23].xxxx, TEMP[19] 499: MAD TEMP[19], TEMP[20], TEMP[21].xxxx, TEMP[19] 500: MOV TEMP[20].xy, TEMP[0].zxzz 501: MOV TEMP[21].x, IMM[2].xxxx 502: FSNE TEMP[22].x, CONST[13].xxxx, TEMP[8].xxxx 503: UIF TEMP[22].xxxx :0 504: MOV TEMP[21].x, IMM[2].yyyy 505: RCP TEMP[22].x, CONST[16].xxxx 506: MUL TEMP[20].xy, TEMP[0].zxxx, TEMP[22].xxxx 507: ELSE :0 508: RCP TEMP[22].x, CONST[15].xxxx 509: MUL TEMP[20].xy, TEMP[20].xyyy, TEMP[22].xxxx 510: ENDIF 511: FRC TEMP[20].xy, TEMP[20].xyyy 512: MUL TEMP[22].x, CONST[17].xxxx, IMM[2].wwww 513: MUL TEMP[22].x, TEMP[22].xxxx, TEMP[21].xxxx 514: ADD TEMP[22].x, IMM[2].zzzz, -TEMP[22].xxxx 515: MUL TEMP[21].x, TEMP[21].xxxx, CONST[17].xxxx 516: MAD TEMP[20].xy, TEMP[20].xyyy, TEMP[22].xxxx, TEMP[21].xxxx 517: MAD TEMP[20].xy, TEMP[20].xyyy, TEMP[8].xxxx, TEMP[5].xyyy 518: MOV TEMP[21].xy, TEMP[20].xyyy 519: MOV TEMP[21].w, TEMP[10].xxxx 520: TXL TEMP[21], TEMP[21], SAMP[8], 2D 521: FSEQ TEMP[22].x, TEMP[5].zzzz, IMM[1].zzzz 522: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 523: MOV TEMP[23].xy, TEMP[20].xyyy 524: MOV TEMP[23].w, TEMP[10].xxxx 525: TXL TEMP[23], TEMP[23], SAMP[6], 2D 526: FSEQ TEMP[24].x, TEMP[5].zzzz, IMM[3].xxxx 527: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz 528: MOV TEMP[25].xy, TEMP[20].xyyy 529: MOV TEMP[25].w, TEMP[10].xxxx 530: TXL TEMP[25], TEMP[25], SAMP[4], 2D 531: FSEQ TEMP[26].x, TEMP[5].zzzz, IMM[2].wwww 532: AND TEMP[26].x, TEMP[26].xxxx, IMM[2].zzzz 533: MOV TEMP[27].xy, TEMP[20].xyyy 534: MOV TEMP[27].w, TEMP[10].xxxx 535: TXL TEMP[27], TEMP[27], SAMP[2], 2D 536: FSEQ TEMP[28].x, TEMP[5].zzzz, IMM[2].zzzz 537: AND TEMP[28].x, TEMP[28].xxxx, IMM[2].zzzz 538: MOV TEMP[20].xy, TEMP[20].xyyy 539: MOV TEMP[20].w, TEMP[10].xxxx 540: TXL TEMP[20], TEMP[20], SAMP[0], 2D 541: FSEQ TEMP[29].x, TEMP[5].zzzz, IMM[3].yyyy 542: AND TEMP[29].x, TEMP[29].xxxx, IMM[2].zzzz 543: MUL TEMP[20], TEMP[20], TEMP[29].xxxx 544: MAD TEMP[20], TEMP[27], TEMP[28].xxxx, TEMP[20] 545: MAD TEMP[20], TEMP[25], TEMP[26].xxxx, TEMP[20] 546: MAD TEMP[20], TEMP[23], TEMP[24].xxxx, TEMP[20] 547: MAD TEMP[20], TEMP[21], TEMP[22].xxxx, TEMP[20] 548: MUL TEMP[18], TEMP[18], TEMP[4].zzzz 549: MAD TEMP[18], TEMP[19], TEMP[4].xxxx, TEMP[18] 550: MAD TEMP[18], TEMP[20], TEMP[4].yyyy, TEMP[18] 551: MUL TEMP[15], TEMP[15], TEMP[4].zzzz 552: MAD TEMP[15], TEMP[16], TEMP[4].xxxx, TEMP[15] 553: MAD TEMP[15], TEMP[17], TEMP[4].yyyy, TEMP[15] 554: MUL TEMP[12], TEMP[12], TEMP[4].zzzz 555: MAD TEMP[12], TEMP[13], TEMP[4].xxxx, TEMP[12] 556: MAD TEMP[12], TEMP[14], TEMP[4].yyyy, TEMP[12] 557: MUL TEMP[12], IN[1].xxxx, TEMP[12] 558: MAD TEMP[12], IN[1].yyyy, TEMP[15], TEMP[12] 559: MAD TEMP[12].xyz, IN[1].zzzz, TEMP[18], TEMP[12] 560: MOV TEMP[13].xy, IN[3].yxyy 561: MOV TEMP[14].x, IMM[2].xxxx 562: FSNE TEMP[15].x, CONST[13].xxxx, TEMP[6].xxxx 563: UIF TEMP[15].xxxx :0 564: MOV TEMP[14].x, IMM[2].yyyy 565: RCP TEMP[15].x, CONST[16].xxxx 566: MUL TEMP[13].xy, IN[3].yxxx, TEMP[15].xxxx 567: ELSE :0 568: RCP TEMP[15].x, CONST[15].xxxx 569: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx 570: ENDIF 571: FRC TEMP[13].xy, TEMP[13].xyyy 572: MUL TEMP[15].x, CONST[17].xxxx, IMM[2].wwww 573: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx 574: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx 575: MUL TEMP[14].x, TEMP[14].xxxx, CONST[17].xxxx 576: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx 577: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[6].xxxx, TEMP[9].xyyy 578: MOV TEMP[14].xy, TEMP[13].xyyy 579: MOV TEMP[14].w, TEMP[10].xxxx 580: TXL TEMP[14], TEMP[14], SAMP[9], 2D 581: FSEQ TEMP[15].x, TEMP[9].zzzz, IMM[1].zzzz 582: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 583: MOV TEMP[16].xy, TEMP[13].xyyy 584: MOV TEMP[16].w, TEMP[10].xxxx 585: TXL TEMP[16], TEMP[16], SAMP[7], 2D 586: FSEQ TEMP[17].x, TEMP[9].zzzz, IMM[3].xxxx 587: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 588: MOV TEMP[18].xy, TEMP[13].xyyy 589: MOV TEMP[18].w, TEMP[10].xxxx 590: TXL TEMP[18], TEMP[18], SAMP[5], 2D 591: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[2].wwww 592: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 593: MOV TEMP[20].xy, TEMP[13].xyyy 594: MOV TEMP[20].w, TEMP[10].xxxx 595: TXL TEMP[20], TEMP[20], SAMP[3], 2D 596: FSEQ TEMP[21].x, TEMP[9].zzzz, IMM[2].zzzz 597: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 598: MOV TEMP[13].xy, TEMP[13].xyyy 599: MOV TEMP[13].w, TEMP[10].xxxx 600: TXL TEMP[13], TEMP[13], SAMP[1], 2D 601: FSEQ TEMP[22].x, TEMP[9].zzzz, IMM[3].yyyy 602: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 603: MUL TEMP[13], TEMP[13], TEMP[22].xxxx 604: MAD TEMP[13], TEMP[20], TEMP[21].xxxx, TEMP[13] 605: MAD TEMP[13], TEMP[18], TEMP[19].xxxx, TEMP[13] 606: MAD TEMP[13], TEMP[16], TEMP[17].xxxx, TEMP[13] 607: MAD TEMP[13].yw, TEMP[14], TEMP[15].xxxx, TEMP[13] 608: MAD TEMP[13].xy, TEMP[13].wyyy, IMM[2].wwww, IMM[3].zzzz 609: DP2 TEMP[14].x, TEMP[13].xyyy, TEMP[13].xyyy 610: MOV_SAT TEMP[30].x, TEMP[14].xxxx 611: MOV TEMP[14].xy, TEMP[0].zxzz 612: MOV TEMP[15].x, IMM[2].xxxx 613: FSNE TEMP[16].x, CONST[13].xxxx, TEMP[6].xxxx 614: UIF TEMP[16].xxxx :0 615: MOV TEMP[15].x, IMM[2].yyyy 616: RCP TEMP[16].x, CONST[16].xxxx 617: MUL TEMP[14].xy, TEMP[0].zxxx, TEMP[16].xxxx 618: ELSE :0 619: RCP TEMP[16].x, CONST[15].xxxx 620: MUL TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx 621: ENDIF 622: FRC TEMP[14].xy, TEMP[14].xyyy 623: MUL TEMP[16].x, CONST[17].xxxx, IMM[2].wwww 624: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[15].xxxx 625: ADD TEMP[16].x, IMM[2].zzzz, -TEMP[16].xxxx 626: MUL TEMP[15].x, TEMP[15].xxxx, CONST[17].xxxx 627: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx, TEMP[15].xxxx 628: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[6].xxxx, TEMP[9].xyyy 629: MOV TEMP[15].xy, TEMP[14].xyyy 630: MOV TEMP[15].w, TEMP[10].xxxx 631: TXL TEMP[15], TEMP[15], SAMP[9], 2D 632: FSEQ TEMP[16].x, TEMP[9].zzzz, IMM[1].zzzz 633: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 634: MOV TEMP[17].xy, TEMP[14].xyyy 635: MOV TEMP[17].w, TEMP[10].xxxx 636: TXL TEMP[17], TEMP[17], SAMP[7], 2D 637: FSEQ TEMP[18].x, TEMP[9].zzzz, IMM[3].xxxx 638: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 639: MOV TEMP[19].xy, TEMP[14].xyyy 640: MOV TEMP[19].w, TEMP[10].xxxx 641: TXL TEMP[19], TEMP[19], SAMP[5], 2D 642: FSEQ TEMP[20].x, TEMP[9].zzzz, IMM[2].wwww 643: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 644: MOV TEMP[21].xy, TEMP[14].xyyy 645: MOV TEMP[21].w, TEMP[10].xxxx 646: TXL TEMP[21], TEMP[21], SAMP[3], 2D 647: FSEQ TEMP[22].x, TEMP[9].zzzz, IMM[2].zzzz 648: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 649: MOV TEMP[14].xy, TEMP[14].xyyy 650: MOV TEMP[14].w, TEMP[10].xxxx 651: TXL TEMP[14], TEMP[14], SAMP[1], 2D 652: FSEQ TEMP[23].x, TEMP[9].zzzz, IMM[3].yyyy 653: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz 654: MUL TEMP[14], TEMP[14], TEMP[23].xxxx 655: MAD TEMP[14], TEMP[21], TEMP[22].xxxx, TEMP[14] 656: MAD TEMP[14], TEMP[19], TEMP[20].xxxx, TEMP[14] 657: MAD TEMP[14], TEMP[17], TEMP[18].xxxx, TEMP[14] 658: MAD TEMP[14].yw, TEMP[15], TEMP[16].xxxx, TEMP[14] 659: MAD TEMP[14].xy, TEMP[14].wyyy, IMM[2].wwww, IMM[3].zzzz 660: DP2 TEMP[15].x, TEMP[14].xyyy, TEMP[14].xyyy 661: MOV_SAT TEMP[31].x, TEMP[15].xxxx 662: MOV TEMP[15].xy, TEMP[0].xyxx 663: MOV TEMP[16].x, IMM[2].xxxx 664: FSNE TEMP[17].x, CONST[13].xxxx, TEMP[6].xxxx 665: UIF TEMP[17].xxxx :0 666: MOV TEMP[16].x, IMM[2].yyyy 667: RCP TEMP[17].x, CONST[16].xxxx 668: MUL TEMP[15].xy, TEMP[0].xyyy, TEMP[17].xxxx 669: ELSE :0 670: RCP TEMP[17].x, CONST[15].xxxx 671: MUL TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx 672: ENDIF 673: FRC TEMP[15].xy, TEMP[15].xyyy 674: MUL TEMP[17].x, CONST[17].xxxx, IMM[2].wwww 675: MUL TEMP[17].x, TEMP[17].xxxx, TEMP[16].xxxx 676: ADD TEMP[17].x, IMM[2].zzzz, -TEMP[17].xxxx 677: MUL TEMP[16].x, TEMP[16].xxxx, CONST[17].xxxx 678: MAD TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx, TEMP[16].xxxx 679: MAD TEMP[6].xy, TEMP[15].xyyy, TEMP[6].xxxx, TEMP[9].xyyy 680: MOV TEMP[15].xy, TEMP[6].xyyy 681: MOV TEMP[15].w, TEMP[10].xxxx 682: TXL TEMP[15], TEMP[15], SAMP[9], 2D 683: FSEQ TEMP[16].x, TEMP[9].zzzz, IMM[1].zzzz 684: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 685: MOV TEMP[17].xy, TEMP[6].xyyy 686: MOV TEMP[17].w, TEMP[10].xxxx 687: TXL TEMP[17], TEMP[17], SAMP[7], 2D 688: FSEQ TEMP[18].x, TEMP[9].zzzz, IMM[3].xxxx 689: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 690: MOV TEMP[19].xy, TEMP[6].xyyy 691: MOV TEMP[19].w, TEMP[10].xxxx 692: TXL TEMP[19], TEMP[19], SAMP[5], 2D 693: FSEQ TEMP[20].x, TEMP[9].zzzz, IMM[2].wwww 694: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 695: MOV TEMP[21].xy, TEMP[6].xyyy 696: MOV TEMP[21].w, TEMP[10].xxxx 697: TXL TEMP[21], TEMP[21], SAMP[3], 2D 698: FSEQ TEMP[22].x, TEMP[9].zzzz, IMM[2].zzzz 699: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 700: MOV TEMP[6].xy, TEMP[6].xyyy 701: MOV TEMP[6].w, TEMP[10].xxxx 702: TXL TEMP[6], TEMP[6], SAMP[1], 2D 703: FSEQ TEMP[9].x, TEMP[9].zzzz, IMM[3].yyyy 704: AND TEMP[9].x, TEMP[9].xxxx, IMM[2].zzzz 705: MUL TEMP[6], TEMP[6], TEMP[9].xxxx 706: MAD TEMP[6], TEMP[21], TEMP[22].xxxx, TEMP[6] 707: MAD TEMP[6], TEMP[19], TEMP[20].xxxx, TEMP[6] 708: MAD TEMP[6], TEMP[17], TEMP[18].xxxx, TEMP[6] 709: MAD TEMP[6].yw, TEMP[15], TEMP[16].xxxx, TEMP[6] 710: MAD TEMP[6].xy, TEMP[6].wyyy, IMM[2].wwww, IMM[3].zzzz 711: DP2 TEMP[9].x, TEMP[6].xyyy, TEMP[6].xyyy 712: MOV_SAT TEMP[32].x, TEMP[9].xxxx 713: MOV TEMP[9].x, IMM[3].yyyy 714: MOV TEMP[9].y, TEMP[13].xxxx 715: MOV TEMP[9].z, TEMP[13].yyyy 716: MOV TEMP[13].y, IMM[3].yyyy 717: MOV TEMP[13].x, TEMP[14].yyyy 718: MOV TEMP[13].z, TEMP[14].xxxx 719: MOV TEMP[14].z, IMM[3].yyyy 720: MOV TEMP[14].xy, TEMP[6].xyxx 721: MUL TEMP[6].xyz, TEMP[9].xyzz, TEMP[4].xxxx 722: MAD TEMP[6].xyz, TEMP[13].xyzz, TEMP[4].yyyy, TEMP[6].xyzz 723: MAD TEMP[6].xyz, TEMP[14].xyzz, TEMP[4].zzzz, TEMP[6].xyzz 724: MOV TEMP[9].xy, IN[3].yxyy 725: MOV TEMP[13].x, IMM[2].xxxx 726: FSNE TEMP[14].x, CONST[13].xxxx, TEMP[7].xxxx 727: UIF TEMP[14].xxxx :0 728: MOV TEMP[13].x, IMM[2].yyyy 729: RCP TEMP[14].x, CONST[16].xxxx 730: MUL TEMP[9].xy, IN[3].yxxx, TEMP[14].xxxx 731: ELSE :0 732: RCP TEMP[14].x, CONST[15].xxxx 733: MUL TEMP[9].xy, TEMP[9].xyyy, TEMP[14].xxxx 734: ENDIF 735: FRC TEMP[9].xy, TEMP[9].xyyy 736: MUL TEMP[14].x, CONST[17].xxxx, IMM[2].wwww 737: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx 738: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx 739: MUL TEMP[13].x, TEMP[13].xxxx, CONST[17].xxxx 740: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[14].xxxx, TEMP[13].xxxx 741: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[7].xxxx, TEMP[11].xyyy 742: MOV TEMP[13].xy, TEMP[9].xyyy 743: MOV TEMP[13].w, TEMP[10].xxxx 744: TXL TEMP[13], TEMP[13], SAMP[9], 2D 745: FSEQ TEMP[14].x, TEMP[11].zzzz, IMM[1].zzzz 746: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 747: MOV TEMP[15].xy, TEMP[9].xyyy 748: MOV TEMP[15].w, TEMP[10].xxxx 749: TXL TEMP[15], TEMP[15], SAMP[7], 2D 750: FSEQ TEMP[16].x, TEMP[11].zzzz, IMM[3].xxxx 751: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 752: MOV TEMP[17].xy, TEMP[9].xyyy 753: MOV TEMP[17].w, TEMP[10].xxxx 754: TXL TEMP[17], TEMP[17], SAMP[5], 2D 755: FSEQ TEMP[18].x, TEMP[11].zzzz, IMM[2].wwww 756: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 757: MOV TEMP[19].xy, TEMP[9].xyyy 758: MOV TEMP[19].w, TEMP[10].xxxx 759: TXL TEMP[19], TEMP[19], SAMP[3], 2D 760: FSEQ TEMP[20].x, TEMP[11].zzzz, IMM[2].zzzz 761: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 762: MOV TEMP[9].xy, TEMP[9].xyyy 763: MOV TEMP[9].w, TEMP[10].xxxx 764: TXL TEMP[9], TEMP[9], SAMP[1], 2D 765: FSEQ TEMP[21].x, TEMP[11].zzzz, IMM[3].yyyy 766: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 767: MUL TEMP[9], TEMP[9], TEMP[21].xxxx 768: MAD TEMP[9], TEMP[19], TEMP[20].xxxx, TEMP[9] 769: MAD TEMP[9], TEMP[17], TEMP[18].xxxx, TEMP[9] 770: MAD TEMP[9], TEMP[15], TEMP[16].xxxx, TEMP[9] 771: MAD TEMP[9].yw, TEMP[13], TEMP[14].xxxx, TEMP[9] 772: MAD TEMP[9].xy, TEMP[9].wyyy, IMM[2].wwww, IMM[3].zzzz 773: DP2 TEMP[13].x, TEMP[9].xyyy, TEMP[9].xyyy 774: MOV_SAT TEMP[33].x, TEMP[13].xxxx 775: MOV TEMP[13].xy, TEMP[0].zxzz 776: MOV TEMP[14].x, IMM[2].xxxx 777: FSNE TEMP[15].x, CONST[13].xxxx, TEMP[7].xxxx 778: UIF TEMP[15].xxxx :0 779: MOV TEMP[14].x, IMM[2].yyyy 780: RCP TEMP[15].x, CONST[16].xxxx 781: MUL TEMP[13].xy, TEMP[0].zxxx, TEMP[15].xxxx 782: ELSE :0 783: RCP TEMP[15].x, CONST[15].xxxx 784: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx 785: ENDIF 786: FRC TEMP[13].xy, TEMP[13].xyyy 787: MUL TEMP[15].x, CONST[17].xxxx, IMM[2].wwww 788: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx 789: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx 790: MUL TEMP[14].x, TEMP[14].xxxx, CONST[17].xxxx 791: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx 792: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[7].xxxx, TEMP[11].xyyy 793: MOV TEMP[14].xy, TEMP[13].xyyy 794: MOV TEMP[14].w, TEMP[10].xxxx 795: TXL TEMP[14], TEMP[14], SAMP[9], 2D 796: FSEQ TEMP[15].x, TEMP[11].zzzz, IMM[1].zzzz 797: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 798: MOV TEMP[16].xy, TEMP[13].xyyy 799: MOV TEMP[16].w, TEMP[10].xxxx 800: TXL TEMP[16], TEMP[16], SAMP[7], 2D 801: FSEQ TEMP[17].x, TEMP[11].zzzz, IMM[3].xxxx 802: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 803: MOV TEMP[18].xy, TEMP[13].xyyy 804: MOV TEMP[18].w, TEMP[10].xxxx 805: TXL TEMP[18], TEMP[18], SAMP[5], 2D 806: FSEQ TEMP[19].x, TEMP[11].zzzz, IMM[2].wwww 807: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 808: MOV TEMP[20].xy, TEMP[13].xyyy 809: MOV TEMP[20].w, TEMP[10].xxxx 810: TXL TEMP[20], TEMP[20], SAMP[3], 2D 811: FSEQ TEMP[21].x, TEMP[11].zzzz, IMM[2].zzzz 812: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 813: MOV TEMP[13].xy, TEMP[13].xyyy 814: MOV TEMP[13].w, TEMP[10].xxxx 815: TXL TEMP[13], TEMP[13], SAMP[1], 2D 816: FSEQ TEMP[22].x, TEMP[11].zzzz, IMM[3].yyyy 817: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 818: MUL TEMP[13], TEMP[13], TEMP[22].xxxx 819: MAD TEMP[13], TEMP[20], TEMP[21].xxxx, TEMP[13] 820: MAD TEMP[13], TEMP[18], TEMP[19].xxxx, TEMP[13] 821: MAD TEMP[13], TEMP[16], TEMP[17].xxxx, TEMP[13] 822: MAD TEMP[13].yw, TEMP[14], TEMP[15].xxxx, TEMP[13] 823: MAD TEMP[13].xy, TEMP[13].wyyy, IMM[2].wwww, IMM[3].zzzz 824: DP2 TEMP[14].x, TEMP[13].xyyy, TEMP[13].xyyy 825: MOV_SAT TEMP[34].x, TEMP[14].xxxx 826: MOV TEMP[14].xy, TEMP[0].xyxx 827: MOV TEMP[15].x, IMM[2].xxxx 828: FSNE TEMP[16].x, CONST[13].xxxx, TEMP[7].xxxx 829: UIF TEMP[16].xxxx :0 830: MOV TEMP[15].x, IMM[2].yyyy 831: RCP TEMP[16].x, CONST[16].xxxx 832: MUL TEMP[14].xy, TEMP[0].xyyy, TEMP[16].xxxx 833: ELSE :0 834: RCP TEMP[16].x, CONST[15].xxxx 835: MUL TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx 836: ENDIF 837: FRC TEMP[14].xy, TEMP[14].xyyy 838: MUL TEMP[16].x, CONST[17].xxxx, IMM[2].wwww 839: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[15].xxxx 840: ADD TEMP[16].x, IMM[2].zzzz, -TEMP[16].xxxx 841: MUL TEMP[15].x, TEMP[15].xxxx, CONST[17].xxxx 842: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx, TEMP[15].xxxx 843: MAD TEMP[7].xy, TEMP[14].xyyy, TEMP[7].xxxx, TEMP[11].xyyy 844: MOV TEMP[14].xy, TEMP[7].xyyy 845: MOV TEMP[14].w, TEMP[10].xxxx 846: TXL TEMP[14], TEMP[14], SAMP[9], 2D 847: FSEQ TEMP[15].x, TEMP[11].zzzz, IMM[1].zzzz 848: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 849: MOV TEMP[16].xy, TEMP[7].xyyy 850: MOV TEMP[16].w, TEMP[10].xxxx 851: TXL TEMP[16], TEMP[16], SAMP[7], 2D 852: FSEQ TEMP[17].x, TEMP[11].zzzz, IMM[3].xxxx 853: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 854: MOV TEMP[18].xy, TEMP[7].xyyy 855: MOV TEMP[18].w, TEMP[10].xxxx 856: TXL TEMP[18], TEMP[18], SAMP[5], 2D 857: FSEQ TEMP[19].x, TEMP[11].zzzz, IMM[2].wwww 858: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 859: MOV TEMP[20].xy, TEMP[7].xyyy 860: MOV TEMP[20].w, TEMP[10].xxxx 861: TXL TEMP[20], TEMP[20], SAMP[3], 2D 862: FSEQ TEMP[21].x, TEMP[11].zzzz, IMM[2].zzzz 863: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 864: MOV TEMP[7].xy, TEMP[7].xyyy 865: MOV TEMP[7].w, TEMP[10].xxxx 866: TXL TEMP[7], TEMP[7], SAMP[1], 2D 867: FSEQ TEMP[11].x, TEMP[11].zzzz, IMM[3].yyyy 868: AND TEMP[11].x, TEMP[11].xxxx, IMM[2].zzzz 869: MUL TEMP[7], TEMP[7], TEMP[11].xxxx 870: MAD TEMP[7], TEMP[20], TEMP[21].xxxx, TEMP[7] 871: MAD TEMP[7], TEMP[18], TEMP[19].xxxx, TEMP[7] 872: MAD TEMP[7], TEMP[16], TEMP[17].xxxx, TEMP[7] 873: MAD TEMP[7].yw, TEMP[14], TEMP[15].xxxx, TEMP[7] 874: MAD TEMP[7].xy, TEMP[7].wyyy, IMM[2].wwww, IMM[3].zzzz 875: DP2 TEMP[11].x, TEMP[7].xyyy, TEMP[7].xyyy 876: MOV_SAT TEMP[35].x, TEMP[11].xxxx 877: MOV TEMP[11].x, IMM[3].yyyy 878: MOV TEMP[11].y, TEMP[9].xxxx 879: MOV TEMP[11].z, TEMP[9].yyyy 880: MOV TEMP[9].y, IMM[3].yyyy 881: MOV TEMP[9].x, TEMP[13].yyyy 882: MOV TEMP[9].z, TEMP[13].xxxx 883: MOV TEMP[13].z, IMM[3].yyyy 884: MOV TEMP[13].xy, TEMP[7].xyxx 885: MUL TEMP[7].xyz, TEMP[11].xyzz, TEMP[4].xxxx 886: MAD TEMP[7].xyz, TEMP[9].xyzz, TEMP[4].yyyy, TEMP[7].xyzz 887: MAD TEMP[7].xyz, TEMP[13].xyzz, TEMP[4].zzzz, TEMP[7].xyzz 888: MOV TEMP[9].xy, IN[3].yxyy 889: MOV TEMP[11].x, IMM[2].xxxx 890: FSNE TEMP[13].x, CONST[13].xxxx, TEMP[8].xxxx 891: UIF TEMP[13].xxxx :0 892: MOV TEMP[11].x, IMM[2].yyyy 893: RCP TEMP[13].x, CONST[16].xxxx 894: MUL TEMP[9].xy, IN[3].yxxx, TEMP[13].xxxx 895: ELSE :0 896: RCP TEMP[13].x, CONST[15].xxxx 897: MUL TEMP[9].xy, TEMP[9].xyyy, TEMP[13].xxxx 898: ENDIF 899: FRC TEMP[9].xy, TEMP[9].xyyy 900: MUL TEMP[13].x, CONST[17].xxxx, IMM[2].wwww 901: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[11].xxxx 902: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx 903: MUL TEMP[11].x, TEMP[11].xxxx, CONST[17].xxxx 904: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[13].xxxx, TEMP[11].xxxx 905: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[8].xxxx, TEMP[5].xyyy 906: MOV TEMP[11].xy, TEMP[9].xyyy 907: MOV TEMP[11].w, TEMP[10].xxxx 908: TXL TEMP[11], TEMP[11], SAMP[9], 2D 909: FSEQ TEMP[13].x, TEMP[5].zzzz, IMM[1].zzzz 910: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 911: MOV TEMP[14].xy, TEMP[9].xyyy 912: MOV TEMP[14].w, TEMP[10].xxxx 913: TXL TEMP[14], TEMP[14], SAMP[7], 2D 914: FSEQ TEMP[15].x, TEMP[5].zzzz, IMM[3].xxxx 915: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 916: MOV TEMP[16].xy, TEMP[9].xyyy 917: MOV TEMP[16].w, TEMP[10].xxxx 918: TXL TEMP[16], TEMP[16], SAMP[5], 2D 919: FSEQ TEMP[17].x, TEMP[5].zzzz, IMM[2].wwww 920: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 921: MOV TEMP[18].xy, TEMP[9].xyyy 922: MOV TEMP[18].w, TEMP[10].xxxx 923: TXL TEMP[18], TEMP[18], SAMP[3], 2D 924: FSEQ TEMP[19].x, TEMP[5].zzzz, IMM[2].zzzz 925: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 926: MOV TEMP[9].xy, TEMP[9].xyyy 927: MOV TEMP[9].w, TEMP[10].xxxx 928: TXL TEMP[9], TEMP[9], SAMP[1], 2D 929: FSEQ TEMP[20].x, TEMP[5].zzzz, IMM[3].yyyy 930: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 931: MUL TEMP[9], TEMP[9], TEMP[20].xxxx 932: MAD TEMP[9], TEMP[18], TEMP[19].xxxx, TEMP[9] 933: MAD TEMP[9], TEMP[16], TEMP[17].xxxx, TEMP[9] 934: MAD TEMP[9], TEMP[14], TEMP[15].xxxx, TEMP[9] 935: MAD TEMP[9].yw, TEMP[11], TEMP[13].xxxx, TEMP[9] 936: MAD TEMP[9].xy, TEMP[9].wyyy, IMM[2].wwww, IMM[3].zzzz 937: DP2 TEMP[11].x, TEMP[9].xyyy, TEMP[9].xyyy 938: MOV_SAT TEMP[36].x, TEMP[11].xxxx 939: MOV TEMP[11].xy, TEMP[0].zxzz 940: MOV TEMP[13].x, IMM[2].xxxx 941: FSNE TEMP[14].x, CONST[13].xxxx, TEMP[8].xxxx 942: UIF TEMP[14].xxxx :0 943: MOV TEMP[13].x, IMM[2].yyyy 944: RCP TEMP[14].x, CONST[16].xxxx 945: MUL TEMP[11].xy, TEMP[0].zxxx, TEMP[14].xxxx 946: ELSE :0 947: RCP TEMP[14].x, CONST[15].xxxx 948: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[14].xxxx 949: ENDIF 950: FRC TEMP[11].xy, TEMP[11].xyyy 951: MUL TEMP[14].x, CONST[17].xxxx, IMM[2].wwww 952: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx 953: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx 954: MUL TEMP[13].x, TEMP[13].xxxx, CONST[17].xxxx 955: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[14].xxxx, TEMP[13].xxxx 956: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[8].xxxx, TEMP[5].xyyy 957: MOV TEMP[13].xy, TEMP[11].xyyy 958: MOV TEMP[13].w, TEMP[10].xxxx 959: TXL TEMP[13], TEMP[13], SAMP[9], 2D 960: FSEQ TEMP[14].x, TEMP[5].zzzz, IMM[1].zzzz 961: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 962: MOV TEMP[15].xy, TEMP[11].xyyy 963: MOV TEMP[15].w, TEMP[10].xxxx 964: TXL TEMP[15], TEMP[15], SAMP[7], 2D 965: FSEQ TEMP[16].x, TEMP[5].zzzz, IMM[3].xxxx 966: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 967: MOV TEMP[17].xy, TEMP[11].xyyy 968: MOV TEMP[17].w, TEMP[10].xxxx 969: TXL TEMP[17], TEMP[17], SAMP[5], 2D 970: FSEQ TEMP[18].x, TEMP[5].zzzz, IMM[2].wwww 971: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 972: MOV TEMP[19].xy, TEMP[11].xyyy 973: MOV TEMP[19].w, TEMP[10].xxxx 974: TXL TEMP[19], TEMP[19], SAMP[3], 2D 975: FSEQ TEMP[20].x, TEMP[5].zzzz, IMM[2].zzzz 976: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 977: MOV TEMP[11].xy, TEMP[11].xyyy 978: MOV TEMP[11].w, TEMP[10].xxxx 979: TXL TEMP[11], TEMP[11], SAMP[1], 2D 980: FSEQ TEMP[21].x, TEMP[5].zzzz, IMM[3].yyyy 981: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 982: MUL TEMP[11], TEMP[11], TEMP[21].xxxx 983: MAD TEMP[11], TEMP[19], TEMP[20].xxxx, TEMP[11] 984: MAD TEMP[11], TEMP[17], TEMP[18].xxxx, TEMP[11] 985: MAD TEMP[11], TEMP[15], TEMP[16].xxxx, TEMP[11] 986: MAD TEMP[11].yw, TEMP[13], TEMP[14].xxxx, TEMP[11] 987: MAD TEMP[11].xy, TEMP[11].wyyy, IMM[2].wwww, IMM[3].zzzz 988: DP2 TEMP[13].x, TEMP[11].xyyy, TEMP[11].xyyy 989: MOV_SAT TEMP[37].x, TEMP[13].xxxx 990: MOV TEMP[13].xy, TEMP[0].xyxx 991: MOV TEMP[14].x, IMM[2].xxxx 992: FSNE TEMP[15].x, CONST[13].xxxx, TEMP[8].xxxx 993: UIF TEMP[15].xxxx :0 994: MOV TEMP[14].x, IMM[2].yyyy 995: RCP TEMP[15].x, CONST[16].xxxx 996: MUL TEMP[13].xy, TEMP[0].xyyy, TEMP[15].xxxx 997: ELSE :0 998: RCP TEMP[0].x, CONST[15].xxxx 999: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[0].xxxx 1000: ENDIF 1001: FRC TEMP[0].xy, TEMP[13].xyyy 1002: MUL TEMP[13].x, CONST[17].xxxx, IMM[2].wwww 1003: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[14].xxxx 1004: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx 1005: MUL TEMP[14].x, TEMP[14].xxxx, CONST[17].xxxx 1006: MAD TEMP[0].xy, TEMP[0].xyyy, TEMP[13].xxxx, TEMP[14].xxxx 1007: MAD TEMP[0].xy, TEMP[0].xyyy, TEMP[8].xxxx, TEMP[5].xyyy 1008: MOV TEMP[8].xy, TEMP[0].xyyy 1009: MOV TEMP[8].w, TEMP[10].xxxx 1010: TXL TEMP[8], TEMP[8], SAMP[9], 2D 1011: FSEQ TEMP[13].x, TEMP[5].zzzz, IMM[1].zzzz 1012: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 1013: MOV TEMP[14].xy, TEMP[0].xyyy 1014: MOV TEMP[14].w, TEMP[10].xxxx 1015: TXL TEMP[14], TEMP[14], SAMP[7], 2D 1016: FSEQ TEMP[15].x, TEMP[5].zzzz, IMM[3].xxxx 1017: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 1018: MOV TEMP[16].xy, TEMP[0].xyyy 1019: MOV TEMP[16].w, TEMP[10].xxxx 1020: TXL TEMP[16], TEMP[16], SAMP[5], 2D 1021: FSEQ TEMP[17].x, TEMP[5].zzzz, IMM[2].wwww 1022: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 1023: MOV TEMP[18].xy, TEMP[0].xyyy 1024: MOV TEMP[18].w, TEMP[10].xxxx 1025: TXL TEMP[18], TEMP[18], SAMP[3], 2D 1026: FSEQ TEMP[19].x, TEMP[5].zzzz, IMM[2].zzzz 1027: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 1028: MOV TEMP[0].xy, TEMP[0].xyyy 1029: MOV TEMP[0].w, TEMP[10].xxxx 1030: TXL TEMP[0], TEMP[0], SAMP[1], 2D 1031: FSEQ TEMP[5].x, TEMP[5].zzzz, IMM[3].yyyy 1032: AND TEMP[5].x, TEMP[5].xxxx, IMM[2].zzzz 1033: MUL TEMP[0], TEMP[0], TEMP[5].xxxx 1034: MAD TEMP[0], TEMP[18], TEMP[19].xxxx, TEMP[0] 1035: MAD TEMP[0], TEMP[16], TEMP[17].xxxx, TEMP[0] 1036: MAD TEMP[0], TEMP[14], TEMP[15].xxxx, TEMP[0] 1037: MAD TEMP[0].yw, TEMP[8], TEMP[13].xxxx, TEMP[0] 1038: MAD TEMP[0].xy, TEMP[0].wyyy, IMM[2].wwww, IMM[3].zzzz 1039: DP2 TEMP[5].x, TEMP[0].xyyy, TEMP[0].xyyy 1040: MOV_SAT TEMP[38].x, TEMP[5].xxxx 1041: MOV TEMP[5].x, IMM[3].yyyy 1042: MOV TEMP[5].y, TEMP[9].xxxx 1043: MOV TEMP[5].z, TEMP[9].yyyy 1044: MOV TEMP[8].y, IMM[3].yyyy 1045: MOV TEMP[8].x, TEMP[11].yyyy 1046: MOV TEMP[8].z, TEMP[11].xxxx 1047: MOV TEMP[9].z, IMM[3].yyyy 1048: MOV TEMP[9].xy, TEMP[0].xyxx 1049: MOV TEMP[0].w, IMM[2].zzzz 1050: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[4].xxxx 1051: MAD TEMP[5].xyz, TEMP[8].xyzz, TEMP[4].yyyy, TEMP[5].xyzz 1052: MAD TEMP[4].xyz, TEMP[9].xyzz, TEMP[4].zzzz, TEMP[5].xyzz 1053: MUL TEMP[5].xyz, IN[1].xxxx, TEMP[6].xyzz 1054: MAD TEMP[5].xyz, IN[1].yyyy, TEMP[7].xyzz, TEMP[5].xyzz 1055: MAD TEMP[0].xyz, IN[1].zzzz, TEMP[4].xyzz, TEMP[5].xyzz 1056: DP4 TEMP[4].x, TEMP[0], TEMP[0] 1057: RSQ TEMP[4].x, TEMP[4].xxxx 1058: MUL TEMP[0].xyz, TEMP[0], TEMP[4].xxxx 1059: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[0].wwww 1060: ADD TEMP[0].xyz, IN[2].xyzz, -TEMP[0].xyzz 1061: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[0].xyzz 1062: RSQ TEMP[4].x, TEMP[4].xxxx 1063: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xxxx 1064: MOV TEMP[4].w, IMM[3].yyyy 1065: MUL TEMP[4].xyz, TEMP[12].xyzz, TEMP[1].xyzz 1066: ADD TEMP[1].xyz, TEMP[2].xyzz, TEMP[3].xyzz 1067: DP3 TEMP[3].x, TEMP[1].xyzz, TEMP[1].xyzz 1068: RSQ TEMP[3].x, TEMP[3].xxxx 1069: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx 1070: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz 1071: MAX TEMP[1].x, IMM[3].wwww, TEMP[1].xxxx 1072: MUL TEMP[3].x, IMM[4].xxxx, IN[1].wwww 1073: POW TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx 1074: MOV_SAT TEMP[1].x, TEMP[1].xxxx 1075: MOV TEMP[3].w, IMM[3].yyyy 1076: MOV TEMP[3].xyz, CONST[20].xyzx 1077: MOV TEMP[5].w, IMM[2].zzzz 1078: MUL TEMP[6].x, IMM[2].wwww, TEMP[1].xxxx 1079: ADD TEMP[6].x, IMM[3].xxxx, -TEMP[6].xxxx 1080: MUL TEMP[6].x, TEMP[1].xxxx, TEMP[6].xxxx 1081: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx 1082: MUL TEMP[1].x, TEMP[1].xxxx, IN[1].wwww 1083: MUL TEMP[6].xyz, TEMP[12].xyzz, CONST[2].xyzz 1084: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[2].xyzz 1085: MOV_SAT TEMP[0].x, TEMP[0].xxxx 1086: MUL TEMP[2], CONST[21], IMM[2].wwww 1087: MAX TEMP[2], TEMP[2], TEMP[3] 1088: MIN TEMP[2].xyz, TEMP[2], IMM[4].yyyz 1089: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[12].xyzz 1090: MAD TEMP[0].xyz, TEMP[6].xyzz, TEMP[0].xxxx, TEMP[2].xyzz 1091: MAD TEMP[0].xyz, CONST[2].xyzz, TEMP[1].xxxx, TEMP[0].xyzz 1092: MUL TEMP[5].xyz, TEMP[0].xyzz, IMM[0].wwww 1093: ADD TEMP[0].xyz, TEMP[4], TEMP[5] 1094: MOV TEMP[4].xyz, TEMP[0].xyzx 1095: MOV TEMP[4].w, IMM[2].zzzz 1096: MOV OUT[0], TEMP[4] 1097: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 324) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 328) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 340) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 344) %46 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %47 = load <8 x i32>, <8 x i32> addrspace(2)* %46, align 32, !tbaa !0 %48 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0 %50 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %51 = load <8 x i32>, <8 x i32> addrspace(2)* %50, align 32, !tbaa !0 %52 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %53 = load <4 x i32>, <4 x i32> addrspace(2)* %52, align 16, !tbaa !0 %54 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %55 = load <8 x i32>, <8 x i32> addrspace(2)* %54, align 32, !tbaa !0 %56 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %57 = load <4 x i32>, <4 x i32> addrspace(2)* %56, align 16, !tbaa !0 %58 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %59 = load <8 x i32>, <8 x i32> addrspace(2)* %58, align 32, !tbaa !0 %60 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %61 = load <4 x i32>, <4 x i32> addrspace(2)* %60, align 16, !tbaa !0 %62 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %63 = load <8 x i32>, <8 x i32> addrspace(2)* %62, align 32, !tbaa !0 %64 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %65 = load <4 x i32>, <4 x i32> addrspace(2)* %64, align 16, !tbaa !0 %66 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %67 = load <8 x i32>, <8 x i32> addrspace(2)* %66, align 32, !tbaa !0 %68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %69 = load <4 x i32>, <4 x i32> addrspace(2)* %68, align 16, !tbaa !0 %70 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %71 = load <8 x i32>, <8 x i32> addrspace(2)* %70, align 32, !tbaa !0 %72 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %73 = load <4 x i32>, <4 x i32> addrspace(2)* %72, align 16, !tbaa !0 %74 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 7 %75 = load <8 x i32>, <8 x i32> addrspace(2)* %74, align 32, !tbaa !0 %76 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 7 %77 = load <4 x i32>, <4 x i32> addrspace(2)* %76, align 16, !tbaa !0 %78 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 8 %79 = load <8 x i32>, <8 x i32> addrspace(2)* %78, align 32, !tbaa !0 %80 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 8 %81 = load <4 x i32>, <4 x i32> addrspace(2)* %80, align 16, !tbaa !0 %82 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 9 %83 = load <8 x i32>, <8 x i32> addrspace(2)* %82, align 32, !tbaa !0 %84 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 9 %85 = load <4 x i32>, <4 x i32> addrspace(2)* %84, align 16, !tbaa !0 %86 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %91 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %92 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %93 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %94 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %95 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %103 = fmul float %27, %27 %104 = fmul float %28, %28 %105 = fadd float %104, %103 %106 = fmul float %29, %29 %107 = fadd float %105, %106 %108 = call float @llvm.AMDGPU.rsq.clamped.f32(float %107) %109 = fmul float %27, %108 %110 = fmul float %28, %108 %111 = fmul float %29, %108 %112 = fsub float %24, %97 %113 = fsub float %25, %98 %114 = fsub float %26, %99 %115 = fmul float %112, %112 %116 = fmul float %113, %113 %117 = fadd float %116, %115 %118 = fmul float %114, %114 %119 = fadd float %117, %118 %120 = call float @llvm.AMDGPU.rsq.clamped.f32(float %119) %121 = fmul float %112, %120 %122 = fmul float %113, %120 %123 = fmul float %114, %120 %124 = call float @llvm.fabs.f32(float %94) %125 = call float @llvm.fabs.f32(float %95) %126 = call float @llvm.fabs.f32(float %96) %127 = fmul float %124, %124 %128 = fmul float %125, %125 %129 = fadd float %128, %127 %130 = fmul float %126, %126 %131 = fadd float %129, %130 %132 = call float @llvm.AMDGPU.rsq.clamped.f32(float %131) %133 = fmul float %124, %132 %134 = fadd float %133, 0xBFC99999A0000000 %135 = fmul float %125, %132 %136 = fadd float %135, 0xBFC99999A0000000 %137 = fmul float %126, %132 %138 = fadd float %137, 0xBFC99999A0000000 %139 = fmul float %134, 7.000000e+00 %140 = fmul float %136, 7.000000e+00 %141 = fmul float %138, 7.000000e+00 %142 = call float @llvm.maxnum.f32(float %139, float 0x3F847AE140000000) %143 = call float @llvm.maxnum.f32(float %140, float 0x3F847AE140000000) %144 = call float @llvm.maxnum.f32(float %141, float 0x3F847AE140000000) %145 = fadd float %142, %143 %146 = fadd float %145, %144 %147 = fdiv float 1.000000e+00, %146 %148 = fmul float %142, %147 %149 = fmul float %143, %147 %150 = fmul float %144, %147 %151 = fadd float %86, 5.000000e-01 %152 = fadd float %87, 5.000000e-01 %153 = fadd float %88, 5.000000e-01 %154 = call float @llvm.floor.f32(float %151) %155 = call float @llvm.floor.f32(float %152) %156 = call float @llvm.floor.f32(float %153) %157 = fmul float %154, %33 %158 = call float @llvm.floor.f32(float %157) %159 = fmul float %158, %33 %160 = fcmp ult float %154, 6.400000e+01 br i1 %160, label %ELSE, label %IF IF: ; preds = %main_body %161 = fadd float %154, -6.400000e+01 %162 = fmul float %161, %34 %163 = call float @llvm.floor.f32(float %162) %164 = fmul float %163, %34 %165 = call float @llvm.floor.f32(float %162) %166 = fsub float %162, %165 %167 = call float @llvm.floor.f32(float %164) %168 = fsub float %164, %167 %169 = call float @llvm.floor.f32(float %164) %170 = fadd float %169, 4.000000e+00 br label %ENDIF ELSE: ; preds = %main_body %171 = call float @llvm.floor.f32(float %157) %172 = fsub float %157, %171 %173 = call float @llvm.floor.f32(float %159) %174 = fsub float %159, %173 %175 = call float @llvm.floor.f32(float %159) br label %ENDIF ENDIF: ; preds = %ELSE, %IF %temp36.0 = phi float [ %166, %IF ], [ %172, %ELSE ] %temp37.0 = phi float [ %168, %IF ], [ %174, %ELSE ] %temp38.0 = phi float [ %170, %IF ], [ %175, %ELSE ] %temp24.0 = phi float [ %34, %IF ], [ %33, %ELSE ] %176 = fmul float %155, %33 %177 = call float @llvm.floor.f32(float %176) %178 = fmul float %177, %33 %179 = fcmp ult float %155, 6.400000e+01 br i1 %179, label %ELSE158, label %IF157 IF157: ; preds = %ENDIF %180 = fadd float %155, -6.400000e+01 %181 = fmul float %180, %34 %182 = call float @llvm.floor.f32(float %181) %183 = fmul float %182, %34 %184 = call float @llvm.floor.f32(float %181) %185 = fsub float %181, %184 %186 = call float @llvm.floor.f32(float %183) %187 = fsub float %183, %186 %188 = call float @llvm.floor.f32(float %183) %189 = fadd float %188, 4.000000e+00 br label %ENDIF156 ELSE158: ; preds = %ENDIF %190 = call float @llvm.floor.f32(float %176) %191 = fsub float %176, %190 %192 = call float @llvm.floor.f32(float %178) %193 = fsub float %178, %192 %194 = call float @llvm.floor.f32(float %178) br label %ENDIF156 ENDIF156: ; preds = %ELSE158, %IF157 %temp44.0 = phi float [ %185, %IF157 ], [ %191, %ELSE158 ] %temp45.0 = phi float [ %187, %IF157 ], [ %193, %ELSE158 ] %temp46.0 = phi float [ %189, %IF157 ], [ %194, %ELSE158 ] %temp28.0 = phi float [ %34, %IF157 ], [ %33, %ELSE158 ] %195 = fmul float %156, %33 %196 = call float @llvm.floor.f32(float %195) %197 = fmul float %196, %33 %198 = fcmp ult float %156, 6.400000e+01 br i1 %198, label %ELSE161, label %IF160 IF160: ; preds = %ENDIF156 %199 = fadd float %156, -6.400000e+01 %200 = fmul float %199, %34 %201 = call float @llvm.floor.f32(float %200) %202 = fmul float %201, %34 %203 = call float @llvm.floor.f32(float %200) %204 = fsub float %200, %203 %205 = call float @llvm.floor.f32(float %202) %206 = fsub float %202, %205 %207 = call float @llvm.floor.f32(float %202) %208 = fadd float %207, 4.000000e+00 br label %ENDIF159 ELSE161: ; preds = %ENDIF156 %209 = call float @llvm.floor.f32(float %195) %210 = fsub float %195, %209 %211 = call float @llvm.floor.f32(float %197) %212 = fsub float %197, %211 %213 = call float @llvm.floor.f32(float %197) br label %ENDIF159 ENDIF159: ; preds = %ELSE161, %IF160 %temp32.0 = phi float [ %34, %IF160 ], [ %33, %ELSE161 ] %temp22.0 = phi float [ %208, %IF160 ], [ %213, %ELSE161 ] %temp21.0 = phi float [ %206, %IF160 ], [ %212, %ELSE161 ] %temp20.0 = phi float [ %204, %IF160 ], [ %210, %ELSE161 ] %214 = fsub float %97, %24 %215 = fsub float %98, %25 %216 = fsub float %99, %26 %217 = fmul float %214, %214 %218 = fmul float %215, %215 %219 = fadd float %218, %217 %220 = fmul float %216, %216 %221 = fadd float %219, %220 %222 = fmul float %39, %221 %223 = call float @llvm.log2.f32(float %222) %224 = fmul float %223, 0x3FE62E4300000000 %225 = fmul float %224, %38 %226 = fcmp une float %33, %temp24.0 %.sink216 = select i1 %226, float %36, float %35 %temp52.0 = select i1 %226, float 1.953125e-03, float 3.906250e-03 %227 = fdiv float 1.000000e+00, %.sink216 %228 = fmul float %97, %227 %229 = fmul float %98, %227 %230 = call float @llvm.floor.f32(float %228) %231 = fsub float %228, %230 %232 = call float @llvm.floor.f32(float %229) %233 = fsub float %229, %232 %234 = fmul float %37, 2.000000e+00 %235 = fmul float %234, %temp52.0 %236 = fsub float 1.000000e+00, %235 %237 = fmul float %temp52.0, %37 %238 = fmul float %231, %236 %239 = fadd float %238, %237 %240 = fmul float %233, %236 %241 = fadd float %240, %237 %242 = fmul float %239, %temp24.0 %243 = fadd float %242, %temp36.0 %244 = fmul float %241, %temp24.0 %245 = fadd float %244, %temp37.0 %246 = bitcast float %243 to i32 %247 = bitcast float %245 to i32 %248 = bitcast float %225 to i32 %249 = insertelement <4 x i32> undef, i32 %246, i32 0 %250 = insertelement <4 x i32> %249, i32 %247, i32 1 %251 = insertelement <4 x i32> %250, i32 %248, i32 2 %252 = bitcast <8 x i32> %79 to <32 x i8> %253 = bitcast <4 x i32> %81 to <16 x i8> %254 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %251, <32 x i8> %252, <16 x i8> %253, i32 2) %255 = extractelement <4 x float> %254, i32 0 %256 = extractelement <4 x float> %254, i32 1 %257 = extractelement <4 x float> %254, i32 2 %258 = fcmp oeq float %temp38.0, 4.000000e+00 %259 = select i1 %258, float 1.000000e+00, float 0.000000e+00 %260 = bitcast float %243 to i32 %261 = bitcast float %245 to i32 %262 = bitcast float %225 to i32 %263 = insertelement <4 x i32> undef, i32 %260, i32 0 %264 = insertelement <4 x i32> %263, i32 %261, i32 1 %265 = insertelement <4 x i32> %264, i32 %262, i32 2 %266 = bitcast <8 x i32> %71 to <32 x i8> %267 = bitcast <4 x i32> %73 to <16 x i8> %268 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %265, <32 x i8> %266, <16 x i8> %267, i32 2) %269 = extractelement <4 x float> %268, i32 0 %270 = extractelement <4 x float> %268, i32 1 %271 = extractelement <4 x float> %268, i32 2 %272 = fcmp oeq float %temp38.0, 3.000000e+00 %273 = select i1 %272, float 1.000000e+00, float 0.000000e+00 %274 = bitcast float %243 to i32 %275 = bitcast float %245 to i32 %276 = bitcast float %225 to i32 %277 = insertelement <4 x i32> undef, i32 %274, i32 0 %278 = insertelement <4 x i32> %277, i32 %275, i32 1 %279 = insertelement <4 x i32> %278, i32 %276, i32 2 %280 = bitcast <8 x i32> %63 to <32 x i8> %281 = bitcast <4 x i32> %65 to <16 x i8> %282 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %279, <32 x i8> %280, <16 x i8> %281, i32 2) %283 = extractelement <4 x float> %282, i32 0 %284 = extractelement <4 x float> %282, i32 1 %285 = extractelement <4 x float> %282, i32 2 %286 = fcmp oeq float %temp38.0, 2.000000e+00 %287 = select i1 %286, float 1.000000e+00, float 0.000000e+00 %288 = bitcast float %243 to i32 %289 = bitcast float %245 to i32 %290 = bitcast float %225 to i32 %291 = insertelement <4 x i32> undef, i32 %288, i32 0 %292 = insertelement <4 x i32> %291, i32 %289, i32 1 %293 = insertelement <4 x i32> %292, i32 %290, i32 2 %294 = bitcast <8 x i32> %55 to <32 x i8> %295 = bitcast <4 x i32> %57 to <16 x i8> %296 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %293, <32 x i8> %294, <16 x i8> %295, i32 2) %297 = extractelement <4 x float> %296, i32 0 %298 = extractelement <4 x float> %296, i32 1 %299 = extractelement <4 x float> %296, i32 2 %300 = fcmp oeq float %temp38.0, 1.000000e+00 %301 = select i1 %300, float 1.000000e+00, float 0.000000e+00 %302 = bitcast float %243 to i32 %303 = bitcast float %245 to i32 %304 = bitcast float %225 to i32 %305 = insertelement <4 x i32> undef, i32 %302, i32 0 %306 = insertelement <4 x i32> %305, i32 %303, i32 1 %307 = insertelement <4 x i32> %306, i32 %304, i32 2 %308 = bitcast <8 x i32> %47 to <32 x i8> %309 = bitcast <4 x i32> %49 to <16 x i8> %310 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %307, <32 x i8> %308, <16 x i8> %309, i32 2) %311 = extractelement <4 x float> %310, i32 0 %312 = extractelement <4 x float> %310, i32 1 %313 = extractelement <4 x float> %310, i32 2 %314 = fcmp oeq float %temp38.0, 0.000000e+00 %315 = select i1 %314, float 1.000000e+00, float 0.000000e+00 %316 = fmul float %311, %315 %317 = fmul float %312, %315 %318 = fmul float %313, %315 %319 = fmul float %297, %301 %320 = fadd float %319, %316 %321 = fmul float %298, %301 %322 = fadd float %321, %317 %323 = fmul float %299, %301 %324 = fadd float %323, %318 %325 = fmul float %283, %287 %326 = fadd float %325, %320 %327 = fmul float %284, %287 %328 = fadd float %327, %322 %329 = fmul float %285, %287 %330 = fadd float %329, %324 %331 = fmul float %269, %273 %332 = fadd float %331, %326 %333 = fmul float %270, %273 %334 = fadd float %333, %328 %335 = fmul float %271, %273 %336 = fadd float %335, %330 %337 = fmul float %255, %259 %338 = fadd float %337, %332 %339 = fmul float %256, %259 %340 = fadd float %339, %334 %341 = fmul float %257, %259 %342 = fadd float %341, %336 %343 = fcmp une float %33, %temp24.0 %.sink217 = select i1 %343, float %36, float %35 %temp56.0 = select i1 %343, float 1.953125e-03, float 3.906250e-03 %344 = fdiv float 1.000000e+00, %.sink217 %345 = fmul float %99, %344 %346 = fmul float %98, %344 %347 = call float @llvm.floor.f32(float %345) %348 = fsub float %345, %347 %349 = call float @llvm.floor.f32(float %346) %350 = fsub float %346, %349 %351 = fmul float %37, 2.000000e+00 %352 = fmul float %351, %temp56.0 %353 = fsub float 1.000000e+00, %352 %354 = fmul float %temp56.0, %37 %355 = fmul float %348, %353 %356 = fadd float %355, %354 %357 = fmul float %350, %353 %358 = fadd float %357, %354 %359 = fmul float %356, %temp24.0 %360 = fadd float %359, %temp36.0 %361 = fmul float %358, %temp24.0 %362 = fadd float %361, %temp37.0 %363 = bitcast float %360 to i32 %364 = bitcast float %362 to i32 %365 = bitcast float %225 to i32 %366 = insertelement <4 x i32> undef, i32 %363, i32 0 %367 = insertelement <4 x i32> %366, i32 %364, i32 1 %368 = insertelement <4 x i32> %367, i32 %365, i32 2 %369 = bitcast <8 x i32> %79 to <32 x i8> %370 = bitcast <4 x i32> %81 to <16 x i8> %371 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %368, <32 x i8> %369, <16 x i8> %370, i32 2) %372 = extractelement <4 x float> %371, i32 0 %373 = extractelement <4 x float> %371, i32 1 %374 = extractelement <4 x float> %371, i32 2 %375 = fcmp oeq float %temp38.0, 4.000000e+00 %376 = select i1 %375, float 1.000000e+00, float 0.000000e+00 %377 = bitcast float %360 to i32 %378 = bitcast float %362 to i32 %379 = bitcast float %225 to i32 %380 = insertelement <4 x i32> undef, i32 %377, i32 0 %381 = insertelement <4 x i32> %380, i32 %378, i32 1 %382 = insertelement <4 x i32> %381, i32 %379, i32 2 %383 = bitcast <8 x i32> %71 to <32 x i8> %384 = bitcast <4 x i32> %73 to <16 x i8> %385 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %382, <32 x i8> %383, <16 x i8> %384, i32 2) %386 = extractelement <4 x float> %385, i32 0 %387 = extractelement <4 x float> %385, i32 1 %388 = extractelement <4 x float> %385, i32 2 %389 = fcmp oeq float %temp38.0, 3.000000e+00 %390 = select i1 %389, float 1.000000e+00, float 0.000000e+00 %391 = bitcast float %360 to i32 %392 = bitcast float %362 to i32 %393 = bitcast float %225 to i32 %394 = insertelement <4 x i32> undef, i32 %391, i32 0 %395 = insertelement <4 x i32> %394, i32 %392, i32 1 %396 = insertelement <4 x i32> %395, i32 %393, i32 2 %397 = bitcast <8 x i32> %63 to <32 x i8> %398 = bitcast <4 x i32> %65 to <16 x i8> %399 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %396, <32 x i8> %397, <16 x i8> %398, i32 2) %400 = extractelement <4 x float> %399, i32 0 %401 = extractelement <4 x float> %399, i32 1 %402 = extractelement <4 x float> %399, i32 2 %403 = fcmp oeq float %temp38.0, 2.000000e+00 %404 = select i1 %403, float 1.000000e+00, float 0.000000e+00 %405 = bitcast float %360 to i32 %406 = bitcast float %362 to i32 %407 = bitcast float %225 to i32 %408 = insertelement <4 x i32> undef, i32 %405, i32 0 %409 = insertelement <4 x i32> %408, i32 %406, i32 1 %410 = insertelement <4 x i32> %409, i32 %407, i32 2 %411 = bitcast <8 x i32> %55 to <32 x i8> %412 = bitcast <4 x i32> %57 to <16 x i8> %413 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %410, <32 x i8> %411, <16 x i8> %412, i32 2) %414 = extractelement <4 x float> %413, i32 0 %415 = extractelement <4 x float> %413, i32 1 %416 = extractelement <4 x float> %413, i32 2 %417 = fcmp oeq float %temp38.0, 1.000000e+00 %418 = select i1 %417, float 1.000000e+00, float 0.000000e+00 %419 = bitcast float %360 to i32 %420 = bitcast float %362 to i32 %421 = bitcast float %225 to i32 %422 = insertelement <4 x i32> undef, i32 %419, i32 0 %423 = insertelement <4 x i32> %422, i32 %420, i32 1 %424 = insertelement <4 x i32> %423, i32 %421, i32 2 %425 = bitcast <8 x i32> %47 to <32 x i8> %426 = bitcast <4 x i32> %49 to <16 x i8> %427 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %424, <32 x i8> %425, <16 x i8> %426, i32 2) %428 = extractelement <4 x float> %427, i32 0 %429 = extractelement <4 x float> %427, i32 1 %430 = extractelement <4 x float> %427, i32 2 %431 = fcmp oeq float %temp38.0, 0.000000e+00 %432 = select i1 %431, float 1.000000e+00, float 0.000000e+00 %433 = fmul float %428, %432 %434 = fmul float %429, %432 %435 = fmul float %430, %432 %436 = fmul float %414, %418 %437 = fadd float %436, %433 %438 = fmul float %415, %418 %439 = fadd float %438, %434 %440 = fmul float %416, %418 %441 = fadd float %440, %435 %442 = fmul float %400, %404 %443 = fadd float %442, %437 %444 = fmul float %401, %404 %445 = fadd float %444, %439 %446 = fmul float %402, %404 %447 = fadd float %446, %441 %448 = fmul float %386, %390 %449 = fadd float %448, %443 %450 = fmul float %387, %390 %451 = fadd float %450, %445 %452 = fmul float %388, %390 %453 = fadd float %452, %447 %454 = fmul float %372, %376 %455 = fadd float %454, %449 %456 = fmul float %373, %376 %457 = fadd float %456, %451 %458 = fmul float %374, %376 %459 = fadd float %458, %453 %460 = fcmp une float %33, %temp24.0 %.sink218 = select i1 %460, float %36, float %35 %temp60.0 = select i1 %460, float 1.953125e-03, float 3.906250e-03 %461 = fdiv float 1.000000e+00, %.sink218 %462 = fmul float %99, %461 %463 = fmul float %97, %461 %464 = call float @llvm.floor.f32(float %462) %465 = fsub float %462, %464 %466 = call float @llvm.floor.f32(float %463) %467 = fsub float %463, %466 %468 = fmul float %37, 2.000000e+00 %469 = fmul float %468, %temp60.0 %470 = fsub float 1.000000e+00, %469 %471 = fmul float %temp60.0, %37 %472 = fmul float %465, %470 %473 = fadd float %472, %471 %474 = fmul float %467, %470 %475 = fadd float %474, %471 %476 = fmul float %473, %temp24.0 %477 = fadd float %476, %temp36.0 %478 = fmul float %475, %temp24.0 %479 = fadd float %478, %temp37.0 %480 = bitcast float %477 to i32 %481 = bitcast float %479 to i32 %482 = bitcast float %225 to i32 %483 = insertelement <4 x i32> undef, i32 %480, i32 0 %484 = insertelement <4 x i32> %483, i32 %481, i32 1 %485 = insertelement <4 x i32> %484, i32 %482, i32 2 %486 = bitcast <8 x i32> %79 to <32 x i8> %487 = bitcast <4 x i32> %81 to <16 x i8> %488 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %485, <32 x i8> %486, <16 x i8> %487, i32 2) %489 = extractelement <4 x float> %488, i32 0 %490 = extractelement <4 x float> %488, i32 1 %491 = extractelement <4 x float> %488, i32 2 %492 = fcmp oeq float %temp38.0, 4.000000e+00 %493 = select i1 %492, float 1.000000e+00, float 0.000000e+00 %494 = bitcast float %477 to i32 %495 = bitcast float %479 to i32 %496 = bitcast float %225 to i32 %497 = insertelement <4 x i32> undef, i32 %494, i32 0 %498 = insertelement <4 x i32> %497, i32 %495, i32 1 %499 = insertelement <4 x i32> %498, i32 %496, i32 2 %500 = bitcast <8 x i32> %71 to <32 x i8> %501 = bitcast <4 x i32> %73 to <16 x i8> %502 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %499, <32 x i8> %500, <16 x i8> %501, i32 2) %503 = extractelement <4 x float> %502, i32 0 %504 = extractelement <4 x float> %502, i32 1 %505 = extractelement <4 x float> %502, i32 2 %506 = fcmp oeq float %temp38.0, 3.000000e+00 %507 = select i1 %506, float 1.000000e+00, float 0.000000e+00 %508 = bitcast float %477 to i32 %509 = bitcast float %479 to i32 %510 = bitcast float %225 to i32 %511 = insertelement <4 x i32> undef, i32 %508, i32 0 %512 = insertelement <4 x i32> %511, i32 %509, i32 1 %513 = insertelement <4 x i32> %512, i32 %510, i32 2 %514 = bitcast <8 x i32> %63 to <32 x i8> %515 = bitcast <4 x i32> %65 to <16 x i8> %516 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %513, <32 x i8> %514, <16 x i8> %515, i32 2) %517 = extractelement <4 x float> %516, i32 0 %518 = extractelement <4 x float> %516, i32 1 %519 = extractelement <4 x float> %516, i32 2 %520 = fcmp oeq float %temp38.0, 2.000000e+00 %521 = select i1 %520, float 1.000000e+00, float 0.000000e+00 %522 = bitcast float %477 to i32 %523 = bitcast float %479 to i32 %524 = bitcast float %225 to i32 %525 = insertelement <4 x i32> undef, i32 %522, i32 0 %526 = insertelement <4 x i32> %525, i32 %523, i32 1 %527 = insertelement <4 x i32> %526, i32 %524, i32 2 %528 = bitcast <8 x i32> %55 to <32 x i8> %529 = bitcast <4 x i32> %57 to <16 x i8> %530 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %527, <32 x i8> %528, <16 x i8> %529, i32 2) %531 = extractelement <4 x float> %530, i32 0 %532 = extractelement <4 x float> %530, i32 1 %533 = extractelement <4 x float> %530, i32 2 %534 = fcmp oeq float %temp38.0, 1.000000e+00 %535 = select i1 %534, float 1.000000e+00, float 0.000000e+00 %536 = bitcast float %477 to i32 %537 = bitcast float %479 to i32 %538 = bitcast float %225 to i32 %539 = insertelement <4 x i32> undef, i32 %536, i32 0 %540 = insertelement <4 x i32> %539, i32 %537, i32 1 %541 = insertelement <4 x i32> %540, i32 %538, i32 2 %542 = bitcast <8 x i32> %47 to <32 x i8> %543 = bitcast <4 x i32> %49 to <16 x i8> %544 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %541, <32 x i8> %542, <16 x i8> %543, i32 2) %545 = extractelement <4 x float> %544, i32 0 %546 = extractelement <4 x float> %544, i32 1 %547 = extractelement <4 x float> %544, i32 2 %548 = fcmp oeq float %temp38.0, 0.000000e+00 %549 = select i1 %548, float 1.000000e+00, float 0.000000e+00 %550 = fmul float %545, %549 %551 = fmul float %546, %549 %552 = fmul float %547, %549 %553 = fmul float %531, %535 %554 = fadd float %553, %550 %555 = fmul float %532, %535 %556 = fadd float %555, %551 %557 = fmul float %533, %535 %558 = fadd float %557, %552 %559 = fmul float %517, %521 %560 = fadd float %559, %554 %561 = fmul float %518, %521 %562 = fadd float %561, %556 %563 = fmul float %519, %521 %564 = fadd float %563, %558 %565 = fmul float %503, %507 %566 = fadd float %565, %560 %567 = fmul float %504, %507 %568 = fadd float %567, %562 %569 = fmul float %505, %507 %570 = fadd float %569, %564 %571 = fmul float %489, %493 %572 = fadd float %571, %566 %573 = fmul float %490, %493 %574 = fadd float %573, %568 %575 = fmul float %491, %493 %576 = fadd float %575, %570 %577 = fcmp une float %33, %temp28.0 %.sink219 = select i1 %577, float %36, float %35 %temp64.0 = select i1 %577, float 1.953125e-03, float 3.906250e-03 %578 = fdiv float 1.000000e+00, %.sink219 %579 = fmul float %97, %578 %580 = fmul float %98, %578 %581 = call float @llvm.floor.f32(float %579) %582 = fsub float %579, %581 %583 = call float @llvm.floor.f32(float %580) %584 = fsub float %580, %583 %585 = fmul float %37, 2.000000e+00 %586 = fmul float %585, %temp64.0 %587 = fsub float 1.000000e+00, %586 %588 = fmul float %temp64.0, %37 %589 = fmul float %582, %587 %590 = fadd float %589, %588 %591 = fmul float %584, %587 %592 = fadd float %591, %588 %593 = fmul float %590, %temp28.0 %594 = fadd float %593, %temp44.0 %595 = fmul float %592, %temp28.0 %596 = fadd float %595, %temp45.0 %597 = bitcast float %594 to i32 %598 = bitcast float %596 to i32 %599 = bitcast float %225 to i32 %600 = insertelement <4 x i32> undef, i32 %597, i32 0 %601 = insertelement <4 x i32> %600, i32 %598, i32 1 %602 = insertelement <4 x i32> %601, i32 %599, i32 2 %603 = bitcast <8 x i32> %79 to <32 x i8> %604 = bitcast <4 x i32> %81 to <16 x i8> %605 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %602, <32 x i8> %603, <16 x i8> %604, i32 2) %606 = extractelement <4 x float> %605, i32 0 %607 = extractelement <4 x float> %605, i32 1 %608 = extractelement <4 x float> %605, i32 2 %609 = fcmp oeq float %temp46.0, 4.000000e+00 %610 = select i1 %609, float 1.000000e+00, float 0.000000e+00 %611 = bitcast float %594 to i32 %612 = bitcast float %596 to i32 %613 = bitcast float %225 to i32 %614 = insertelement <4 x i32> undef, i32 %611, i32 0 %615 = insertelement <4 x i32> %614, i32 %612, i32 1 %616 = insertelement <4 x i32> %615, i32 %613, i32 2 %617 = bitcast <8 x i32> %71 to <32 x i8> %618 = bitcast <4 x i32> %73 to <16 x i8> %619 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %616, <32 x i8> %617, <16 x i8> %618, i32 2) %620 = extractelement <4 x float> %619, i32 0 %621 = extractelement <4 x float> %619, i32 1 %622 = extractelement <4 x float> %619, i32 2 %623 = fcmp oeq float %temp46.0, 3.000000e+00 %624 = select i1 %623, float 1.000000e+00, float 0.000000e+00 %625 = bitcast float %594 to i32 %626 = bitcast float %596 to i32 %627 = bitcast float %225 to i32 %628 = insertelement <4 x i32> undef, i32 %625, i32 0 %629 = insertelement <4 x i32> %628, i32 %626, i32 1 %630 = insertelement <4 x i32> %629, i32 %627, i32 2 %631 = bitcast <8 x i32> %63 to <32 x i8> %632 = bitcast <4 x i32> %65 to <16 x i8> %633 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %630, <32 x i8> %631, <16 x i8> %632, i32 2) %634 = extractelement <4 x float> %633, i32 0 %635 = extractelement <4 x float> %633, i32 1 %636 = extractelement <4 x float> %633, i32 2 %637 = fcmp oeq float %temp46.0, 2.000000e+00 %638 = select i1 %637, float 1.000000e+00, float 0.000000e+00 %639 = bitcast float %594 to i32 %640 = bitcast float %596 to i32 %641 = bitcast float %225 to i32 %642 = insertelement <4 x i32> undef, i32 %639, i32 0 %643 = insertelement <4 x i32> %642, i32 %640, i32 1 %644 = insertelement <4 x i32> %643, i32 %641, i32 2 %645 = bitcast <8 x i32> %55 to <32 x i8> %646 = bitcast <4 x i32> %57 to <16 x i8> %647 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %644, <32 x i8> %645, <16 x i8> %646, i32 2) %648 = extractelement <4 x float> %647, i32 0 %649 = extractelement <4 x float> %647, i32 1 %650 = extractelement <4 x float> %647, i32 2 %651 = fcmp oeq float %temp46.0, 1.000000e+00 %652 = select i1 %651, float 1.000000e+00, float 0.000000e+00 %653 = bitcast float %594 to i32 %654 = bitcast float %596 to i32 %655 = bitcast float %225 to i32 %656 = insertelement <4 x i32> undef, i32 %653, i32 0 %657 = insertelement <4 x i32> %656, i32 %654, i32 1 %658 = insertelement <4 x i32> %657, i32 %655, i32 2 %659 = bitcast <8 x i32> %47 to <32 x i8> %660 = bitcast <4 x i32> %49 to <16 x i8> %661 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %658, <32 x i8> %659, <16 x i8> %660, i32 2) %662 = extractelement <4 x float> %661, i32 0 %663 = extractelement <4 x float> %661, i32 1 %664 = extractelement <4 x float> %661, i32 2 %665 = fcmp oeq float %temp46.0, 0.000000e+00 %666 = select i1 %665, float 1.000000e+00, float 0.000000e+00 %667 = fmul float %662, %666 %668 = fmul float %663, %666 %669 = fmul float %664, %666 %670 = fmul float %648, %652 %671 = fadd float %670, %667 %672 = fmul float %649, %652 %673 = fadd float %672, %668 %674 = fmul float %650, %652 %675 = fadd float %674, %669 %676 = fmul float %634, %638 %677 = fadd float %676, %671 %678 = fmul float %635, %638 %679 = fadd float %678, %673 %680 = fmul float %636, %638 %681 = fadd float %680, %675 %682 = fmul float %620, %624 %683 = fadd float %682, %677 %684 = fmul float %621, %624 %685 = fadd float %684, %679 %686 = fmul float %622, %624 %687 = fadd float %686, %681 %688 = fmul float %606, %610 %689 = fadd float %688, %683 %690 = fmul float %607, %610 %691 = fadd float %690, %685 %692 = fmul float %608, %610 %693 = fadd float %692, %687 %694 = fcmp une float %33, %temp28.0 %.sink220 = select i1 %694, float %36, float %35 %temp68.0 = select i1 %694, float 1.953125e-03, float 3.906250e-03 %695 = fdiv float 1.000000e+00, %.sink220 %696 = fmul float %99, %695 %697 = fmul float %98, %695 %698 = call float @llvm.floor.f32(float %696) %699 = fsub float %696, %698 %700 = call float @llvm.floor.f32(float %697) %701 = fsub float %697, %700 %702 = fmul float %37, 2.000000e+00 %703 = fmul float %702, %temp68.0 %704 = fsub float 1.000000e+00, %703 %705 = fmul float %temp68.0, %37 %706 = fmul float %699, %704 %707 = fadd float %706, %705 %708 = fmul float %701, %704 %709 = fadd float %708, %705 %710 = fmul float %707, %temp28.0 %711 = fadd float %710, %temp44.0 %712 = fmul float %709, %temp28.0 %713 = fadd float %712, %temp45.0 %714 = bitcast float %711 to i32 %715 = bitcast float %713 to i32 %716 = bitcast float %225 to i32 %717 = insertelement <4 x i32> undef, i32 %714, i32 0 %718 = insertelement <4 x i32> %717, i32 %715, i32 1 %719 = insertelement <4 x i32> %718, i32 %716, i32 2 %720 = bitcast <8 x i32> %79 to <32 x i8> %721 = bitcast <4 x i32> %81 to <16 x i8> %722 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %719, <32 x i8> %720, <16 x i8> %721, i32 2) %723 = extractelement <4 x float> %722, i32 0 %724 = extractelement <4 x float> %722, i32 1 %725 = extractelement <4 x float> %722, i32 2 %726 = fcmp oeq float %temp46.0, 4.000000e+00 %727 = select i1 %726, float 1.000000e+00, float 0.000000e+00 %728 = bitcast float %711 to i32 %729 = bitcast float %713 to i32 %730 = bitcast float %225 to i32 %731 = insertelement <4 x i32> undef, i32 %728, i32 0 %732 = insertelement <4 x i32> %731, i32 %729, i32 1 %733 = insertelement <4 x i32> %732, i32 %730, i32 2 %734 = bitcast <8 x i32> %71 to <32 x i8> %735 = bitcast <4 x i32> %73 to <16 x i8> %736 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %733, <32 x i8> %734, <16 x i8> %735, i32 2) %737 = extractelement <4 x float> %736, i32 0 %738 = extractelement <4 x float> %736, i32 1 %739 = extractelement <4 x float> %736, i32 2 %740 = fcmp oeq float %temp46.0, 3.000000e+00 %741 = select i1 %740, float 1.000000e+00, float 0.000000e+00 %742 = bitcast float %711 to i32 %743 = bitcast float %713 to i32 %744 = bitcast float %225 to i32 %745 = insertelement <4 x i32> undef, i32 %742, i32 0 %746 = insertelement <4 x i32> %745, i32 %743, i32 1 %747 = insertelement <4 x i32> %746, i32 %744, i32 2 %748 = bitcast <8 x i32> %63 to <32 x i8> %749 = bitcast <4 x i32> %65 to <16 x i8> %750 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %747, <32 x i8> %748, <16 x i8> %749, i32 2) %751 = extractelement <4 x float> %750, i32 0 %752 = extractelement <4 x float> %750, i32 1 %753 = extractelement <4 x float> %750, i32 2 %754 = fcmp oeq float %temp46.0, 2.000000e+00 %755 = select i1 %754, float 1.000000e+00, float 0.000000e+00 %756 = bitcast float %711 to i32 %757 = bitcast float %713 to i32 %758 = bitcast float %225 to i32 %759 = insertelement <4 x i32> undef, i32 %756, i32 0 %760 = insertelement <4 x i32> %759, i32 %757, i32 1 %761 = insertelement <4 x i32> %760, i32 %758, i32 2 %762 = bitcast <8 x i32> %55 to <32 x i8> %763 = bitcast <4 x i32> %57 to <16 x i8> %764 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %761, <32 x i8> %762, <16 x i8> %763, i32 2) %765 = extractelement <4 x float> %764, i32 0 %766 = extractelement <4 x float> %764, i32 1 %767 = extractelement <4 x float> %764, i32 2 %768 = fcmp oeq float %temp46.0, 1.000000e+00 %769 = select i1 %768, float 1.000000e+00, float 0.000000e+00 %770 = bitcast float %711 to i32 %771 = bitcast float %713 to i32 %772 = bitcast float %225 to i32 %773 = insertelement <4 x i32> undef, i32 %770, i32 0 %774 = insertelement <4 x i32> %773, i32 %771, i32 1 %775 = insertelement <4 x i32> %774, i32 %772, i32 2 %776 = bitcast <8 x i32> %47 to <32 x i8> %777 = bitcast <4 x i32> %49 to <16 x i8> %778 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %775, <32 x i8> %776, <16 x i8> %777, i32 2) %779 = extractelement <4 x float> %778, i32 0 %780 = extractelement <4 x float> %778, i32 1 %781 = extractelement <4 x float> %778, i32 2 %782 = fcmp oeq float %temp46.0, 0.000000e+00 %783 = select i1 %782, float 1.000000e+00, float 0.000000e+00 %784 = fmul float %779, %783 %785 = fmul float %780, %783 %786 = fmul float %781, %783 %787 = fmul float %765, %769 %788 = fadd float %787, %784 %789 = fmul float %766, %769 %790 = fadd float %789, %785 %791 = fmul float %767, %769 %792 = fadd float %791, %786 %793 = fmul float %751, %755 %794 = fadd float %793, %788 %795 = fmul float %752, %755 %796 = fadd float %795, %790 %797 = fmul float %753, %755 %798 = fadd float %797, %792 %799 = fmul float %737, %741 %800 = fadd float %799, %794 %801 = fmul float %738, %741 %802 = fadd float %801, %796 %803 = fmul float %739, %741 %804 = fadd float %803, %798 %805 = fmul float %723, %727 %806 = fadd float %805, %800 %807 = fmul float %724, %727 %808 = fadd float %807, %802 %809 = fmul float %725, %727 %810 = fadd float %809, %804 %811 = fcmp une float %33, %temp28.0 %.sink221 = select i1 %811, float %36, float %35 %temp72.0 = select i1 %811, float 1.953125e-03, float 3.906250e-03 %812 = fdiv float 1.000000e+00, %.sink221 %813 = fmul float %99, %812 %814 = fmul float %97, %812 %815 = call float @llvm.floor.f32(float %813) %816 = fsub float %813, %815 %817 = call float @llvm.floor.f32(float %814) %818 = fsub float %814, %817 %819 = fmul float %37, 2.000000e+00 %820 = fmul float %819, %temp72.0 %821 = fsub float 1.000000e+00, %820 %822 = fmul float %temp72.0, %37 %823 = fmul float %816, %821 %824 = fadd float %823, %822 %825 = fmul float %818, %821 %826 = fadd float %825, %822 %827 = fmul float %824, %temp28.0 %828 = fadd float %827, %temp44.0 %829 = fmul float %826, %temp28.0 %830 = fadd float %829, %temp45.0 %831 = bitcast float %828 to i32 %832 = bitcast float %830 to i32 %833 = bitcast float %225 to i32 %834 = insertelement <4 x i32> undef, i32 %831, i32 0 %835 = insertelement <4 x i32> %834, i32 %832, i32 1 %836 = insertelement <4 x i32> %835, i32 %833, i32 2 %837 = bitcast <8 x i32> %79 to <32 x i8> %838 = bitcast <4 x i32> %81 to <16 x i8> %839 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %836, <32 x i8> %837, <16 x i8> %838, i32 2) %840 = extractelement <4 x float> %839, i32 0 %841 = extractelement <4 x float> %839, i32 1 %842 = extractelement <4 x float> %839, i32 2 %843 = fcmp oeq float %temp46.0, 4.000000e+00 %844 = select i1 %843, float 1.000000e+00, float 0.000000e+00 %845 = bitcast float %828 to i32 %846 = bitcast float %830 to i32 %847 = bitcast float %225 to i32 %848 = insertelement <4 x i32> undef, i32 %845, i32 0 %849 = insertelement <4 x i32> %848, i32 %846, i32 1 %850 = insertelement <4 x i32> %849, i32 %847, i32 2 %851 = bitcast <8 x i32> %71 to <32 x i8> %852 = bitcast <4 x i32> %73 to <16 x i8> %853 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %850, <32 x i8> %851, <16 x i8> %852, i32 2) %854 = extractelement <4 x float> %853, i32 0 %855 = extractelement <4 x float> %853, i32 1 %856 = extractelement <4 x float> %853, i32 2 %857 = fcmp oeq float %temp46.0, 3.000000e+00 %858 = select i1 %857, float 1.000000e+00, float 0.000000e+00 %859 = bitcast float %828 to i32 %860 = bitcast float %830 to i32 %861 = bitcast float %225 to i32 %862 = insertelement <4 x i32> undef, i32 %859, i32 0 %863 = insertelement <4 x i32> %862, i32 %860, i32 1 %864 = insertelement <4 x i32> %863, i32 %861, i32 2 %865 = bitcast <8 x i32> %63 to <32 x i8> %866 = bitcast <4 x i32> %65 to <16 x i8> %867 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %864, <32 x i8> %865, <16 x i8> %866, i32 2) %868 = extractelement <4 x float> %867, i32 0 %869 = extractelement <4 x float> %867, i32 1 %870 = extractelement <4 x float> %867, i32 2 %871 = fcmp oeq float %temp46.0, 2.000000e+00 %872 = select i1 %871, float 1.000000e+00, float 0.000000e+00 %873 = bitcast float %828 to i32 %874 = bitcast float %830 to i32 %875 = bitcast float %225 to i32 %876 = insertelement <4 x i32> undef, i32 %873, i32 0 %877 = insertelement <4 x i32> %876, i32 %874, i32 1 %878 = insertelement <4 x i32> %877, i32 %875, i32 2 %879 = bitcast <8 x i32> %55 to <32 x i8> %880 = bitcast <4 x i32> %57 to <16 x i8> %881 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %878, <32 x i8> %879, <16 x i8> %880, i32 2) %882 = extractelement <4 x float> %881, i32 0 %883 = extractelement <4 x float> %881, i32 1 %884 = extractelement <4 x float> %881, i32 2 %885 = fcmp oeq float %temp46.0, 1.000000e+00 %886 = select i1 %885, float 1.000000e+00, float 0.000000e+00 %887 = bitcast float %828 to i32 %888 = bitcast float %830 to i32 %889 = bitcast float %225 to i32 %890 = insertelement <4 x i32> undef, i32 %887, i32 0 %891 = insertelement <4 x i32> %890, i32 %888, i32 1 %892 = insertelement <4 x i32> %891, i32 %889, i32 2 %893 = bitcast <8 x i32> %47 to <32 x i8> %894 = bitcast <4 x i32> %49 to <16 x i8> %895 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %892, <32 x i8> %893, <16 x i8> %894, i32 2) %896 = extractelement <4 x float> %895, i32 0 %897 = extractelement <4 x float> %895, i32 1 %898 = extractelement <4 x float> %895, i32 2 %899 = fcmp oeq float %temp46.0, 0.000000e+00 %900 = select i1 %899, float 1.000000e+00, float 0.000000e+00 %901 = fmul float %896, %900 %902 = fmul float %897, %900 %903 = fmul float %898, %900 %904 = fmul float %882, %886 %905 = fadd float %904, %901 %906 = fmul float %883, %886 %907 = fadd float %906, %902 %908 = fmul float %884, %886 %909 = fadd float %908, %903 %910 = fmul float %868, %872 %911 = fadd float %910, %905 %912 = fmul float %869, %872 %913 = fadd float %912, %907 %914 = fmul float %870, %872 %915 = fadd float %914, %909 %916 = fmul float %854, %858 %917 = fadd float %916, %911 %918 = fmul float %855, %858 %919 = fadd float %918, %913 %920 = fmul float %856, %858 %921 = fadd float %920, %915 %922 = fmul float %840, %844 %923 = fadd float %922, %917 %924 = fmul float %841, %844 %925 = fadd float %924, %919 %926 = fmul float %842, %844 %927 = fadd float %926, %921 %928 = fcmp une float %33, %temp32.0 %.sink222 = select i1 %928, float %36, float %35 %temp76.0 = select i1 %928, float 1.953125e-03, float 3.906250e-03 %929 = fdiv float 1.000000e+00, %.sink222 %930 = fmul float %97, %929 %931 = fmul float %98, %929 %932 = call float @llvm.floor.f32(float %930) %933 = fsub float %930, %932 %934 = call float @llvm.floor.f32(float %931) %935 = fsub float %931, %934 %936 = fmul float %37, 2.000000e+00 %937 = fmul float %936, %temp76.0 %938 = fsub float 1.000000e+00, %937 %939 = fmul float %temp76.0, %37 %940 = fmul float %933, %938 %941 = fadd float %940, %939 %942 = fmul float %935, %938 %943 = fadd float %942, %939 %944 = fmul float %941, %temp32.0 %945 = fadd float %944, %temp20.0 %946 = fmul float %943, %temp32.0 %947 = fadd float %946, %temp21.0 %948 = bitcast float %945 to i32 %949 = bitcast float %947 to i32 %950 = bitcast float %225 to i32 %951 = insertelement <4 x i32> undef, i32 %948, i32 0 %952 = insertelement <4 x i32> %951, i32 %949, i32 1 %953 = insertelement <4 x i32> %952, i32 %950, i32 2 %954 = bitcast <8 x i32> %79 to <32 x i8> %955 = bitcast <4 x i32> %81 to <16 x i8> %956 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %953, <32 x i8> %954, <16 x i8> %955, i32 2) %957 = extractelement <4 x float> %956, i32 0 %958 = extractelement <4 x float> %956, i32 1 %959 = extractelement <4 x float> %956, i32 2 %960 = fcmp oeq float %temp22.0, 4.000000e+00 %961 = select i1 %960, float 1.000000e+00, float 0.000000e+00 %962 = bitcast float %945 to i32 %963 = bitcast float %947 to i32 %964 = bitcast float %225 to i32 %965 = insertelement <4 x i32> undef, i32 %962, i32 0 %966 = insertelement <4 x i32> %965, i32 %963, i32 1 %967 = insertelement <4 x i32> %966, i32 %964, i32 2 %968 = bitcast <8 x i32> %71 to <32 x i8> %969 = bitcast <4 x i32> %73 to <16 x i8> %970 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %967, <32 x i8> %968, <16 x i8> %969, i32 2) %971 = extractelement <4 x float> %970, i32 0 %972 = extractelement <4 x float> %970, i32 1 %973 = extractelement <4 x float> %970, i32 2 %974 = fcmp oeq float %temp22.0, 3.000000e+00 %975 = select i1 %974, float 1.000000e+00, float 0.000000e+00 %976 = bitcast float %945 to i32 %977 = bitcast float %947 to i32 %978 = bitcast float %225 to i32 %979 = insertelement <4 x i32> undef, i32 %976, i32 0 %980 = insertelement <4 x i32> %979, i32 %977, i32 1 %981 = insertelement <4 x i32> %980, i32 %978, i32 2 %982 = bitcast <8 x i32> %63 to <32 x i8> %983 = bitcast <4 x i32> %65 to <16 x i8> %984 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %981, <32 x i8> %982, <16 x i8> %983, i32 2) %985 = extractelement <4 x float> %984, i32 0 %986 = extractelement <4 x float> %984, i32 1 %987 = extractelement <4 x float> %984, i32 2 %988 = fcmp oeq float %temp22.0, 2.000000e+00 %989 = select i1 %988, float 1.000000e+00, float 0.000000e+00 %990 = bitcast float %945 to i32 %991 = bitcast float %947 to i32 %992 = bitcast float %225 to i32 %993 = insertelement <4 x i32> undef, i32 %990, i32 0 %994 = insertelement <4 x i32> %993, i32 %991, i32 1 %995 = insertelement <4 x i32> %994, i32 %992, i32 2 %996 = bitcast <8 x i32> %55 to <32 x i8> %997 = bitcast <4 x i32> %57 to <16 x i8> %998 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %995, <32 x i8> %996, <16 x i8> %997, i32 2) %999 = extractelement <4 x float> %998, i32 0 %1000 = extractelement <4 x float> %998, i32 1 %1001 = extractelement <4 x float> %998, i32 2 %1002 = fcmp oeq float %temp22.0, 1.000000e+00 %1003 = select i1 %1002, float 1.000000e+00, float 0.000000e+00 %1004 = bitcast float %945 to i32 %1005 = bitcast float %947 to i32 %1006 = bitcast float %225 to i32 %1007 = insertelement <4 x i32> undef, i32 %1004, i32 0 %1008 = insertelement <4 x i32> %1007, i32 %1005, i32 1 %1009 = insertelement <4 x i32> %1008, i32 %1006, i32 2 %1010 = bitcast <8 x i32> %47 to <32 x i8> %1011 = bitcast <4 x i32> %49 to <16 x i8> %1012 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1009, <32 x i8> %1010, <16 x i8> %1011, i32 2) %1013 = extractelement <4 x float> %1012, i32 0 %1014 = extractelement <4 x float> %1012, i32 1 %1015 = extractelement <4 x float> %1012, i32 2 %1016 = fcmp oeq float %temp22.0, 0.000000e+00 %1017 = select i1 %1016, float 1.000000e+00, float 0.000000e+00 %1018 = fmul float %1013, %1017 %1019 = fmul float %1014, %1017 %1020 = fmul float %1015, %1017 %1021 = fmul float %999, %1003 %1022 = fadd float %1021, %1018 %1023 = fmul float %1000, %1003 %1024 = fadd float %1023, %1019 %1025 = fmul float %1001, %1003 %1026 = fadd float %1025, %1020 %1027 = fmul float %985, %989 %1028 = fadd float %1027, %1022 %1029 = fmul float %986, %989 %1030 = fadd float %1029, %1024 %1031 = fmul float %987, %989 %1032 = fadd float %1031, %1026 %1033 = fmul float %971, %975 %1034 = fadd float %1033, %1028 %1035 = fmul float %972, %975 %1036 = fadd float %1035, %1030 %1037 = fmul float %973, %975 %1038 = fadd float %1037, %1032 %1039 = fmul float %957, %961 %1040 = fadd float %1039, %1034 %1041 = fmul float %958, %961 %1042 = fadd float %1041, %1036 %1043 = fmul float %959, %961 %1044 = fadd float %1043, %1038 %1045 = fcmp une float %33, %temp32.0 %.sink223 = select i1 %1045, float %36, float %35 %temp80.0 = select i1 %1045, float 1.953125e-03, float 3.906250e-03 %1046 = fdiv float 1.000000e+00, %.sink223 %1047 = fmul float %99, %1046 %1048 = fmul float %98, %1046 %1049 = call float @llvm.floor.f32(float %1047) %1050 = fsub float %1047, %1049 %1051 = call float @llvm.floor.f32(float %1048) %1052 = fsub float %1048, %1051 %1053 = fmul float %37, 2.000000e+00 %1054 = fmul float %1053, %temp80.0 %1055 = fsub float 1.000000e+00, %1054 %1056 = fmul float %temp80.0, %37 %1057 = fmul float %1050, %1055 %1058 = fadd float %1057, %1056 %1059 = fmul float %1052, %1055 %1060 = fadd float %1059, %1056 %1061 = fmul float %1058, %temp32.0 %1062 = fadd float %1061, %temp20.0 %1063 = fmul float %1060, %temp32.0 %1064 = fadd float %1063, %temp21.0 %1065 = bitcast float %1062 to i32 %1066 = bitcast float %1064 to i32 %1067 = bitcast float %225 to i32 %1068 = insertelement <4 x i32> undef, i32 %1065, i32 0 %1069 = insertelement <4 x i32> %1068, i32 %1066, i32 1 %1070 = insertelement <4 x i32> %1069, i32 %1067, i32 2 %1071 = bitcast <8 x i32> %79 to <32 x i8> %1072 = bitcast <4 x i32> %81 to <16 x i8> %1073 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1070, <32 x i8> %1071, <16 x i8> %1072, i32 2) %1074 = extractelement <4 x float> %1073, i32 0 %1075 = extractelement <4 x float> %1073, i32 1 %1076 = extractelement <4 x float> %1073, i32 2 %1077 = fcmp oeq float %temp22.0, 4.000000e+00 %1078 = select i1 %1077, float 1.000000e+00, float 0.000000e+00 %1079 = bitcast float %1062 to i32 %1080 = bitcast float %1064 to i32 %1081 = bitcast float %225 to i32 %1082 = insertelement <4 x i32> undef, i32 %1079, i32 0 %1083 = insertelement <4 x i32> %1082, i32 %1080, i32 1 %1084 = insertelement <4 x i32> %1083, i32 %1081, i32 2 %1085 = bitcast <8 x i32> %71 to <32 x i8> %1086 = bitcast <4 x i32> %73 to <16 x i8> %1087 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1084, <32 x i8> %1085, <16 x i8> %1086, i32 2) %1088 = extractelement <4 x float> %1087, i32 0 %1089 = extractelement <4 x float> %1087, i32 1 %1090 = extractelement <4 x float> %1087, i32 2 %1091 = fcmp oeq float %temp22.0, 3.000000e+00 %1092 = select i1 %1091, float 1.000000e+00, float 0.000000e+00 %1093 = bitcast float %1062 to i32 %1094 = bitcast float %1064 to i32 %1095 = bitcast float %225 to i32 %1096 = insertelement <4 x i32> undef, i32 %1093, i32 0 %1097 = insertelement <4 x i32> %1096, i32 %1094, i32 1 %1098 = insertelement <4 x i32> %1097, i32 %1095, i32 2 %1099 = bitcast <8 x i32> %63 to <32 x i8> %1100 = bitcast <4 x i32> %65 to <16 x i8> %1101 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1098, <32 x i8> %1099, <16 x i8> %1100, i32 2) %1102 = extractelement <4 x float> %1101, i32 0 %1103 = extractelement <4 x float> %1101, i32 1 %1104 = extractelement <4 x float> %1101, i32 2 %1105 = fcmp oeq float %temp22.0, 2.000000e+00 %1106 = select i1 %1105, float 1.000000e+00, float 0.000000e+00 %1107 = bitcast float %1062 to i32 %1108 = bitcast float %1064 to i32 %1109 = bitcast float %225 to i32 %1110 = insertelement <4 x i32> undef, i32 %1107, i32 0 %1111 = insertelement <4 x i32> %1110, i32 %1108, i32 1 %1112 = insertelement <4 x i32> %1111, i32 %1109, i32 2 %1113 = bitcast <8 x i32> %55 to <32 x i8> %1114 = bitcast <4 x i32> %57 to <16 x i8> %1115 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1112, <32 x i8> %1113, <16 x i8> %1114, i32 2) %1116 = extractelement <4 x float> %1115, i32 0 %1117 = extractelement <4 x float> %1115, i32 1 %1118 = extractelement <4 x float> %1115, i32 2 %1119 = fcmp oeq float %temp22.0, 1.000000e+00 %1120 = select i1 %1119, float 1.000000e+00, float 0.000000e+00 %1121 = bitcast float %1062 to i32 %1122 = bitcast float %1064 to i32 %1123 = bitcast float %225 to i32 %1124 = insertelement <4 x i32> undef, i32 %1121, i32 0 %1125 = insertelement <4 x i32> %1124, i32 %1122, i32 1 %1126 = insertelement <4 x i32> %1125, i32 %1123, i32 2 %1127 = bitcast <8 x i32> %47 to <32 x i8> %1128 = bitcast <4 x i32> %49 to <16 x i8> %1129 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1126, <32 x i8> %1127, <16 x i8> %1128, i32 2) %1130 = extractelement <4 x float> %1129, i32 0 %1131 = extractelement <4 x float> %1129, i32 1 %1132 = extractelement <4 x float> %1129, i32 2 %1133 = fcmp oeq float %temp22.0, 0.000000e+00 %1134 = select i1 %1133, float 1.000000e+00, float 0.000000e+00 %1135 = fmul float %1130, %1134 %1136 = fmul float %1131, %1134 %1137 = fmul float %1132, %1134 %1138 = fmul float %1116, %1120 %1139 = fadd float %1138, %1135 %1140 = fmul float %1117, %1120 %1141 = fadd float %1140, %1136 %1142 = fmul float %1118, %1120 %1143 = fadd float %1142, %1137 %1144 = fmul float %1102, %1106 %1145 = fadd float %1144, %1139 %1146 = fmul float %1103, %1106 %1147 = fadd float %1146, %1141 %1148 = fmul float %1104, %1106 %1149 = fadd float %1148, %1143 %1150 = fmul float %1088, %1092 %1151 = fadd float %1150, %1145 %1152 = fmul float %1089, %1092 %1153 = fadd float %1152, %1147 %1154 = fmul float %1090, %1092 %1155 = fadd float %1154, %1149 %1156 = fmul float %1074, %1078 %1157 = fadd float %1156, %1151 %1158 = fmul float %1075, %1078 %1159 = fadd float %1158, %1153 %1160 = fmul float %1076, %1078 %1161 = fadd float %1160, %1155 %1162 = fcmp une float %33, %temp32.0 %.sink224 = select i1 %1162, float %36, float %35 %temp84.0 = select i1 %1162, float 1.953125e-03, float 3.906250e-03 %1163 = fdiv float 1.000000e+00, %.sink224 %1164 = fmul float %99, %1163 %1165 = fmul float %97, %1163 %1166 = call float @llvm.floor.f32(float %1164) %1167 = fsub float %1164, %1166 %1168 = call float @llvm.floor.f32(float %1165) %1169 = fsub float %1165, %1168 %1170 = fmul float %37, 2.000000e+00 %1171 = fmul float %1170, %temp84.0 %1172 = fsub float 1.000000e+00, %1171 %1173 = fmul float %temp84.0, %37 %1174 = fmul float %1167, %1172 %1175 = fadd float %1174, %1173 %1176 = fmul float %1169, %1172 %1177 = fadd float %1176, %1173 %1178 = fmul float %1175, %temp32.0 %1179 = fadd float %1178, %temp20.0 %1180 = fmul float %1177, %temp32.0 %1181 = fadd float %1180, %temp21.0 %1182 = bitcast float %1179 to i32 %1183 = bitcast float %1181 to i32 %1184 = bitcast float %225 to i32 %1185 = insertelement <4 x i32> undef, i32 %1182, i32 0 %1186 = insertelement <4 x i32> %1185, i32 %1183, i32 1 %1187 = insertelement <4 x i32> %1186, i32 %1184, i32 2 %1188 = bitcast <8 x i32> %79 to <32 x i8> %1189 = bitcast <4 x i32> %81 to <16 x i8> %1190 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1187, <32 x i8> %1188, <16 x i8> %1189, i32 2) %1191 = extractelement <4 x float> %1190, i32 0 %1192 = extractelement <4 x float> %1190, i32 1 %1193 = extractelement <4 x float> %1190, i32 2 %1194 = fcmp oeq float %temp22.0, 4.000000e+00 %1195 = select i1 %1194, float 1.000000e+00, float 0.000000e+00 %1196 = bitcast float %1179 to i32 %1197 = bitcast float %1181 to i32 %1198 = bitcast float %225 to i32 %1199 = insertelement <4 x i32> undef, i32 %1196, i32 0 %1200 = insertelement <4 x i32> %1199, i32 %1197, i32 1 %1201 = insertelement <4 x i32> %1200, i32 %1198, i32 2 %1202 = bitcast <8 x i32> %71 to <32 x i8> %1203 = bitcast <4 x i32> %73 to <16 x i8> %1204 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1201, <32 x i8> %1202, <16 x i8> %1203, i32 2) %1205 = extractelement <4 x float> %1204, i32 0 %1206 = extractelement <4 x float> %1204, i32 1 %1207 = extractelement <4 x float> %1204, i32 2 %1208 = fcmp oeq float %temp22.0, 3.000000e+00 %1209 = select i1 %1208, float 1.000000e+00, float 0.000000e+00 %1210 = bitcast float %1179 to i32 %1211 = bitcast float %1181 to i32 %1212 = bitcast float %225 to i32 %1213 = insertelement <4 x i32> undef, i32 %1210, i32 0 %1214 = insertelement <4 x i32> %1213, i32 %1211, i32 1 %1215 = insertelement <4 x i32> %1214, i32 %1212, i32 2 %1216 = bitcast <8 x i32> %63 to <32 x i8> %1217 = bitcast <4 x i32> %65 to <16 x i8> %1218 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1215, <32 x i8> %1216, <16 x i8> %1217, i32 2) %1219 = extractelement <4 x float> %1218, i32 0 %1220 = extractelement <4 x float> %1218, i32 1 %1221 = extractelement <4 x float> %1218, i32 2 %1222 = fcmp oeq float %temp22.0, 2.000000e+00 %1223 = select i1 %1222, float 1.000000e+00, float 0.000000e+00 %1224 = bitcast float %1179 to i32 %1225 = bitcast float %1181 to i32 %1226 = bitcast float %225 to i32 %1227 = insertelement <4 x i32> undef, i32 %1224, i32 0 %1228 = insertelement <4 x i32> %1227, i32 %1225, i32 1 %1229 = insertelement <4 x i32> %1228, i32 %1226, i32 2 %1230 = bitcast <8 x i32> %55 to <32 x i8> %1231 = bitcast <4 x i32> %57 to <16 x i8> %1232 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1229, <32 x i8> %1230, <16 x i8> %1231, i32 2) %1233 = extractelement <4 x float> %1232, i32 0 %1234 = extractelement <4 x float> %1232, i32 1 %1235 = extractelement <4 x float> %1232, i32 2 %1236 = fcmp oeq float %temp22.0, 1.000000e+00 %1237 = select i1 %1236, float 1.000000e+00, float 0.000000e+00 %1238 = bitcast float %1179 to i32 %1239 = bitcast float %1181 to i32 %1240 = bitcast float %225 to i32 %1241 = insertelement <4 x i32> undef, i32 %1238, i32 0 %1242 = insertelement <4 x i32> %1241, i32 %1239, i32 1 %1243 = insertelement <4 x i32> %1242, i32 %1240, i32 2 %1244 = bitcast <8 x i32> %47 to <32 x i8> %1245 = bitcast <4 x i32> %49 to <16 x i8> %1246 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1243, <32 x i8> %1244, <16 x i8> %1245, i32 2) %1247 = extractelement <4 x float> %1246, i32 0 %1248 = extractelement <4 x float> %1246, i32 1 %1249 = extractelement <4 x float> %1246, i32 2 %1250 = fcmp oeq float %temp22.0, 0.000000e+00 %1251 = select i1 %1250, float 1.000000e+00, float 0.000000e+00 %1252 = fmul float %1247, %1251 %1253 = fmul float %1248, %1251 %1254 = fmul float %1249, %1251 %1255 = fmul float %1233, %1237 %1256 = fadd float %1255, %1252 %1257 = fmul float %1234, %1237 %1258 = fadd float %1257, %1253 %1259 = fmul float %1235, %1237 %1260 = fadd float %1259, %1254 %1261 = fmul float %1219, %1223 %1262 = fadd float %1261, %1256 %1263 = fmul float %1220, %1223 %1264 = fadd float %1263, %1258 %1265 = fmul float %1221, %1223 %1266 = fadd float %1265, %1260 %1267 = fmul float %1205, %1209 %1268 = fadd float %1267, %1262 %1269 = fmul float %1206, %1209 %1270 = fadd float %1269, %1264 %1271 = fmul float %1207, %1209 %1272 = fadd float %1271, %1266 %1273 = fmul float %1191, %1195 %1274 = fadd float %1273, %1268 %1275 = fmul float %1192, %1195 %1276 = fadd float %1275, %1270 %1277 = fmul float %1193, %1195 %1278 = fadd float %1277, %1272 %1279 = fmul float %1040, %150 %1280 = fmul float %1042, %150 %1281 = fmul float %1044, %150 %1282 = fmul float %1157, %148 %1283 = fadd float %1282, %1279 %1284 = fmul float %1159, %148 %1285 = fadd float %1284, %1280 %1286 = fmul float %1161, %148 %1287 = fadd float %1286, %1281 %1288 = fmul float %1274, %149 %1289 = fadd float %1288, %1283 %1290 = fmul float %1276, %149 %1291 = fadd float %1290, %1285 %1292 = fmul float %1278, %149 %1293 = fadd float %1292, %1287 %1294 = fmul float %689, %150 %1295 = fmul float %691, %150 %1296 = fmul float %693, %150 %1297 = fmul float %806, %148 %1298 = fadd float %1297, %1294 %1299 = fmul float %808, %148 %1300 = fadd float %1299, %1295 %1301 = fmul float %810, %148 %1302 = fadd float %1301, %1296 %1303 = fmul float %923, %149 %1304 = fadd float %1303, %1298 %1305 = fmul float %925, %149 %1306 = fadd float %1305, %1300 %1307 = fmul float %927, %149 %1308 = fadd float %1307, %1302 %1309 = fmul float %338, %150 %1310 = fmul float %340, %150 %1311 = fmul float %342, %150 %1312 = fmul float %455, %148 %1313 = fadd float %1312, %1309 %1314 = fmul float %457, %148 %1315 = fadd float %1314, %1310 %1316 = fmul float %459, %148 %1317 = fadd float %1316, %1311 %1318 = fmul float %572, %149 %1319 = fadd float %1318, %1313 %1320 = fmul float %574, %149 %1321 = fadd float %1320, %1315 %1322 = fmul float %576, %149 %1323 = fadd float %1322, %1317 %1324 = fmul float %90, %1319 %1325 = fmul float %90, %1321 %1326 = fmul float %90, %1323 %1327 = fmul float %91, %1304 %1328 = fadd float %1327, %1324 %1329 = fmul float %91, %1306 %1330 = fadd float %1329, %1325 %1331 = fmul float %91, %1308 %1332 = fadd float %1331, %1326 %1333 = fmul float %92, %1289 %1334 = fadd float %1333, %1328 %1335 = fmul float %92, %1291 %1336 = fadd float %1335, %1330 %1337 = fmul float %92, %1293 %1338 = fadd float %1337, %1332 %1339 = fcmp une float %33, %temp24.0 %.sink225 = select i1 %1339, float %36, float %35 %temp56.2 = select i1 %1339, float 1.953125e-03, float 3.906250e-03 %1340 = fdiv float 1.000000e+00, %.sink225 %1341 = fmul float %99, %1340 %1342 = fmul float %98, %1340 %1343 = call float @llvm.floor.f32(float %1341) %1344 = fsub float %1341, %1343 %1345 = call float @llvm.floor.f32(float %1342) %1346 = fsub float %1342, %1345 %1347 = fmul float %37, 2.000000e+00 %1348 = fmul float %1347, %temp56.2 %1349 = fsub float 1.000000e+00, %1348 %1350 = fmul float %temp56.2, %37 %1351 = fmul float %1344, %1349 %1352 = fadd float %1351, %1350 %1353 = fmul float %1346, %1349 %1354 = fadd float %1353, %1350 %1355 = fmul float %1352, %temp24.0 %1356 = fadd float %1355, %temp36.0 %1357 = fmul float %1354, %temp24.0 %1358 = fadd float %1357, %temp37.0 %1359 = bitcast float %1356 to i32 %1360 = bitcast float %1358 to i32 %1361 = bitcast float %225 to i32 %1362 = insertelement <4 x i32> undef, i32 %1359, i32 0 %1363 = insertelement <4 x i32> %1362, i32 %1360, i32 1 %1364 = insertelement <4 x i32> %1363, i32 %1361, i32 2 %1365 = bitcast <8 x i32> %83 to <32 x i8> %1366 = bitcast <4 x i32> %85 to <16 x i8> %1367 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1364, <32 x i8> %1365, <16 x i8> %1366, i32 2) %1368 = extractelement <4 x float> %1367, i32 1 %1369 = extractelement <4 x float> %1367, i32 3 %1370 = fcmp oeq float %temp38.0, 4.000000e+00 %1371 = select i1 %1370, float 1.000000e+00, float 0.000000e+00 %1372 = bitcast float %1356 to i32 %1373 = bitcast float %1358 to i32 %1374 = bitcast float %225 to i32 %1375 = insertelement <4 x i32> undef, i32 %1372, i32 0 %1376 = insertelement <4 x i32> %1375, i32 %1373, i32 1 %1377 = insertelement <4 x i32> %1376, i32 %1374, i32 2 %1378 = bitcast <8 x i32> %75 to <32 x i8> %1379 = bitcast <4 x i32> %77 to <16 x i8> %1380 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1377, <32 x i8> %1378, <16 x i8> %1379, i32 2) %1381 = extractelement <4 x float> %1380, i32 1 %1382 = extractelement <4 x float> %1380, i32 3 %1383 = fcmp oeq float %temp38.0, 3.000000e+00 %1384 = select i1 %1383, float 1.000000e+00, float 0.000000e+00 %1385 = bitcast float %1356 to i32 %1386 = bitcast float %1358 to i32 %1387 = bitcast float %225 to i32 %1388 = insertelement <4 x i32> undef, i32 %1385, i32 0 %1389 = insertelement <4 x i32> %1388, i32 %1386, i32 1 %1390 = insertelement <4 x i32> %1389, i32 %1387, i32 2 %1391 = bitcast <8 x i32> %67 to <32 x i8> %1392 = bitcast <4 x i32> %69 to <16 x i8> %1393 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1390, <32 x i8> %1391, <16 x i8> %1392, i32 2) %1394 = extractelement <4 x float> %1393, i32 1 %1395 = extractelement <4 x float> %1393, i32 3 %1396 = fcmp oeq float %temp38.0, 2.000000e+00 %1397 = select i1 %1396, float 1.000000e+00, float 0.000000e+00 %1398 = bitcast float %1356 to i32 %1399 = bitcast float %1358 to i32 %1400 = bitcast float %225 to i32 %1401 = insertelement <4 x i32> undef, i32 %1398, i32 0 %1402 = insertelement <4 x i32> %1401, i32 %1399, i32 1 %1403 = insertelement <4 x i32> %1402, i32 %1400, i32 2 %1404 = bitcast <8 x i32> %59 to <32 x i8> %1405 = bitcast <4 x i32> %61 to <16 x i8> %1406 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1403, <32 x i8> %1404, <16 x i8> %1405, i32 2) %1407 = extractelement <4 x float> %1406, i32 1 %1408 = extractelement <4 x float> %1406, i32 3 %1409 = fcmp oeq float %temp38.0, 1.000000e+00 %1410 = select i1 %1409, float 1.000000e+00, float 0.000000e+00 %1411 = bitcast float %1356 to i32 %1412 = bitcast float %1358 to i32 %1413 = bitcast float %225 to i32 %1414 = insertelement <4 x i32> undef, i32 %1411, i32 0 %1415 = insertelement <4 x i32> %1414, i32 %1412, i32 1 %1416 = insertelement <4 x i32> %1415, i32 %1413, i32 2 %1417 = bitcast <8 x i32> %51 to <32 x i8> %1418 = bitcast <4 x i32> %53 to <16 x i8> %1419 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1416, <32 x i8> %1417, <16 x i8> %1418, i32 2) %1420 = extractelement <4 x float> %1419, i32 1 %1421 = extractelement <4 x float> %1419, i32 3 %1422 = fcmp oeq float %temp38.0, 0.000000e+00 %1423 = select i1 %1422, float 1.000000e+00, float 0.000000e+00 %1424 = fmul float %1420, %1423 %1425 = fmul float %1421, %1423 %1426 = fmul float %1407, %1410 %1427 = fadd float %1426, %1424 %1428 = fmul float %1408, %1410 %1429 = fadd float %1428, %1425 %1430 = fmul float %1394, %1397 %1431 = fadd float %1430, %1427 %1432 = fmul float %1395, %1397 %1433 = fadd float %1432, %1429 %1434 = fmul float %1381, %1384 %1435 = fadd float %1434, %1431 %1436 = fmul float %1382, %1384 %1437 = fadd float %1436, %1433 %1438 = fmul float %1368, %1371 %1439 = fadd float %1438, %1435 %1440 = fmul float %1369, %1371 %1441 = fadd float %1440, %1437 %1442 = fmul float %1441, 2.000000e+00 %1443 = fadd float %1442, -1.000000e+00 %1444 = fmul float %1439, 2.000000e+00 %1445 = fadd float %1444, -1.000000e+00 %1446 = fmul float %1443, %1443 %1447 = fmul float %1445, %1445 %1448 = fadd float %1446, %1447 %1449 = call float @llvm.AMDIL.clamp.(float %1448, float 0.000000e+00, float 1.000000e+00) %1450 = fcmp une float %33, %temp24.0 %.sink226 = select i1 %1450, float %36, float %35 %temp60.2 = select i1 %1450, float 1.953125e-03, float 3.906250e-03 %1451 = fdiv float 1.000000e+00, %.sink226 %1452 = fmul float %99, %1451 %1453 = fmul float %97, %1451 %1454 = call float @llvm.floor.f32(float %1452) %1455 = fsub float %1452, %1454 %1456 = call float @llvm.floor.f32(float %1453) %1457 = fsub float %1453, %1456 %1458 = fmul float %37, 2.000000e+00 %1459 = fmul float %1458, %temp60.2 %1460 = fsub float 1.000000e+00, %1459 %1461 = fmul float %temp60.2, %37 %1462 = fmul float %1455, %1460 %1463 = fadd float %1462, %1461 %1464 = fmul float %1457, %1460 %1465 = fadd float %1464, %1461 %1466 = fmul float %1463, %temp24.0 %1467 = fadd float %1466, %temp36.0 %1468 = fmul float %1465, %temp24.0 %1469 = fadd float %1468, %temp37.0 %1470 = bitcast float %1467 to i32 %1471 = bitcast float %1469 to i32 %1472 = bitcast float %225 to i32 %1473 = insertelement <4 x i32> undef, i32 %1470, i32 0 %1474 = insertelement <4 x i32> %1473, i32 %1471, i32 1 %1475 = insertelement <4 x i32> %1474, i32 %1472, i32 2 %1476 = bitcast <8 x i32> %83 to <32 x i8> %1477 = bitcast <4 x i32> %85 to <16 x i8> %1478 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1475, <32 x i8> %1476, <16 x i8> %1477, i32 2) %1479 = extractelement <4 x float> %1478, i32 1 %1480 = extractelement <4 x float> %1478, i32 3 %1481 = fcmp oeq float %temp38.0, 4.000000e+00 %1482 = select i1 %1481, float 1.000000e+00, float 0.000000e+00 %1483 = bitcast float %1467 to i32 %1484 = bitcast float %1469 to i32 %1485 = bitcast float %225 to i32 %1486 = insertelement <4 x i32> undef, i32 %1483, i32 0 %1487 = insertelement <4 x i32> %1486, i32 %1484, i32 1 %1488 = insertelement <4 x i32> %1487, i32 %1485, i32 2 %1489 = bitcast <8 x i32> %75 to <32 x i8> %1490 = bitcast <4 x i32> %77 to <16 x i8> %1491 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1488, <32 x i8> %1489, <16 x i8> %1490, i32 2) %1492 = extractelement <4 x float> %1491, i32 1 %1493 = extractelement <4 x float> %1491, i32 3 %1494 = fcmp oeq float %temp38.0, 3.000000e+00 %1495 = select i1 %1494, float 1.000000e+00, float 0.000000e+00 %1496 = bitcast float %1467 to i32 %1497 = bitcast float %1469 to i32 %1498 = bitcast float %225 to i32 %1499 = insertelement <4 x i32> undef, i32 %1496, i32 0 %1500 = insertelement <4 x i32> %1499, i32 %1497, i32 1 %1501 = insertelement <4 x i32> %1500, i32 %1498, i32 2 %1502 = bitcast <8 x i32> %67 to <32 x i8> %1503 = bitcast <4 x i32> %69 to <16 x i8> %1504 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1501, <32 x i8> %1502, <16 x i8> %1503, i32 2) %1505 = extractelement <4 x float> %1504, i32 1 %1506 = extractelement <4 x float> %1504, i32 3 %1507 = fcmp oeq float %temp38.0, 2.000000e+00 %1508 = select i1 %1507, float 1.000000e+00, float 0.000000e+00 %1509 = bitcast float %1467 to i32 %1510 = bitcast float %1469 to i32 %1511 = bitcast float %225 to i32 %1512 = insertelement <4 x i32> undef, i32 %1509, i32 0 %1513 = insertelement <4 x i32> %1512, i32 %1510, i32 1 %1514 = insertelement <4 x i32> %1513, i32 %1511, i32 2 %1515 = bitcast <8 x i32> %59 to <32 x i8> %1516 = bitcast <4 x i32> %61 to <16 x i8> %1517 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1514, <32 x i8> %1515, <16 x i8> %1516, i32 2) %1518 = extractelement <4 x float> %1517, i32 1 %1519 = extractelement <4 x float> %1517, i32 3 %1520 = fcmp oeq float %temp38.0, 1.000000e+00 %1521 = select i1 %1520, float 1.000000e+00, float 0.000000e+00 %1522 = bitcast float %1467 to i32 %1523 = bitcast float %1469 to i32 %1524 = bitcast float %225 to i32 %1525 = insertelement <4 x i32> undef, i32 %1522, i32 0 %1526 = insertelement <4 x i32> %1525, i32 %1523, i32 1 %1527 = insertelement <4 x i32> %1526, i32 %1524, i32 2 %1528 = bitcast <8 x i32> %51 to <32 x i8> %1529 = bitcast <4 x i32> %53 to <16 x i8> %1530 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1527, <32 x i8> %1528, <16 x i8> %1529, i32 2) %1531 = extractelement <4 x float> %1530, i32 1 %1532 = extractelement <4 x float> %1530, i32 3 %1533 = fcmp oeq float %temp38.0, 0.000000e+00 %1534 = select i1 %1533, float 1.000000e+00, float 0.000000e+00 %1535 = fmul float %1531, %1534 %1536 = fmul float %1532, %1534 %1537 = fmul float %1518, %1521 %1538 = fadd float %1537, %1535 %1539 = fmul float %1519, %1521 %1540 = fadd float %1539, %1536 %1541 = fmul float %1505, %1508 %1542 = fadd float %1541, %1538 %1543 = fmul float %1506, %1508 %1544 = fadd float %1543, %1540 %1545 = fmul float %1492, %1495 %1546 = fadd float %1545, %1542 %1547 = fmul float %1493, %1495 %1548 = fadd float %1547, %1544 %1549 = fmul float %1479, %1482 %1550 = fadd float %1549, %1546 %1551 = fmul float %1480, %1482 %1552 = fadd float %1551, %1548 %1553 = fmul float %1552, 2.000000e+00 %1554 = fadd float %1553, -1.000000e+00 %1555 = fmul float %1550, 2.000000e+00 %1556 = fadd float %1555, -1.000000e+00 %1557 = fmul float %1554, %1554 %1558 = fmul float %1556, %1556 %1559 = fadd float %1557, %1558 %1560 = call float @llvm.AMDIL.clamp.(float %1559, float 0.000000e+00, float 1.000000e+00) %1561 = fcmp une float %33, %temp24.0 %.sink227 = select i1 %1561, float %36, float %35 %temp64.2 = select i1 %1561, float 1.953125e-03, float 3.906250e-03 %1562 = fdiv float 1.000000e+00, %.sink227 %1563 = fmul float %97, %1562 %1564 = fmul float %98, %1562 %1565 = call float @llvm.floor.f32(float %1563) %1566 = fsub float %1563, %1565 %1567 = call float @llvm.floor.f32(float %1564) %1568 = fsub float %1564, %1567 %1569 = fmul float %37, 2.000000e+00 %1570 = fmul float %1569, %temp64.2 %1571 = fsub float 1.000000e+00, %1570 %1572 = fmul float %temp64.2, %37 %1573 = fmul float %1566, %1571 %1574 = fadd float %1573, %1572 %1575 = fmul float %1568, %1571 %1576 = fadd float %1575, %1572 %1577 = fmul float %1574, %temp24.0 %1578 = fadd float %1577, %temp36.0 %1579 = fmul float %1576, %temp24.0 %1580 = fadd float %1579, %temp37.0 %1581 = bitcast float %1578 to i32 %1582 = bitcast float %1580 to i32 %1583 = bitcast float %225 to i32 %1584 = insertelement <4 x i32> undef, i32 %1581, i32 0 %1585 = insertelement <4 x i32> %1584, i32 %1582, i32 1 %1586 = insertelement <4 x i32> %1585, i32 %1583, i32 2 %1587 = bitcast <8 x i32> %83 to <32 x i8> %1588 = bitcast <4 x i32> %85 to <16 x i8> %1589 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1586, <32 x i8> %1587, <16 x i8> %1588, i32 2) %1590 = extractelement <4 x float> %1589, i32 1 %1591 = extractelement <4 x float> %1589, i32 3 %1592 = fcmp oeq float %temp38.0, 4.000000e+00 %1593 = select i1 %1592, float 1.000000e+00, float 0.000000e+00 %1594 = bitcast float %1578 to i32 %1595 = bitcast float %1580 to i32 %1596 = bitcast float %225 to i32 %1597 = insertelement <4 x i32> undef, i32 %1594, i32 0 %1598 = insertelement <4 x i32> %1597, i32 %1595, i32 1 %1599 = insertelement <4 x i32> %1598, i32 %1596, i32 2 %1600 = bitcast <8 x i32> %75 to <32 x i8> %1601 = bitcast <4 x i32> %77 to <16 x i8> %1602 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1599, <32 x i8> %1600, <16 x i8> %1601, i32 2) %1603 = extractelement <4 x float> %1602, i32 1 %1604 = extractelement <4 x float> %1602, i32 3 %1605 = fcmp oeq float %temp38.0, 3.000000e+00 %1606 = select i1 %1605, float 1.000000e+00, float 0.000000e+00 %1607 = bitcast float %1578 to i32 %1608 = bitcast float %1580 to i32 %1609 = bitcast float %225 to i32 %1610 = insertelement <4 x i32> undef, i32 %1607, i32 0 %1611 = insertelement <4 x i32> %1610, i32 %1608, i32 1 %1612 = insertelement <4 x i32> %1611, i32 %1609, i32 2 %1613 = bitcast <8 x i32> %67 to <32 x i8> %1614 = bitcast <4 x i32> %69 to <16 x i8> %1615 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1612, <32 x i8> %1613, <16 x i8> %1614, i32 2) %1616 = extractelement <4 x float> %1615, i32 1 %1617 = extractelement <4 x float> %1615, i32 3 %1618 = fcmp oeq float %temp38.0, 2.000000e+00 %1619 = select i1 %1618, float 1.000000e+00, float 0.000000e+00 %1620 = bitcast float %1578 to i32 %1621 = bitcast float %1580 to i32 %1622 = bitcast float %225 to i32 %1623 = insertelement <4 x i32> undef, i32 %1620, i32 0 %1624 = insertelement <4 x i32> %1623, i32 %1621, i32 1 %1625 = insertelement <4 x i32> %1624, i32 %1622, i32 2 %1626 = bitcast <8 x i32> %59 to <32 x i8> %1627 = bitcast <4 x i32> %61 to <16 x i8> %1628 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1625, <32 x i8> %1626, <16 x i8> %1627, i32 2) %1629 = extractelement <4 x float> %1628, i32 1 %1630 = extractelement <4 x float> %1628, i32 3 %1631 = fcmp oeq float %temp38.0, 1.000000e+00 %1632 = select i1 %1631, float 1.000000e+00, float 0.000000e+00 %1633 = bitcast float %1578 to i32 %1634 = bitcast float %1580 to i32 %1635 = bitcast float %225 to i32 %1636 = insertelement <4 x i32> undef, i32 %1633, i32 0 %1637 = insertelement <4 x i32> %1636, i32 %1634, i32 1 %1638 = insertelement <4 x i32> %1637, i32 %1635, i32 2 %1639 = bitcast <8 x i32> %51 to <32 x i8> %1640 = bitcast <4 x i32> %53 to <16 x i8> %1641 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1638, <32 x i8> %1639, <16 x i8> %1640, i32 2) %1642 = extractelement <4 x float> %1641, i32 1 %1643 = extractelement <4 x float> %1641, i32 3 %1644 = fcmp oeq float %temp38.0, 0.000000e+00 %1645 = select i1 %1644, float 1.000000e+00, float 0.000000e+00 %1646 = fmul float %1642, %1645 %1647 = fmul float %1643, %1645 %1648 = fmul float %1629, %1632 %1649 = fadd float %1648, %1646 %1650 = fmul float %1630, %1632 %1651 = fadd float %1650, %1647 %1652 = fmul float %1616, %1619 %1653 = fadd float %1652, %1649 %1654 = fmul float %1617, %1619 %1655 = fadd float %1654, %1651 %1656 = fmul float %1603, %1606 %1657 = fadd float %1656, %1653 %1658 = fmul float %1604, %1606 %1659 = fadd float %1658, %1655 %1660 = fmul float %1590, %1593 %1661 = fadd float %1660, %1657 %1662 = fmul float %1591, %1593 %1663 = fadd float %1662, %1659 %1664 = fmul float %1663, 2.000000e+00 %1665 = fadd float %1664, -1.000000e+00 %1666 = fmul float %1661, 2.000000e+00 %1667 = fadd float %1666, -1.000000e+00 %1668 = fmul float %1665, %1665 %1669 = fmul float %1667, %1667 %1670 = fadd float %1668, %1669 %1671 = call float @llvm.AMDIL.clamp.(float %1670, float 0.000000e+00, float 1.000000e+00) %1672 = fmul float %148, 0.000000e+00 %1673 = fmul float %1443, %148 %1674 = fmul float %1445, %148 %1675 = fmul float %1556, %149 %1676 = fadd float %1675, %1672 %1677 = fmul float %149, 0.000000e+00 %1678 = fadd float %1677, %1673 %1679 = fmul float %1554, %149 %1680 = fadd float %1679, %1674 %1681 = fmul float %1665, %150 %1682 = fadd float %1681, %1676 %1683 = fmul float %1667, %150 %1684 = fadd float %1683, %1678 %1685 = fmul float %150, 0.000000e+00 %1686 = fadd float %1685, %1680 %1687 = fcmp une float %33, %temp28.0 %.sink228 = select i1 %1687, float %36, float %35 %temp52.3 = select i1 %1687, float 1.953125e-03, float 3.906250e-03 %1688 = fdiv float 1.000000e+00, %.sink228 %1689 = fmul float %99, %1688 %1690 = fmul float %98, %1688 %1691 = call float @llvm.floor.f32(float %1689) %1692 = fsub float %1689, %1691 %1693 = call float @llvm.floor.f32(float %1690) %1694 = fsub float %1690, %1693 %1695 = fmul float %37, 2.000000e+00 %1696 = fmul float %1695, %temp52.3 %1697 = fsub float 1.000000e+00, %1696 %1698 = fmul float %temp52.3, %37 %1699 = fmul float %1692, %1697 %1700 = fadd float %1699, %1698 %1701 = fmul float %1694, %1697 %1702 = fadd float %1701, %1698 %1703 = fmul float %1700, %temp28.0 %1704 = fadd float %1703, %temp44.0 %1705 = fmul float %1702, %temp28.0 %1706 = fadd float %1705, %temp45.0 %1707 = bitcast float %1704 to i32 %1708 = bitcast float %1706 to i32 %1709 = bitcast float %225 to i32 %1710 = insertelement <4 x i32> undef, i32 %1707, i32 0 %1711 = insertelement <4 x i32> %1710, i32 %1708, i32 1 %1712 = insertelement <4 x i32> %1711, i32 %1709, i32 2 %1713 = bitcast <8 x i32> %83 to <32 x i8> %1714 = bitcast <4 x i32> %85 to <16 x i8> %1715 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1712, <32 x i8> %1713, <16 x i8> %1714, i32 2) %1716 = extractelement <4 x float> %1715, i32 1 %1717 = extractelement <4 x float> %1715, i32 3 %1718 = fcmp oeq float %temp46.0, 4.000000e+00 %1719 = select i1 %1718, float 1.000000e+00, float 0.000000e+00 %1720 = bitcast float %1704 to i32 %1721 = bitcast float %1706 to i32 %1722 = bitcast float %225 to i32 %1723 = insertelement <4 x i32> undef, i32 %1720, i32 0 %1724 = insertelement <4 x i32> %1723, i32 %1721, i32 1 %1725 = insertelement <4 x i32> %1724, i32 %1722, i32 2 %1726 = bitcast <8 x i32> %75 to <32 x i8> %1727 = bitcast <4 x i32> %77 to <16 x i8> %1728 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1725, <32 x i8> %1726, <16 x i8> %1727, i32 2) %1729 = extractelement <4 x float> %1728, i32 1 %1730 = extractelement <4 x float> %1728, i32 3 %1731 = fcmp oeq float %temp46.0, 3.000000e+00 %1732 = select i1 %1731, float 1.000000e+00, float 0.000000e+00 %1733 = bitcast float %1704 to i32 %1734 = bitcast float %1706 to i32 %1735 = bitcast float %225 to i32 %1736 = insertelement <4 x i32> undef, i32 %1733, i32 0 %1737 = insertelement <4 x i32> %1736, i32 %1734, i32 1 %1738 = insertelement <4 x i32> %1737, i32 %1735, i32 2 %1739 = bitcast <8 x i32> %67 to <32 x i8> %1740 = bitcast <4 x i32> %69 to <16 x i8> %1741 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1738, <32 x i8> %1739, <16 x i8> %1740, i32 2) %1742 = extractelement <4 x float> %1741, i32 1 %1743 = extractelement <4 x float> %1741, i32 3 %1744 = fcmp oeq float %temp46.0, 2.000000e+00 %1745 = select i1 %1744, float 1.000000e+00, float 0.000000e+00 %1746 = bitcast float %1704 to i32 %1747 = bitcast float %1706 to i32 %1748 = bitcast float %225 to i32 %1749 = insertelement <4 x i32> undef, i32 %1746, i32 0 %1750 = insertelement <4 x i32> %1749, i32 %1747, i32 1 %1751 = insertelement <4 x i32> %1750, i32 %1748, i32 2 %1752 = bitcast <8 x i32> %59 to <32 x i8> %1753 = bitcast <4 x i32> %61 to <16 x i8> %1754 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1751, <32 x i8> %1752, <16 x i8> %1753, i32 2) %1755 = extractelement <4 x float> %1754, i32 1 %1756 = extractelement <4 x float> %1754, i32 3 %1757 = fcmp oeq float %temp46.0, 1.000000e+00 %1758 = select i1 %1757, float 1.000000e+00, float 0.000000e+00 %1759 = bitcast float %1704 to i32 %1760 = bitcast float %1706 to i32 %1761 = bitcast float %225 to i32 %1762 = insertelement <4 x i32> undef, i32 %1759, i32 0 %1763 = insertelement <4 x i32> %1762, i32 %1760, i32 1 %1764 = insertelement <4 x i32> %1763, i32 %1761, i32 2 %1765 = bitcast <8 x i32> %51 to <32 x i8> %1766 = bitcast <4 x i32> %53 to <16 x i8> %1767 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1764, <32 x i8> %1765, <16 x i8> %1766, i32 2) %1768 = extractelement <4 x float> %1767, i32 1 %1769 = extractelement <4 x float> %1767, i32 3 %1770 = fcmp oeq float %temp46.0, 0.000000e+00 %1771 = select i1 %1770, float 1.000000e+00, float 0.000000e+00 %1772 = fmul float %1768, %1771 %1773 = fmul float %1769, %1771 %1774 = fmul float %1755, %1758 %1775 = fadd float %1774, %1772 %1776 = fmul float %1756, %1758 %1777 = fadd float %1776, %1773 %1778 = fmul float %1742, %1745 %1779 = fadd float %1778, %1775 %1780 = fmul float %1743, %1745 %1781 = fadd float %1780, %1777 %1782 = fmul float %1729, %1732 %1783 = fadd float %1782, %1779 %1784 = fmul float %1730, %1732 %1785 = fadd float %1784, %1781 %1786 = fmul float %1716, %1719 %1787 = fadd float %1786, %1783 %1788 = fmul float %1717, %1719 %1789 = fadd float %1788, %1785 %1790 = fmul float %1789, 2.000000e+00 %1791 = fadd float %1790, -1.000000e+00 %1792 = fmul float %1787, 2.000000e+00 %1793 = fadd float %1792, -1.000000e+00 %1794 = fmul float %1791, %1791 %1795 = fmul float %1793, %1793 %1796 = fadd float %1794, %1795 %1797 = call float @llvm.AMDIL.clamp.(float %1796, float 0.000000e+00, float 1.000000e+00) %1798 = fcmp une float %33, %temp28.0 %.sink229 = select i1 %1798, float %36, float %35 %temp56.4 = select i1 %1798, float 1.953125e-03, float 3.906250e-03 %1799 = fdiv float 1.000000e+00, %.sink229 %1800 = fmul float %99, %1799 %1801 = fmul float %97, %1799 %1802 = call float @llvm.floor.f32(float %1800) %1803 = fsub float %1800, %1802 %1804 = call float @llvm.floor.f32(float %1801) %1805 = fsub float %1801, %1804 %1806 = fmul float %37, 2.000000e+00 %1807 = fmul float %1806, %temp56.4 %1808 = fsub float 1.000000e+00, %1807 %1809 = fmul float %temp56.4, %37 %1810 = fmul float %1803, %1808 %1811 = fadd float %1810, %1809 %1812 = fmul float %1805, %1808 %1813 = fadd float %1812, %1809 %1814 = fmul float %1811, %temp28.0 %1815 = fadd float %1814, %temp44.0 %1816 = fmul float %1813, %temp28.0 %1817 = fadd float %1816, %temp45.0 %1818 = bitcast float %1815 to i32 %1819 = bitcast float %1817 to i32 %1820 = bitcast float %225 to i32 %1821 = insertelement <4 x i32> undef, i32 %1818, i32 0 %1822 = insertelement <4 x i32> %1821, i32 %1819, i32 1 %1823 = insertelement <4 x i32> %1822, i32 %1820, i32 2 %1824 = bitcast <8 x i32> %83 to <32 x i8> %1825 = bitcast <4 x i32> %85 to <16 x i8> %1826 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1823, <32 x i8> %1824, <16 x i8> %1825, i32 2) %1827 = extractelement <4 x float> %1826, i32 1 %1828 = extractelement <4 x float> %1826, i32 3 %1829 = fcmp oeq float %temp46.0, 4.000000e+00 %1830 = select i1 %1829, float 1.000000e+00, float 0.000000e+00 %1831 = bitcast float %1815 to i32 %1832 = bitcast float %1817 to i32 %1833 = bitcast float %225 to i32 %1834 = insertelement <4 x i32> undef, i32 %1831, i32 0 %1835 = insertelement <4 x i32> %1834, i32 %1832, i32 1 %1836 = insertelement <4 x i32> %1835, i32 %1833, i32 2 %1837 = bitcast <8 x i32> %75 to <32 x i8> %1838 = bitcast <4 x i32> %77 to <16 x i8> %1839 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1836, <32 x i8> %1837, <16 x i8> %1838, i32 2) %1840 = extractelement <4 x float> %1839, i32 1 %1841 = extractelement <4 x float> %1839, i32 3 %1842 = fcmp oeq float %temp46.0, 3.000000e+00 %1843 = select i1 %1842, float 1.000000e+00, float 0.000000e+00 %1844 = bitcast float %1815 to i32 %1845 = bitcast float %1817 to i32 %1846 = bitcast float %225 to i32 %1847 = insertelement <4 x i32> undef, i32 %1844, i32 0 %1848 = insertelement <4 x i32> %1847, i32 %1845, i32 1 %1849 = insertelement <4 x i32> %1848, i32 %1846, i32 2 %1850 = bitcast <8 x i32> %67 to <32 x i8> %1851 = bitcast <4 x i32> %69 to <16 x i8> %1852 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1849, <32 x i8> %1850, <16 x i8> %1851, i32 2) %1853 = extractelement <4 x float> %1852, i32 1 %1854 = extractelement <4 x float> %1852, i32 3 %1855 = fcmp oeq float %temp46.0, 2.000000e+00 %1856 = select i1 %1855, float 1.000000e+00, float 0.000000e+00 %1857 = bitcast float %1815 to i32 %1858 = bitcast float %1817 to i32 %1859 = bitcast float %225 to i32 %1860 = insertelement <4 x i32> undef, i32 %1857, i32 0 %1861 = insertelement <4 x i32> %1860, i32 %1858, i32 1 %1862 = insertelement <4 x i32> %1861, i32 %1859, i32 2 %1863 = bitcast <8 x i32> %59 to <32 x i8> %1864 = bitcast <4 x i32> %61 to <16 x i8> %1865 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1862, <32 x i8> %1863, <16 x i8> %1864, i32 2) %1866 = extractelement <4 x float> %1865, i32 1 %1867 = extractelement <4 x float> %1865, i32 3 %1868 = fcmp oeq float %temp46.0, 1.000000e+00 %1869 = select i1 %1868, float 1.000000e+00, float 0.000000e+00 %1870 = bitcast float %1815 to i32 %1871 = bitcast float %1817 to i32 %1872 = bitcast float %225 to i32 %1873 = insertelement <4 x i32> undef, i32 %1870, i32 0 %1874 = insertelement <4 x i32> %1873, i32 %1871, i32 1 %1875 = insertelement <4 x i32> %1874, i32 %1872, i32 2 %1876 = bitcast <8 x i32> %51 to <32 x i8> %1877 = bitcast <4 x i32> %53 to <16 x i8> %1878 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1875, <32 x i8> %1876, <16 x i8> %1877, i32 2) %1879 = extractelement <4 x float> %1878, i32 1 %1880 = extractelement <4 x float> %1878, i32 3 %1881 = fcmp oeq float %temp46.0, 0.000000e+00 %1882 = select i1 %1881, float 1.000000e+00, float 0.000000e+00 %1883 = fmul float %1879, %1882 %1884 = fmul float %1880, %1882 %1885 = fmul float %1866, %1869 %1886 = fadd float %1885, %1883 %1887 = fmul float %1867, %1869 %1888 = fadd float %1887, %1884 %1889 = fmul float %1853, %1856 %1890 = fadd float %1889, %1886 %1891 = fmul float %1854, %1856 %1892 = fadd float %1891, %1888 %1893 = fmul float %1840, %1843 %1894 = fadd float %1893, %1890 %1895 = fmul float %1841, %1843 %1896 = fadd float %1895, %1892 %1897 = fmul float %1827, %1830 %1898 = fadd float %1897, %1894 %1899 = fmul float %1828, %1830 %1900 = fadd float %1899, %1896 %1901 = fmul float %1900, 2.000000e+00 %1902 = fadd float %1901, -1.000000e+00 %1903 = fmul float %1898, 2.000000e+00 %1904 = fadd float %1903, -1.000000e+00 %1905 = fmul float %1902, %1902 %1906 = fmul float %1904, %1904 %1907 = fadd float %1905, %1906 %1908 = call float @llvm.AMDIL.clamp.(float %1907, float 0.000000e+00, float 1.000000e+00) %1909 = fcmp une float %33, %temp28.0 %.sink230 = select i1 %1909, float %36, float %35 %temp60.4 = select i1 %1909, float 1.953125e-03, float 3.906250e-03 %1910 = fdiv float 1.000000e+00, %.sink230 %1911 = fmul float %97, %1910 %1912 = fmul float %98, %1910 %1913 = call float @llvm.floor.f32(float %1911) %1914 = fsub float %1911, %1913 %1915 = call float @llvm.floor.f32(float %1912) %1916 = fsub float %1912, %1915 %1917 = fmul float %37, 2.000000e+00 %1918 = fmul float %1917, %temp60.4 %1919 = fsub float 1.000000e+00, %1918 %1920 = fmul float %temp60.4, %37 %1921 = fmul float %1914, %1919 %1922 = fadd float %1921, %1920 %1923 = fmul float %1916, %1919 %1924 = fadd float %1923, %1920 %1925 = fmul float %1922, %temp28.0 %1926 = fadd float %1925, %temp44.0 %1927 = fmul float %1924, %temp28.0 %1928 = fadd float %1927, %temp45.0 %1929 = bitcast float %1926 to i32 %1930 = bitcast float %1928 to i32 %1931 = bitcast float %225 to i32 %1932 = insertelement <4 x i32> undef, i32 %1929, i32 0 %1933 = insertelement <4 x i32> %1932, i32 %1930, i32 1 %1934 = insertelement <4 x i32> %1933, i32 %1931, i32 2 %1935 = bitcast <8 x i32> %83 to <32 x i8> %1936 = bitcast <4 x i32> %85 to <16 x i8> %1937 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1934, <32 x i8> %1935, <16 x i8> %1936, i32 2) %1938 = extractelement <4 x float> %1937, i32 1 %1939 = extractelement <4 x float> %1937, i32 3 %1940 = fcmp oeq float %temp46.0, 4.000000e+00 %1941 = select i1 %1940, float 1.000000e+00, float 0.000000e+00 %1942 = bitcast float %1926 to i32 %1943 = bitcast float %1928 to i32 %1944 = bitcast float %225 to i32 %1945 = insertelement <4 x i32> undef, i32 %1942, i32 0 %1946 = insertelement <4 x i32> %1945, i32 %1943, i32 1 %1947 = insertelement <4 x i32> %1946, i32 %1944, i32 2 %1948 = bitcast <8 x i32> %75 to <32 x i8> %1949 = bitcast <4 x i32> %77 to <16 x i8> %1950 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1947, <32 x i8> %1948, <16 x i8> %1949, i32 2) %1951 = extractelement <4 x float> %1950, i32 1 %1952 = extractelement <4 x float> %1950, i32 3 %1953 = fcmp oeq float %temp46.0, 3.000000e+00 %1954 = select i1 %1953, float 1.000000e+00, float 0.000000e+00 %1955 = bitcast float %1926 to i32 %1956 = bitcast float %1928 to i32 %1957 = bitcast float %225 to i32 %1958 = insertelement <4 x i32> undef, i32 %1955, i32 0 %1959 = insertelement <4 x i32> %1958, i32 %1956, i32 1 %1960 = insertelement <4 x i32> %1959, i32 %1957, i32 2 %1961 = bitcast <8 x i32> %67 to <32 x i8> %1962 = bitcast <4 x i32> %69 to <16 x i8> %1963 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1960, <32 x i8> %1961, <16 x i8> %1962, i32 2) %1964 = extractelement <4 x float> %1963, i32 1 %1965 = extractelement <4 x float> %1963, i32 3 %1966 = fcmp oeq float %temp46.0, 2.000000e+00 %1967 = select i1 %1966, float 1.000000e+00, float 0.000000e+00 %1968 = bitcast float %1926 to i32 %1969 = bitcast float %1928 to i32 %1970 = bitcast float %225 to i32 %1971 = insertelement <4 x i32> undef, i32 %1968, i32 0 %1972 = insertelement <4 x i32> %1971, i32 %1969, i32 1 %1973 = insertelement <4 x i32> %1972, i32 %1970, i32 2 %1974 = bitcast <8 x i32> %59 to <32 x i8> %1975 = bitcast <4 x i32> %61 to <16 x i8> %1976 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1973, <32 x i8> %1974, <16 x i8> %1975, i32 2) %1977 = extractelement <4 x float> %1976, i32 1 %1978 = extractelement <4 x float> %1976, i32 3 %1979 = fcmp oeq float %temp46.0, 1.000000e+00 %1980 = select i1 %1979, float 1.000000e+00, float 0.000000e+00 %1981 = bitcast float %1926 to i32 %1982 = bitcast float %1928 to i32 %1983 = bitcast float %225 to i32 %1984 = insertelement <4 x i32> undef, i32 %1981, i32 0 %1985 = insertelement <4 x i32> %1984, i32 %1982, i32 1 %1986 = insertelement <4 x i32> %1985, i32 %1983, i32 2 %1987 = bitcast <8 x i32> %51 to <32 x i8> %1988 = bitcast <4 x i32> %53 to <16 x i8> %1989 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1986, <32 x i8> %1987, <16 x i8> %1988, i32 2) %1990 = extractelement <4 x float> %1989, i32 1 %1991 = extractelement <4 x float> %1989, i32 3 %1992 = fcmp oeq float %temp46.0, 0.000000e+00 %1993 = select i1 %1992, float 1.000000e+00, float 0.000000e+00 %1994 = fmul float %1990, %1993 %1995 = fmul float %1991, %1993 %1996 = fmul float %1977, %1980 %1997 = fadd float %1996, %1994 %1998 = fmul float %1978, %1980 %1999 = fadd float %1998, %1995 %2000 = fmul float %1964, %1967 %2001 = fadd float %2000, %1997 %2002 = fmul float %1965, %1967 %2003 = fadd float %2002, %1999 %2004 = fmul float %1951, %1954 %2005 = fadd float %2004, %2001 %2006 = fmul float %1952, %1954 %2007 = fadd float %2006, %2003 %2008 = fmul float %1938, %1941 %2009 = fadd float %2008, %2005 %2010 = fmul float %1939, %1941 %2011 = fadd float %2010, %2007 %2012 = fmul float %2011, 2.000000e+00 %2013 = fadd float %2012, -1.000000e+00 %2014 = fmul float %2009, 2.000000e+00 %2015 = fadd float %2014, -1.000000e+00 %2016 = fmul float %2013, %2013 %2017 = fmul float %2015, %2015 %2018 = fadd float %2016, %2017 %2019 = call float @llvm.AMDIL.clamp.(float %2018, float 0.000000e+00, float 1.000000e+00) %2020 = fmul float %148, 0.000000e+00 %2021 = fmul float %1791, %148 %2022 = fmul float %1793, %148 %2023 = fmul float %1904, %149 %2024 = fadd float %2023, %2020 %2025 = fmul float %149, 0.000000e+00 %2026 = fadd float %2025, %2021 %2027 = fmul float %1902, %149 %2028 = fadd float %2027, %2022 %2029 = fmul float %2013, %150 %2030 = fadd float %2029, %2024 %2031 = fmul float %2015, %150 %2032 = fadd float %2031, %2026 %2033 = fmul float %150, 0.000000e+00 %2034 = fadd float %2033, %2028 %2035 = fcmp une float %33, %temp32.0 %.sink231 = select i1 %2035, float %36, float %35 %temp44.1 = select i1 %2035, float 1.953125e-03, float 3.906250e-03 %2036 = fdiv float 1.000000e+00, %.sink231 %2037 = fmul float %99, %2036 %2038 = fmul float %98, %2036 %2039 = call float @llvm.floor.f32(float %2037) %2040 = fsub float %2037, %2039 %2041 = call float @llvm.floor.f32(float %2038) %2042 = fsub float %2038, %2041 %2043 = fmul float %37, 2.000000e+00 %2044 = fmul float %2043, %temp44.1 %2045 = fsub float 1.000000e+00, %2044 %2046 = fmul float %temp44.1, %37 %2047 = fmul float %2040, %2045 %2048 = fadd float %2047, %2046 %2049 = fmul float %2042, %2045 %2050 = fadd float %2049, %2046 %2051 = fmul float %2048, %temp32.0 %2052 = fadd float %2051, %temp20.0 %2053 = fmul float %2050, %temp32.0 %2054 = fadd float %2053, %temp21.0 %2055 = bitcast float %2052 to i32 %2056 = bitcast float %2054 to i32 %2057 = bitcast float %225 to i32 %2058 = insertelement <4 x i32> undef, i32 %2055, i32 0 %2059 = insertelement <4 x i32> %2058, i32 %2056, i32 1 %2060 = insertelement <4 x i32> %2059, i32 %2057, i32 2 %2061 = bitcast <8 x i32> %83 to <32 x i8> %2062 = bitcast <4 x i32> %85 to <16 x i8> %2063 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2060, <32 x i8> %2061, <16 x i8> %2062, i32 2) %2064 = extractelement <4 x float> %2063, i32 1 %2065 = extractelement <4 x float> %2063, i32 3 %2066 = fcmp oeq float %temp22.0, 4.000000e+00 %2067 = select i1 %2066, float 1.000000e+00, float 0.000000e+00 %2068 = bitcast float %2052 to i32 %2069 = bitcast float %2054 to i32 %2070 = bitcast float %225 to i32 %2071 = insertelement <4 x i32> undef, i32 %2068, i32 0 %2072 = insertelement <4 x i32> %2071, i32 %2069, i32 1 %2073 = insertelement <4 x i32> %2072, i32 %2070, i32 2 %2074 = bitcast <8 x i32> %75 to <32 x i8> %2075 = bitcast <4 x i32> %77 to <16 x i8> %2076 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2073, <32 x i8> %2074, <16 x i8> %2075, i32 2) %2077 = extractelement <4 x float> %2076, i32 1 %2078 = extractelement <4 x float> %2076, i32 3 %2079 = fcmp oeq float %temp22.0, 3.000000e+00 %2080 = select i1 %2079, float 1.000000e+00, float 0.000000e+00 %2081 = bitcast float %2052 to i32 %2082 = bitcast float %2054 to i32 %2083 = bitcast float %225 to i32 %2084 = insertelement <4 x i32> undef, i32 %2081, i32 0 %2085 = insertelement <4 x i32> %2084, i32 %2082, i32 1 %2086 = insertelement <4 x i32> %2085, i32 %2083, i32 2 %2087 = bitcast <8 x i32> %67 to <32 x i8> %2088 = bitcast <4 x i32> %69 to <16 x i8> %2089 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2086, <32 x i8> %2087, <16 x i8> %2088, i32 2) %2090 = extractelement <4 x float> %2089, i32 1 %2091 = extractelement <4 x float> %2089, i32 3 %2092 = fcmp oeq float %temp22.0, 2.000000e+00 %2093 = select i1 %2092, float 1.000000e+00, float 0.000000e+00 %2094 = bitcast float %2052 to i32 %2095 = bitcast float %2054 to i32 %2096 = bitcast float %225 to i32 %2097 = insertelement <4 x i32> undef, i32 %2094, i32 0 %2098 = insertelement <4 x i32> %2097, i32 %2095, i32 1 %2099 = insertelement <4 x i32> %2098, i32 %2096, i32 2 %2100 = bitcast <8 x i32> %59 to <32 x i8> %2101 = bitcast <4 x i32> %61 to <16 x i8> %2102 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2099, <32 x i8> %2100, <16 x i8> %2101, i32 2) %2103 = extractelement <4 x float> %2102, i32 1 %2104 = extractelement <4 x float> %2102, i32 3 %2105 = fcmp oeq float %temp22.0, 1.000000e+00 %2106 = select i1 %2105, float 1.000000e+00, float 0.000000e+00 %2107 = bitcast float %2052 to i32 %2108 = bitcast float %2054 to i32 %2109 = bitcast float %225 to i32 %2110 = insertelement <4 x i32> undef, i32 %2107, i32 0 %2111 = insertelement <4 x i32> %2110, i32 %2108, i32 1 %2112 = insertelement <4 x i32> %2111, i32 %2109, i32 2 %2113 = bitcast <8 x i32> %51 to <32 x i8> %2114 = bitcast <4 x i32> %53 to <16 x i8> %2115 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2112, <32 x i8> %2113, <16 x i8> %2114, i32 2) %2116 = extractelement <4 x float> %2115, i32 1 %2117 = extractelement <4 x float> %2115, i32 3 %2118 = fcmp oeq float %temp22.0, 0.000000e+00 %2119 = select i1 %2118, float 1.000000e+00, float 0.000000e+00 %2120 = fmul float %2116, %2119 %2121 = fmul float %2117, %2119 %2122 = fmul float %2103, %2106 %2123 = fadd float %2122, %2120 %2124 = fmul float %2104, %2106 %2125 = fadd float %2124, %2121 %2126 = fmul float %2090, %2093 %2127 = fadd float %2126, %2123 %2128 = fmul float %2091, %2093 %2129 = fadd float %2128, %2125 %2130 = fmul float %2077, %2080 %2131 = fadd float %2130, %2127 %2132 = fmul float %2078, %2080 %2133 = fadd float %2132, %2129 %2134 = fmul float %2064, %2067 %2135 = fadd float %2134, %2131 %2136 = fmul float %2065, %2067 %2137 = fadd float %2136, %2133 %2138 = fmul float %2137, 2.000000e+00 %2139 = fadd float %2138, -1.000000e+00 %2140 = fmul float %2135, 2.000000e+00 %2141 = fadd float %2140, -1.000000e+00 %2142 = fmul float %2139, %2139 %2143 = fmul float %2141, %2141 %2144 = fadd float %2142, %2143 %2145 = call float @llvm.AMDIL.clamp.(float %2144, float 0.000000e+00, float 1.000000e+00) %2146 = fcmp une float %33, %temp32.0 %.sink232 = select i1 %2146, float %36, float %35 %temp52.5 = select i1 %2146, float 1.953125e-03, float 3.906250e-03 %2147 = fdiv float 1.000000e+00, %.sink232 %2148 = fmul float %99, %2147 %2149 = fmul float %97, %2147 %2150 = call float @llvm.floor.f32(float %2148) %2151 = fsub float %2148, %2150 %2152 = call float @llvm.floor.f32(float %2149) %2153 = fsub float %2149, %2152 %2154 = fmul float %37, 2.000000e+00 %2155 = fmul float %2154, %temp52.5 %2156 = fsub float 1.000000e+00, %2155 %2157 = fmul float %temp52.5, %37 %2158 = fmul float %2151, %2156 %2159 = fadd float %2158, %2157 %2160 = fmul float %2153, %2156 %2161 = fadd float %2160, %2157 %2162 = fmul float %2159, %temp32.0 %2163 = fadd float %2162, %temp20.0 %2164 = fmul float %2161, %temp32.0 %2165 = fadd float %2164, %temp21.0 %2166 = bitcast float %2163 to i32 %2167 = bitcast float %2165 to i32 %2168 = bitcast float %225 to i32 %2169 = insertelement <4 x i32> undef, i32 %2166, i32 0 %2170 = insertelement <4 x i32> %2169, i32 %2167, i32 1 %2171 = insertelement <4 x i32> %2170, i32 %2168, i32 2 %2172 = bitcast <8 x i32> %83 to <32 x i8> %2173 = bitcast <4 x i32> %85 to <16 x i8> %2174 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2171, <32 x i8> %2172, <16 x i8> %2173, i32 2) %2175 = extractelement <4 x float> %2174, i32 1 %2176 = extractelement <4 x float> %2174, i32 3 %2177 = fcmp oeq float %temp22.0, 4.000000e+00 %2178 = select i1 %2177, float 1.000000e+00, float 0.000000e+00 %2179 = bitcast float %2163 to i32 %2180 = bitcast float %2165 to i32 %2181 = bitcast float %225 to i32 %2182 = insertelement <4 x i32> undef, i32 %2179, i32 0 %2183 = insertelement <4 x i32> %2182, i32 %2180, i32 1 %2184 = insertelement <4 x i32> %2183, i32 %2181, i32 2 %2185 = bitcast <8 x i32> %75 to <32 x i8> %2186 = bitcast <4 x i32> %77 to <16 x i8> %2187 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2184, <32 x i8> %2185, <16 x i8> %2186, i32 2) %2188 = extractelement <4 x float> %2187, i32 1 %2189 = extractelement <4 x float> %2187, i32 3 %2190 = fcmp oeq float %temp22.0, 3.000000e+00 %2191 = select i1 %2190, float 1.000000e+00, float 0.000000e+00 %2192 = bitcast float %2163 to i32 %2193 = bitcast float %2165 to i32 %2194 = bitcast float %225 to i32 %2195 = insertelement <4 x i32> undef, i32 %2192, i32 0 %2196 = insertelement <4 x i32> %2195, i32 %2193, i32 1 %2197 = insertelement <4 x i32> %2196, i32 %2194, i32 2 %2198 = bitcast <8 x i32> %67 to <32 x i8> %2199 = bitcast <4 x i32> %69 to <16 x i8> %2200 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2197, <32 x i8> %2198, <16 x i8> %2199, i32 2) %2201 = extractelement <4 x float> %2200, i32 1 %2202 = extractelement <4 x float> %2200, i32 3 %2203 = fcmp oeq float %temp22.0, 2.000000e+00 %2204 = select i1 %2203, float 1.000000e+00, float 0.000000e+00 %2205 = bitcast float %2163 to i32 %2206 = bitcast float %2165 to i32 %2207 = bitcast float %225 to i32 %2208 = insertelement <4 x i32> undef, i32 %2205, i32 0 %2209 = insertelement <4 x i32> %2208, i32 %2206, i32 1 %2210 = insertelement <4 x i32> %2209, i32 %2207, i32 2 %2211 = bitcast <8 x i32> %59 to <32 x i8> %2212 = bitcast <4 x i32> %61 to <16 x i8> %2213 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2210, <32 x i8> %2211, <16 x i8> %2212, i32 2) %2214 = extractelement <4 x float> %2213, i32 1 %2215 = extractelement <4 x float> %2213, i32 3 %2216 = fcmp oeq float %temp22.0, 1.000000e+00 %2217 = select i1 %2216, float 1.000000e+00, float 0.000000e+00 %2218 = bitcast float %2163 to i32 %2219 = bitcast float %2165 to i32 %2220 = bitcast float %225 to i32 %2221 = insertelement <4 x i32> undef, i32 %2218, i32 0 %2222 = insertelement <4 x i32> %2221, i32 %2219, i32 1 %2223 = insertelement <4 x i32> %2222, i32 %2220, i32 2 %2224 = bitcast <8 x i32> %51 to <32 x i8> %2225 = bitcast <4 x i32> %53 to <16 x i8> %2226 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2223, <32 x i8> %2224, <16 x i8> %2225, i32 2) %2227 = extractelement <4 x float> %2226, i32 1 %2228 = extractelement <4 x float> %2226, i32 3 %2229 = fcmp oeq float %temp22.0, 0.000000e+00 %2230 = select i1 %2229, float 1.000000e+00, float 0.000000e+00 %2231 = fmul float %2227, %2230 %2232 = fmul float %2228, %2230 %2233 = fmul float %2214, %2217 %2234 = fadd float %2233, %2231 %2235 = fmul float %2215, %2217 %2236 = fadd float %2235, %2232 %2237 = fmul float %2201, %2204 %2238 = fadd float %2237, %2234 %2239 = fmul float %2202, %2204 %2240 = fadd float %2239, %2236 %2241 = fmul float %2188, %2191 %2242 = fadd float %2241, %2238 %2243 = fmul float %2189, %2191 %2244 = fadd float %2243, %2240 %2245 = fmul float %2175, %2178 %2246 = fadd float %2245, %2242 %2247 = fmul float %2176, %2178 %2248 = fadd float %2247, %2244 %2249 = fmul float %2248, 2.000000e+00 %2250 = fadd float %2249, -1.000000e+00 %2251 = fmul float %2246, 2.000000e+00 %2252 = fadd float %2251, -1.000000e+00 %2253 = fmul float %2250, %2250 %2254 = fmul float %2252, %2252 %2255 = fadd float %2253, %2254 %2256 = call float @llvm.AMDIL.clamp.(float %2255, float 0.000000e+00, float 1.000000e+00) %2257 = fcmp une float %33, %temp32.0 %.sink233 = select i1 %2257, float %36, float %35 %temp56.6 = select i1 %2257, float 1.953125e-03, float 3.906250e-03 %2258 = fdiv float 1.000000e+00, %.sink233 %2259 = fmul float %97, %2258 %2260 = fmul float %98, %2258 %2261 = call float @llvm.floor.f32(float %2259) %2262 = fsub float %2259, %2261 %2263 = call float @llvm.floor.f32(float %2260) %2264 = fsub float %2260, %2263 %2265 = fmul float %37, 2.000000e+00 %2266 = fmul float %2265, %temp56.6 %2267 = fsub float 1.000000e+00, %2266 %2268 = fmul float %temp56.6, %37 %2269 = fmul float %2262, %2267 %2270 = fadd float %2269, %2268 %2271 = fmul float %2264, %2267 %2272 = fadd float %2271, %2268 %2273 = fmul float %2270, %temp32.0 %2274 = fadd float %2273, %temp20.0 %2275 = fmul float %2272, %temp32.0 %2276 = fadd float %2275, %temp21.0 %2277 = bitcast float %2274 to i32 %2278 = bitcast float %2276 to i32 %2279 = bitcast float %225 to i32 %2280 = insertelement <4 x i32> undef, i32 %2277, i32 0 %2281 = insertelement <4 x i32> %2280, i32 %2278, i32 1 %2282 = insertelement <4 x i32> %2281, i32 %2279, i32 2 %2283 = bitcast <8 x i32> %83 to <32 x i8> %2284 = bitcast <4 x i32> %85 to <16 x i8> %2285 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2282, <32 x i8> %2283, <16 x i8> %2284, i32 2) %2286 = extractelement <4 x float> %2285, i32 1 %2287 = extractelement <4 x float> %2285, i32 3 %2288 = fcmp oeq float %temp22.0, 4.000000e+00 %2289 = select i1 %2288, float 1.000000e+00, float 0.000000e+00 %2290 = bitcast float %2274 to i32 %2291 = bitcast float %2276 to i32 %2292 = bitcast float %225 to i32 %2293 = insertelement <4 x i32> undef, i32 %2290, i32 0 %2294 = insertelement <4 x i32> %2293, i32 %2291, i32 1 %2295 = insertelement <4 x i32> %2294, i32 %2292, i32 2 %2296 = bitcast <8 x i32> %75 to <32 x i8> %2297 = bitcast <4 x i32> %77 to <16 x i8> %2298 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2295, <32 x i8> %2296, <16 x i8> %2297, i32 2) %2299 = extractelement <4 x float> %2298, i32 1 %2300 = extractelement <4 x float> %2298, i32 3 %2301 = fcmp oeq float %temp22.0, 3.000000e+00 %2302 = select i1 %2301, float 1.000000e+00, float 0.000000e+00 %2303 = bitcast float %2274 to i32 %2304 = bitcast float %2276 to i32 %2305 = bitcast float %225 to i32 %2306 = insertelement <4 x i32> undef, i32 %2303, i32 0 %2307 = insertelement <4 x i32> %2306, i32 %2304, i32 1 %2308 = insertelement <4 x i32> %2307, i32 %2305, i32 2 %2309 = bitcast <8 x i32> %67 to <32 x i8> %2310 = bitcast <4 x i32> %69 to <16 x i8> %2311 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2308, <32 x i8> %2309, <16 x i8> %2310, i32 2) %2312 = extractelement <4 x float> %2311, i32 1 %2313 = extractelement <4 x float> %2311, i32 3 %2314 = fcmp oeq float %temp22.0, 2.000000e+00 %2315 = select i1 %2314, float 1.000000e+00, float 0.000000e+00 %2316 = bitcast float %2274 to i32 %2317 = bitcast float %2276 to i32 %2318 = bitcast float %225 to i32 %2319 = insertelement <4 x i32> undef, i32 %2316, i32 0 %2320 = insertelement <4 x i32> %2319, i32 %2317, i32 1 %2321 = insertelement <4 x i32> %2320, i32 %2318, i32 2 %2322 = bitcast <8 x i32> %59 to <32 x i8> %2323 = bitcast <4 x i32> %61 to <16 x i8> %2324 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2321, <32 x i8> %2322, <16 x i8> %2323, i32 2) %2325 = extractelement <4 x float> %2324, i32 1 %2326 = extractelement <4 x float> %2324, i32 3 %2327 = fcmp oeq float %temp22.0, 1.000000e+00 %2328 = select i1 %2327, float 1.000000e+00, float 0.000000e+00 %2329 = bitcast float %2274 to i32 %2330 = bitcast float %2276 to i32 %2331 = bitcast float %225 to i32 %2332 = insertelement <4 x i32> undef, i32 %2329, i32 0 %2333 = insertelement <4 x i32> %2332, i32 %2330, i32 1 %2334 = insertelement <4 x i32> %2333, i32 %2331, i32 2 %2335 = bitcast <8 x i32> %51 to <32 x i8> %2336 = bitcast <4 x i32> %53 to <16 x i8> %2337 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2334, <32 x i8> %2335, <16 x i8> %2336, i32 2) %2338 = extractelement <4 x float> %2337, i32 1 %2339 = extractelement <4 x float> %2337, i32 3 %2340 = fcmp oeq float %temp22.0, 0.000000e+00 %2341 = select i1 %2340, float 1.000000e+00, float 0.000000e+00 %2342 = fmul float %2338, %2341 %2343 = fmul float %2339, %2341 %2344 = fmul float %2325, %2328 %2345 = fadd float %2344, %2342 %2346 = fmul float %2326, %2328 %2347 = fadd float %2346, %2343 %2348 = fmul float %2312, %2315 %2349 = fadd float %2348, %2345 %2350 = fmul float %2313, %2315 %2351 = fadd float %2350, %2347 %2352 = fmul float %2299, %2302 %2353 = fadd float %2352, %2349 %2354 = fmul float %2300, %2302 %2355 = fadd float %2354, %2351 %2356 = fmul float %2286, %2289 %2357 = fadd float %2356, %2353 %2358 = fmul float %2287, %2289 %2359 = fadd float %2358, %2355 %2360 = fmul float %2359, 2.000000e+00 %2361 = fadd float %2360, -1.000000e+00 %2362 = fmul float %2357, 2.000000e+00 %2363 = fadd float %2362, -1.000000e+00 %2364 = fmul float %2361, %2361 %2365 = fmul float %2363, %2363 %2366 = fadd float %2364, %2365 %2367 = call float @llvm.AMDIL.clamp.(float %2366, float 0.000000e+00, float 1.000000e+00) %2368 = fmul float %148, 0.000000e+00 %2369 = fmul float %2139, %148 %2370 = fmul float %2141, %148 %2371 = fmul float %2252, %149 %2372 = fadd float %2371, %2368 %2373 = fmul float %149, 0.000000e+00 %2374 = fadd float %2373, %2369 %2375 = fmul float %2250, %149 %2376 = fadd float %2375, %2370 %2377 = fmul float %2361, %150 %2378 = fadd float %2377, %2372 %2379 = fmul float %2363, %150 %2380 = fadd float %2379, %2374 %2381 = fmul float %150, 0.000000e+00 %2382 = fadd float %2381, %2376 %2383 = fmul float %90, %1682 %2384 = fmul float %90, %1684 %2385 = fmul float %90, %1686 %2386 = fmul float %91, %2030 %2387 = fadd float %2386, %2383 %2388 = fmul float %91, %2032 %2389 = fadd float %2388, %2384 %2390 = fmul float %91, %2034 %2391 = fadd float %2390, %2385 %2392 = fmul float %92, %2378 %2393 = fadd float %2392, %2387 %2394 = fmul float %92, %2380 %2395 = fadd float %2394, %2389 %2396 = fmul float %92, %2382 %2397 = fadd float %2396, %2391 %2398 = fmul float %2393, %2393 %2399 = fmul float %2395, %2395 %2400 = fadd float %2398, %2399 %2401 = fmul float %2397, %2397 %2402 = fadd float %2400, %2401 %2403 = fadd float %2402, 1.000000e+00 %2404 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2403) %2405 = fmul float %2393, %2404 %2406 = fmul float %2395, %2404 %2407 = fmul float %2397, %2404 %2408 = fmul float %2405, %89 %2409 = fmul float %2406, %89 %2410 = fmul float %2407, %89 %2411 = fsub float %94, %2408 %2412 = fsub float %95, %2409 %2413 = fsub float %96, %2410 %2414 = fmul float %2411, %2411 %2415 = fmul float %2412, %2412 %2416 = fadd float %2415, %2414 %2417 = fmul float %2413, %2413 %2418 = fadd float %2416, %2417 %2419 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2418) %2420 = fmul float %2411, %2419 %2421 = fmul float %2412, %2419 %2422 = fmul float %2413, %2419 %2423 = fmul float %1334, %100 %2424 = fmul float %1336, %101 %2425 = fmul float %1338, %102 %2426 = fadd float %109, %121 %2427 = fadd float %110, %122 %2428 = fadd float %111, %123 %2429 = fmul float %2426, %2426 %2430 = fmul float %2427, %2427 %2431 = fadd float %2430, %2429 %2432 = fmul float %2428, %2428 %2433 = fadd float %2431, %2432 %2434 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2433) %2435 = fmul float %2426, %2434 %2436 = fmul float %2427, %2434 %2437 = fmul float %2428, %2434 %2438 = fmul float %2420, %2435 %2439 = fmul float %2421, %2436 %2440 = fadd float %2439, %2438 %2441 = fmul float %2422, %2437 %2442 = fadd float %2440, %2441 %2443 = call float @llvm.maxnum.f32(float %2442, float 0x3F1A36E2E0000000) %2444 = fmul float %93, 3.200000e+01 %2445 = call float @llvm.pow.f32(float %2443, float %2444) %2446 = call float @llvm.AMDIL.clamp.(float %2445, float 0.000000e+00, float 1.000000e+00) %2447 = fmul float %2446, 2.000000e+00 %2448 = fsub float 3.000000e+00, %2447 %2449 = fmul float %2446, %2448 %2450 = fmul float %2446, %2449 %2451 = fmul float %2450, %93 %2452 = fmul float %1334, %30 %2453 = fmul float %1336, %31 %2454 = fmul float %1338, %32 %2455 = fmul float %2420, %109 %2456 = fmul float %2421, %110 %2457 = fadd float %2456, %2455 %2458 = fmul float %2422, %111 %2459 = fadd float %2457, %2458 %2460 = call float @llvm.AMDIL.clamp.(float %2459, float 0.000000e+00, float 1.000000e+00) %2461 = fmul float %43, 2.000000e+00 %2462 = fmul float %44, 2.000000e+00 %2463 = fmul float %45, 2.000000e+00 %2464 = call float @llvm.maxnum.f32(float %2461, float %40) %2465 = call float @llvm.maxnum.f32(float %2462, float %41) %2466 = call float @llvm.maxnum.f32(float %2463, float %42) %2467 = call float @llvm.minnum.f32(float %2464, float 1.000000e+00) %2468 = call float @llvm.minnum.f32(float %2465, float 1.000000e+00) %2469 = call float @llvm.minnum.f32(float %2466, float 1.000000e+00) %2470 = fmul float %2467, %1334 %2471 = fmul float %2468, %1336 %2472 = fmul float %2469, %1338 %2473 = fmul float %2452, %2460 %2474 = fadd float %2473, %2470 %2475 = fmul float %2453, %2460 %2476 = fadd float %2475, %2471 %2477 = fmul float %2454, %2460 %2478 = fadd float %2477, %2472 %2479 = fmul float %30, %2451 %2480 = fadd float %2479, %2474 %2481 = fmul float %31, %2451 %2482 = fadd float %2481, %2476 %2483 = fmul float %32, %2451 %2484 = fadd float %2483, %2478 %2485 = fmul float %2480, 5.000000e-01 %2486 = fmul float %2482, 5.000000e-01 %2487 = fmul float %2484, 5.000000e-01 %2488 = fadd float %2423, %2485 %2489 = fadd float %2424, %2486 %2490 = fadd float %2425, %2487 %2491 = call i32 @llvm.SI.packf16(float %2488, float %2489) %2492 = bitcast i32 %2491 to float %2493 = call i32 @llvm.SI.packf16(float %2490, float 1.000000e+00) %2494 = bitcast i32 %2493 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %2492, float %2494, float %2492, float %2494) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_writelane_b32 v254, s10, 52 ; 05FD680A s_mov_b64 s[100:101], s[4:5] ; BEE40404 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v12, v0, 0, 0, [m0] ; C8300000 v_interp_p2_f32 v12, [v12], v1, 0, 0, [m0] ; C8310001 v_interp_p1_f32 v20, v0, 1, 0, [m0] ; C8500100 v_interp_p2_f32 v20, [v20], v1, 1, 0, [m0] ; C8510101 v_interp_p1_f32 v21, v0, 2, 0, [m0] ; C8540200 v_interp_p2_f32 v21, [v21], v1, 2, 0, [m0] ; C8550201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v10, v0, 0, 1, [m0] ; C8280400 v_interp_p2_f32 v10, [v10], v1, 0, 1, [m0] ; C8290401 v_interp_p1_f32 v11, v0, 1, 1, [m0] ; C82C0500 v_interp_p2_f32 v11, [v11], v1, 1, 1, [m0] ; C82D0501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v2, v0, 3, 1, [m0] ; C8080700 v_interp_p2_f32 v2, [v2], v1, 3, 1, [m0] ; C8090701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v33, v0, 3, 2, [m0] ; C8840B00 v_interp_p2_f32 v33, [v33], v1, 3, 2, [m0] ; C8850B01 v_interp_p1_f32 v37, v0, 0, 3, [m0] ; C8940C00 v_interp_p2_f32 v37, [v37], v1, 0, 3, [m0] ; C8950C01 v_interp_p1_f32 v27, v0, 1, 3, [m0] ; C86C0D00 v_interp_p2_f32 v27, [v27], v1, 1, 3, [m0] ; C86D0D01 v_interp_p1_f32 v3, v0, 2, 3, [m0] ; C80C0E00 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s1, s[8:11], 0x4 ; C2008904 s_buffer_load_dword s0, s[8:11], 0x5 ; C2000905 v_interp_p2_f32 v3, [v3], v1, 2, 3, [m0] ; C80D0E01 v_interp_p1_f32 v4, v0, 3, 3, [m0] ; C8100F00 s_buffer_load_dword s38, s[8:11], 0x6 ; C2130906 v_interp_p2_f32 v4, [v4], v1, 3, 3, [m0] ; C8110F01 v_interp_p1_f32 v0, v0, 0, 4, [m0] ; C8001000 s_buffer_load_dword s40, s[8:11], 0x0 ; C2140900 s_buffer_load_dword s39, s[8:11], 0x1 ; C2138901 v_interp_p2_f32 v0, [v0], v1, 0, 4, [m0] ; C8011001 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e64 v1, s1, s1 ; D2100001 00000201 v_mac_f32_e64 v1, s0, s0 ; D23E0001 00000000 s_buffer_load_dword s2, s[8:11], 0x34 ; C2010934 s_buffer_load_dword s3, s[8:11], 0x38 ; C2018938 v_mac_f32_e64 v1, s38, s38 ; D23E0001 00004C26 s_buffer_load_dword s41, s[8:11], 0x2 ; C2148902 v_rsq_clamp_f32_e32 v15, v1 ; 7E1E5901 v_sub_f32_e32 v16, s40, v33 ; 08204228 v_sub_f32_e32 v17, s39, v37 ; 08224A27 v_mul_f32_e32 v1, v16, v16 ; 10022110 v_mac_f32_e32 v1, v17, v17 ; 3E022311 v_add_f32_e32 v12, 0.5, v12 ; 061818F0 v_floor_f32_e32 v18, v12 ; 7E24490C v_mov_b32_e32 v12, 0x42800000 ; 7E1802FF 42800000 v_cmp_le_f32_e32 vcc, v12, v18 ; 7C06250C s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v19, s41, v27 ; 08263629 v_mac_f32_e32 v1, v19, v19 ; 3E022713 v_rsq_clamp_f32_e32 v23, v1 ; 7E2E5901 v_mov_b32_e32 v13, 0x7fffffff ; 7E1A02FF 7FFFFFFF v_and_b32_e32 v1, v7, v13 ; 36021B07 s_mov_b32 s12, SCRATCH_RSRC_DWORD0 ; BE8C03FF 00000000 s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; BE8D03FF 00000000 s_mov_b32 s14, -1 ; BE8E03C1 s_mov_b32 s15, 0x80f000 ; BE8F03FF 0080F000 v_readlane_b32 s12, v254, 52 ; 021969FE s_nop 2 ; BF800002 buffer_store_dword v1, s[12:15], s12 ; E0700000 0C030100 v_and_b32_e32 v12, v8, v13 ; 36181B08 v_and_b32_e32 v13, v9, v13 ; 361A1B09 v_mul_f32_e64 v14, |v7|, |v7| ; D210030E 00020F07 v_mad_f32 v14, |v8|, |v8|, v14 ; D282030E 043A1108 v_mad_f32 v14, |v9|, |v9|, v14 ; D282030E 043A1309 v_rsq_clamp_f32_e32 v14, v14 ; 7E1C590E v_add_f32_e32 v20, 0.5, v20 ; 062828F0 v_add_f32_e32 v21, 0.5, v21 ; 062A2AF0 v_floor_f32_e32 v24, v20 ; 7E304914 v_floor_f32_e32 v20, v21 ; 7E284915 v_mov_b32_e32 v30, s3 ; 7E3C0203 v_mul_f32_e32 v25, s2, v18 ; 10322402 v_floor_f32_e32 v22, v25 ; 7E2C4919 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 s_and_saveexec_b64 s[12:13], vcc ; BE8C246A s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E v_mov_b32_e32 v21, 0xc2800000 ; 7E2A02FF C2800000 v_add_f32_e32 v18, v18, v21 ; 06242B12 v_mul_f32_e32 v21, s3, v18 ; 102A2403 v_floor_f32_e32 v21, v21 ; 7E2A4915 v_mul_f32_e32 v26, s3, v21 ; 10342A03 v_mad_f32 v31, v18, s3, -v21 ; D282001F 84540712 v_floor_f32_e32 v18, v26 ; 7E24491A v_mad_f32 v32, v21, s3, -v18 ; D2820020 84480715 v_add_f32_e32 v21, 4.0, v18 ; 062A24F6 s_or_saveexec_b64 s[12:13], s[12:13] ; BE8C250C v_mov_b32_e32 v18, s2 ; 7E240202 v_mov_b32_e32 v41, v30 ; 7E52031E s_xor_b64 exec, exec, s[12:13] ; 89FE0C7E v_mul_f32_e32 v21, s2, v22 ; 102A2C02 v_floor_f32_e32 v26, v25 ; 7E344919 v_subrev_f32_e32 v31, v26, v25 ; 0A3E331A v_floor_f32_e32 v21, v21 ; 7E2A4915 v_mad_f32 v32, v22, s2, -v21 ; D2820020 84540516 v_mov_b32_e32 v41, v18 ; 7E520312 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E v_mul_f32_e32 v26, s2, v24 ; 10343002 v_floor_f32_e32 v25, v26 ; 7E32491A v_mov_b32_e32 v22, 0x42800000 ; 7E2C02FF 42800000 v_cmp_le_f32_e32 vcc, v22, v24 ; 7C063116 s_and_saveexec_b64 s[12:13], vcc ; BE8C246A s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E v_mov_b32_e32 v22, 0xc2800000 ; 7E2C02FF C2800000 v_add_f32_e32 v22, v24, v22 ; 062C2D18 v_mul_f32_e32 v24, s3, v22 ; 10302C03 v_floor_f32_e32 v24, v24 ; 7E304918 v_mul_f32_e32 v29, s3, v24 ; 103A3003 v_mad_f32 v28, v22, s3, -v24 ; D282001C 84600716 v_floor_f32_e32 v22, v29 ; 7E2C491D v_mad_f32 v29, v24, s3, -v22 ; D282001D 84580718 v_add_f32_e32 v22, 4.0, v22 ; 062C2CF6 s_or_saveexec_b64 s[12:13], s[12:13] ; BE8C250C v_mov_b32_e32 v38, v30 ; 7E4C031E s_xor_b64 exec, exec, s[12:13] ; 89FE0C7E v_mul_f32_e32 v22, s2, v25 ; 102C3202 v_floor_f32_e32 v24, v26 ; 7E30491A v_subrev_f32_e32 v28, v24, v26 ; 0A383518 v_floor_f32_e32 v22, v22 ; 7E2C4916 v_mad_f32 v29, v25, s2, -v22 ; D282001D 84580519 v_mov_b32_e32 v38, v18 ; 7E4C0312 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E s_buffer_load_dword s15, s[8:11], 0x3c ; C207893C s_buffer_load_dword s14, s[8:11], 0x40 ; C2070940 v_mul_f32_e32 v35, s2, v20 ; 10462802 v_floor_f32_e32 v34, v35 ; 7E444923 v_mov_b32_e32 v24, 0x42800000 ; 7E3002FF 42800000 v_cmp_le_f32_e32 vcc, v24, v20 ; 7C062918 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[12:13], vcc ; BE8C246A s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E v_mov_b32_e32 v24, 0xc2800000 ; 7E3002FF C2800000 v_add_f32_e32 v20, v20, v24 ; 06283114 v_mul_f32_e32 v24, s3, v20 ; 10302803 v_floor_f32_e32 v24, v24 ; 7E304918 v_mul_f32_e32 v26, s3, v24 ; 10343003 v_mad_f32 v25, v20, s3, -v24 ; D2820019 84600714 v_floor_f32_e32 v20, v26 ; 7E28491A v_mad_f32 v26, v24, s3, -v20 ; D282001A 84500718 v_add_f32_e32 v24, 4.0, v20 ; 063028F6 s_or_saveexec_b64 s[12:13], s[12:13] ; BE8C250C v_mov_b32_e32 v39, s15 ; 7E4E020F s_buffer_load_dword s4, s[8:11], 0x8 ; C2020908 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s4, 38 ; 05FD4C04 s_buffer_load_dword s4, s[8:11], 0x9 ; C2020909 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s4, 39 ; 05FD4E04 s_buffer_load_dword s4, s[8:11], 0xa ; C202090A s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s4, 1 ; 05FD0204 s_buffer_load_dword s37, s[8:11], 0x44 ; C2128944 s_buffer_load_dword s44, s[8:11], 0x48 ; C2160948 s_buffer_load_dword s42, s[8:11], 0x4c ; C215094C s_buffer_load_dword s4, s[8:11], 0x50 ; C2020950 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s4, 4 ; 05FD0804 s_buffer_load_dword s4, s[8:11], 0x51 ; C2020951 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s4, 3 ; 05FD0604 s_buffer_load_dword s4, s[8:11], 0x52 ; C2020952 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s4, 2 ; 05FD0404 s_buffer_load_dword s4, s[8:11], 0x54 ; C2020954 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s4, 7 ; 05FD0E04 s_buffer_load_dword s4, s[8:11], 0x55 ; C2020955 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s4, 6 ; 05FD0C04 s_buffer_load_dword s4, s[8:11], 0x56 ; C2020956 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s4, 5 ; 05FD0A04 v_mov_b32_e32 v40, s14 ; 7E50020E s_xor_b64 exec, exec, s[12:13] ; 89FE0C7E v_mul_f32_e32 v20, s2, v34 ; 10284402 v_floor_f32_e32 v24, v35 ; 7E304923 v_subrev_f32_e32 v25, v24, v35 ; 0A324718 v_floor_f32_e32 v24, v20 ; 7E304914 v_mad_f32 v26, v34, s2, -v24 ; D282001A 84600522 v_mov_b32_e32 v30, v18 ; 7E3C0312 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E v_mul_f32_e32 v20, v23, v16 ; 10282117 v_mul_f32_e32 v18, v23, v17 ; 10242317 v_mul_f32_e32 v19, v23, v19 ; 10262717 v_mac_f32_e32 v20, s1, v15 ; 3E281E01 v_mac_f32_e32 v18, s0, v15 ; 3E241E00 v_mac_f32_e32 v19, s38, v15 ; 3E261E26 v_cmp_neq_f32_e64 s[26:27], s2, v41 ; D01A001A 00025202 v_cmp_eq_f32_e32 vcc, 4.0, v21 ; 7C042AF6 v_cmp_eq_f32_e64 s[4:5], 2.0, v21 ; D0040004 00022AF4 v_writelane_b32 v254, s4, 22 ; 05FD2C04 v_writelane_b32 v254, s5, 23 ; 05FD2E05 v_cmp_eq_f32_e64 s[4:5], 1.0, v21 ; D0040004 00022AF2 v_writelane_b32 v254, s4, 18 ; 05FD2404 v_writelane_b32 v254, s5, 19 ; 05FD2605 v_cmp_eq_f32_e64 s[4:5], 0, v21 ; D0040004 00022A80 v_writelane_b32 v254, s4, 20 ; 05FD2804 v_writelane_b32 v254, s5, 21 ; 05FD2A05 v_cmp_neq_f32_e64 s[24:25], s2, v38 ; D01A0018 00024C02 v_cmp_neq_f32_e64 s[22:23], s2, v30 ; D01A0016 00023C02 v_cmp_eq_f32_e64 s[2:3], 4.0, v22 ; D0040002 00022CF6 v_writelane_b32 v254, s2, 12 ; 05FD1802 v_writelane_b32 v254, s3, 13 ; 05FD1A03 v_cmp_eq_f32_e64 s[2:3], 2.0, v22 ; D0040002 00022CF4 v_writelane_b32 v254, s2, 14 ; 05FD1C02 v_writelane_b32 v254, s3, 15 ; 05FD1E03 v_cmp_eq_f32_e64 s[2:3], 1.0, v22 ; D0040002 00022CF2 v_writelane_b32 v254, s2, 16 ; 05FD2002 v_writelane_b32 v254, s3, 17 ; 05FD2203 v_cmp_eq_f32_e64 s[2:3], 0, v22 ; D0040002 00022C80 v_writelane_b32 v254, s2, 24 ; 05FD3002 v_writelane_b32 v254, s3, 25 ; 05FD3203 v_mul_f32_e32 v17, s1, v15 ; 10221E01 v_mul_f32_e32 v16, s0, v15 ; 10201E00 v_mul_f32_e32 v15, s38, v15 ; 101E1E26 v_cmp_eq_f32_e64 s[0:1], 4.0, v24 ; D0040000 000230F6 v_writelane_b32 v254, s0, 8 ; 05FD1000 v_writelane_b32 v254, s1, 9 ; 05FD1201 v_subrev_f32_e32 v23, s40, v33 ; 0A2E4228 v_mul_f32_e32 v34, v23, v23 ; 10442F17 v_cmp_eq_f32_e64 s[0:1], 2.0, v24 ; D0040000 000230F4 v_writelane_b32 v254, s0, 10 ; 05FD1400 v_writelane_b32 v254, s1, 11 ; 05FD1601 v_subrev_f32_e32 v23, s39, v37 ; 0A2E4A27 v_mac_f32_e32 v34, v23, v23 ; 3E442F17 v_mov_b32_e32 v23, 0xbe4ccccd ; 7E2E02FF BE4CCCCD v_subrev_f32_e32 v35, s41, v27 ; 0A463629 v_mac_f32_e32 v34, v35, v35 ; 3E444723 v_mul_f32_e32 v34, s42, v34 ; 1044442A v_log_f32_e32 v34, v34 ; 7E444F22 v_cndmask_b32_e64 v35, v39, v40, s[26:27] ; D2000023 006A5127 v_rcp_f32_e32 v35, v35 ; 7E465523 v_mov_b32_e32 v46, 0x3b000000 ; 7E5C02FF 3B000000 v_mul_f32_e32 v34, 0x3f317218, v34 ; 104444FF 3F317218 v_mov_b32_e32 v47, 0x3b800000 ; 7E5E02FF 3B800000 v_cndmask_b32_e64 v36, v47, v46, s[26:27] ; D2000024 006A5D2F s_load_dwordx4 s[96:99], s[100:101], 0x20 ; C0B06520 v_mul_f32_e32 v42, v35, v33 ; 10544323 v_floor_f32_e32 v42, v42 ; 7E54492A v_mad_f32 v42, v33, v35, -v42 ; D282002A 84AA4721 v_mul_f32_e32 v43, v35, v37 ; 10564B23 v_floor_f32_e32 v43, v43 ; 7E56492B v_mad_f32 v43, v37, v35, -v43 ; D282002B 84AE4725 v_add_f32_e64 v48, s37, s37 ; D2060030 00004A25 v_mad_f32 v44, -v48, v36, 1.0 ; D282002C 23CA4930 v_mul_f32_e32 v45, s37, v36 ; 105A4825 v_mad_f32 v49, v44, v42, v45 ; D2820031 04B6552C v_mad_f32 v42, v44, v43, v45 ; D282002A 04B6572C v_mul_f32_e32 v36, v35, v27 ; 10483723 v_floor_f32_e32 v36, v36 ; 7E484924 v_mad_f32 v35, v27, v35, -v36 ; D2820023 8492471B v_mac_f32_e32 v45, v44, v35 ; 3E5A472C v_cndmask_b32_e64 v35, v39, v40, s[24:25] ; D2000023 00625127 v_rcp_f32_e32 v50, v35 ; 7E645523 v_mul_f32_e32 v36, s44, v34 ; 1048442C v_mad_f32 v34, v41, v49, v31 ; D2820022 047E6329 v_mad_f32 v35, v41, v42, v32 ; D2820023 04825529 v_mac_f32_e32 v31, v41, v45 ; 3E3E5B29 v_mul_f32_e32 v42, v50, v33 ; 10544332 v_floor_f32_e32 v42, v42 ; 7E54492A v_mad_f32 v51, v33, v50, -v42 ; D2820033 84AA6521 v_mov_b32_e32 v42, v31 ; 7E54031F v_mov_b32_e32 v43, v32 ; 7E560320 v_mov_b32_e32 v44, v33 ; 7E580321 v_mov_b32_e32 v45, v34 ; 7E5A0322 v_mac_f32_e32 v32, v41, v49 ; 3E406329 v_mul_f32_e32 v41, v50, v37 ; 10524B32 v_floor_f32_e32 v41, v41 ; 7E524929 v_mad_f32 v41, v37, v50, -v41 ; D2820029 84A66525 v_mul_f32_e32 v43, v50, v27 ; 10563732 v_floor_f32_e32 v43, v43 ; 7E56492B v_mad_f32 v43, v27, v50, -v43 ; D282002B 84AE651B v_cndmask_b32_e64 v39, v39, v40, s[22:23] ; D2000027 005A5127 v_cndmask_b32_e64 v40, v47, v46, s[24:25] ; D2000028 00625D2F v_mad_f32 v44, -v48, v40, 1.0 ; D282002C 23CA5130 v_mul_f32_e32 v40, s37, v40 ; 10505025 v_rcp_f32_e32 v39, v39 ; 7E4E5527 v_mad_f32 v49, v44, v51, v40 ; D2820031 04A2672C v_mad_f32 v41, v44, v41, v40 ; D2820029 04A2532C v_mac_f32_e32 v40, v44, v43 ; 3E50572C v_mul_f32_e32 v43, v39, v33 ; 10564327 v_floor_f32_e32 v43, v43 ; 7E56492B v_mad_f32 v33, v33, v39, -v43 ; D2820021 84AE4F21 v_mul_f32_e32 v43, v39, v37 ; 10564B27 v_floor_f32_e32 v43, v43 ; 7E56492B v_mad_f32 v37, v37, v39, -v43 ; D2820025 84AE4F25 v_mul_f32_e32 v43, v39, v27 ; 10563727 v_floor_f32_e32 v43, v43 ; 7E56492B v_mad_f32 v27, v27, v39, -v43 ; D282001B 84AE4F1B v_cndmask_b32_e64 v39, v47, v46, s[22:23] ; D2000027 005A5D2F v_mad_f32 v43, -v48, v39, 1.0 ; D282002B 23CA4F30 v_mul_f32_e32 v44, s37, v39 ; 10584E25 v_mad_f32 v33, v43, v33, v44 ; D2820021 04B2432B v_mad_f32 v37, v43, v37, v44 ; D2820025 04B24B2B v_mac_f32_e32 v44, v43, v27 ; 3E58372B v_mad_f32 v50, v38, v49, v28 ; D2820032 04726326 v_mad_f32 v51, v38, v41, v29 ; D2820033 04765326 v_mac_f32_e32 v28, v38, v40 ; 3E385126 v_mad_f32 v39, v30, v33, v25 ; D2820027 0466431E v_mad_f32 v40, v30, v37, v26 ; D2820028 046A4B1E v_mac_f32_e32 v25, v30, v44 ; 3E32591E v_mov_b32_e32 v53, v28 ; 7E6A031C v_mov_b32_e32 v54, v29 ; 7E6C031D v_mov_b32_e32 v55, v30 ; 7E6E031E v_mov_b32_e32 v56, v31 ; 7E70031F v_mac_f32_e32 v29, v38, v49 ; 3E3A6326 v_mov_b32_e32 v46, v25 ; 7E5C0319 v_mov_b32_e32 v47, v26 ; 7E5E031A v_mov_b32_e32 v48, v27 ; 7E60031B v_mov_b32_e32 v49, v28 ; 7E62031C v_mac_f32_e32 v26, v30, v33 ; 3E34431E v_mov_b32_e32 v43, v35 ; 7E560323 v_mov_b32_e32 v52, v36 ; 7E680324 v_mov_b32_e32 v54, v51 ; 7E6C0333 v_mov_b32_e32 v41, v36 ; 7E520324 v_mov_b32_e32 v47, v40 ; 7E5E0328 s_load_dwordx8 s[64:71], s[6:7], 0x40 ; C0E00740 v_mov_b32_e32 v44, v36 ; 7E580324 v_mov_b32_e32 v33, v36 ; 7E420324 v_mov_b32_e32 v55, v36 ; 7E6E0324 v_mov_b32_e32 v30, v36 ; 7E3C0324 v_mov_b32_e32 v48, v36 ; 7E600324 v_mov_b32_e32 v27, v36 ; 7E360324 s_load_dwordx4 s[44:47], s[100:101], 0x8 ; C0966508 s_load_dwordx4 s[28:31], s[100:101], 0x18 ; C08E6518 s_load_dwordx8 s[12:19], s[6:7], 0x20 ; C0C60720 s_load_dwordx8 s[56:63], s[6:7], 0x30 ; C0DC0730 s_load_dwordx4 s[8:11], s[100:101], 0x10 ; C0846510 s_load_dwordx4 s[48:51], s[100:101], 0x24 ; C0986524 s_load_dwordx8 s[20:27], s[6:7], 0x10 ; C0CA0710 s_load_dwordx8 s[80:87], s[6:7], 0x48 ; C0E80748 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[57:59], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[64:71], s[96:99] ; F0900700 03103922 s_load_dwordx4 s[52:55], s[100:101], 0x0 ; C09A6500 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_load_dwordx4 s[40:43], s[100:101], 0x1c ; C094651C s_load_dwordx8 s[72:79], s[6:7], 0x38 ; C0E40738 image_sample_l v[60:62], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[56:63], s[28:31] ; F0900700 00EE3C22 image_sample_l v[63:65], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[12:19], s[8:11] ; F0900700 00433F22 s_load_dwordx4 s[0:3], s[100:101], 0xc ; C080650C s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_writelane_b32 v254, s0, 40 ; 05FD5000 v_writelane_b32 v254, s1, 41 ; 05FD5201 v_writelane_b32 v254, s2, 42 ; 05FD5402 v_writelane_b32 v254, s3, 43 ; 05FD5603 s_load_dwordx8 s[88:95], s[6:7], 0x18 ; C0EC0718 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s88, 44 ; 05FD5858 v_writelane_b32 v254, s89, 45 ; 05FD5A59 v_writelane_b32 v254, s90, 46 ; 05FD5C5A v_writelane_b32 v254, s91, 47 ; 05FD5E5B v_writelane_b32 v254, s92, 48 ; 05FD605C v_writelane_b32 v254, s93, 49 ; 05FD625D v_writelane_b32 v254, s94, 50 ; 05FD645E v_writelane_b32 v254, s95, 51 ; 05FD665F image_sample_l v[66:68], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[20:27], s[44:47] ; F0900700 01654222 s_load_dwordx4 s[0:3], s[100:101], 0x4 ; C0806504 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_writelane_b32 v254, s0, 26 ; 05FD3400 v_writelane_b32 v254, s1, 27 ; 05FD3601 v_writelane_b32 v254, s2, 28 ; 05FD3802 v_writelane_b32 v254, s3, 29 ; 05FD3A03 s_load_dwordx8 s[88:95], s[6:7], 0x8 ; C0EC0708 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s88, 30 ; 05FD3C58 v_writelane_b32 v254, s89, 31 ; 05FD3E59 v_writelane_b32 v254, s90, 32 ; 05FD405A v_writelane_b32 v254, s91, 33 ; 05FD425B v_writelane_b32 v254, s92, 34 ; 05FD445C v_writelane_b32 v254, s93, 35 ; 05FD465D v_writelane_b32 v254, s94, 36 ; 05FD485E v_writelane_b32 v254, s95, 37 ; 05FD4A5F image_sample_l v[69:71], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[32:39], s[52:55] ; F0900700 01A84522 image_sample_l v[72:74], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[64:71], s[96:99] ; F0900700 0310482A image_sample_l v[75:77], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[56:63], s[28:31] ; F0900700 00EE4B2A image_sample_l v[78:80], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[12:19], s[8:11] ; F0900700 00434E2A image_sample_l v[81:83], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[20:27], s[44:47] ; F0900700 0165512A image_sample_l v[84:86], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[32:39], s[52:55] ; F0900700 01A8542A image_sample_l v[87:89], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[64:71], s[96:99] ; F0900700 0310571F image_sample_l v[90:92], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[56:63], s[28:31] ; F0900700 00EE5A1F image_sample_l v[93:95], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[12:19], s[8:11] ; F0900700 00435D1F image_sample_l v[96:98], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[20:27], s[44:47] ; F0900700 0165601F image_sample_l v[99:101], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[32:39], s[52:55] ; F0900700 01A8631F image_sample_l v[102:104], 7, 0, 0, 0, 0, 0, 0, 0, v[50:53], s[64:71], s[96:99] ; F0900700 03106632 image_sample_l v[105:107], 7, 0, 0, 0, 0, 0, 0, 0, v[50:53], s[56:63], s[28:31] ; F0900700 00EE6932 image_sample_l v[108:110], 7, 0, 0, 0, 0, 0, 0, 0, v[50:53], s[12:19], s[8:11] ; F0900700 00436C32 image_sample_l v[111:113], 7, 0, 0, 0, 0, 0, 0, 0, v[50:53], s[20:27], s[44:47] ; F0900700 01656F32 image_sample_l v[114:116], 7, 0, 0, 0, 0, 0, 0, 0, v[50:53], s[32:39], s[52:55] ; F0900700 01A87232 image_sample_l v[117:119], 7, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[64:71], s[96:99] ; F0900700 03107535 image_sample_l v[120:122], 7, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[56:63], s[28:31] ; F0900700 00EE7835 image_sample_l v[123:125], 7, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[12:19], s[8:11] ; F0900700 00437B35 image_sample_l v[126:128], 7, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[20:27], s[44:47] ; F0900700 01657E35 image_sample_l v[129:131], 7, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[32:39], s[52:55] ; F0900700 01A88135 image_sample_l v[132:134], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[64:71], s[96:99] ; F0900700 0310841C image_sample_l v[135:137], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[56:63], s[28:31] ; F0900700 00EE871C image_sample_l v[138:140], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[12:19], s[8:11] ; F0900700 00438A1C image_sample_l v[141:143], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[20:27], s[44:47] ; F0900700 01658D1C image_sample_l v[144:146], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[32:39], s[52:55] ; F0900700 01A8901C image_sample_l v[147:149], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[64:71], s[96:99] ; F0900700 03109327 image_sample_l v[150:152], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[56:63], s[28:31] ; F0900700 00EE9627 image_sample_l v[153:155], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[12:19], s[8:11] ; F0900700 00439927 image_sample_l v[156:158], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[20:27], s[44:47] ; F0900700 01659C27 image_sample_l v[159:161], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[32:39], s[52:55] ; F0900700 01A89F27 image_sample_l v[162:164], 7, 0, 0, 0, 0, 0, 0, 0, v[46:49], s[64:71], s[96:99] ; F0900700 0310A22E image_sample_l v[165:167], 7, 0, 0, 0, 0, 0, 0, 0, v[46:49], s[56:63], s[28:31] ; F0900700 00EEA52E image_sample_l v[168:170], 7, 0, 0, 0, 0, 0, 0, 0, v[46:49], s[12:19], s[8:11] ; F0900700 0043A82E s_load_dwordx4 s[88:91], s[100:101], 0x14 ; C0AC6514 s_load_dwordx8 s[0:7], s[6:7], 0x28 ; C0C00728 image_sample_l v[171:173], 7, 0, 0, 0, 0, 0, 0, 0, v[46:49], s[20:27], s[44:47] ; F0900700 0165AB2E image_sample_l v[174:176], 7, 0, 0, 0, 0, 0, 0, 0, v[46:49], s[32:39], s[52:55] ; F0900700 01A8AE2E image_sample_l v[177:179], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[64:71], s[96:99] ; F0900700 0310B119 image_sample_l v[180:182], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[56:63], s[28:31] ; F0900700 00EEB419 image_sample_l v[183:185], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[12:19], s[8:11] ; F0900700 0043B719 image_sample_l v[186:188], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[20:27], s[44:47] ; F0900700 0165BA19 image_sample_l v[189:191], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[32:39], s[52:55] ; F0900700 01A8BD19 image_sample_l v[37:38], 10, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[80:87], s[48:51] ; F0900A00 0194252A image_sample_l v[192:193], 10, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[72:79], s[40:43] ; F0900A00 0152C02A s_waitcnt vmcnt(9) lgkmcnt(0) ; BF8C0079 image_sample_l v[194:195], 10, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[0:7], s[88:91] ; F0900A00 02C0C22A v_readlane_b32 s8, v254, 40 ; 021151FE v_readlane_b32 s9, v254, 41 ; 021353FE v_readlane_b32 s10, v254, 42 ; 021555FE v_readlane_b32 s11, v254, 43 ; 021757FE s_nop 2 ; BF800002 v_readlane_b32 s24, v254, 44 ; 023159FE v_readlane_b32 s25, v254, 45 ; 02335BFE v_readlane_b32 s26, v254, 46 ; 02355DFE v_readlane_b32 s27, v254, 47 ; 02375FFE v_readlane_b32 s28, v254, 48 ; 023961FE v_readlane_b32 s29, v254, 49 ; 023B63FE v_readlane_b32 s30, v254, 50 ; 023D65FE v_readlane_b32 s31, v254, 51 ; 023F67FE s_nop 2 ; BF800002 image_sample_l v[196:197], 10, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[24:31], s[8:11] ; F0900A00 0046C42A v_readlane_b32 s92, v254, 26 ; 02B935FE v_readlane_b32 s93, v254, 27 ; 02BB37FE v_readlane_b32 s94, v254, 28 ; 02BD39FE v_readlane_b32 s95, v254, 29 ; 02BF3BFE s_nop 2 ; BF800002 v_readlane_b32 s16, v254, 30 ; 02213DFE v_readlane_b32 s17, v254, 31 ; 02233FFE v_readlane_b32 s18, v254, 32 ; 022541FE v_readlane_b32 s19, v254, 33 ; 022743FE v_readlane_b32 s20, v254, 34 ; 022945FE v_readlane_b32 s21, v254, 35 ; 022B47FE v_readlane_b32 s22, v254, 36 ; 022D49FE v_readlane_b32 s23, v254, 37 ; 022F4BFE s_nop 2 ; BF800002 image_sample_l v[42:43], 10, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[16:23], s[92:95] ; F0900A00 02E42A2A image_sample_l v[44:45], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[80:87], s[48:51] ; F0900A00 01942C1F image_sample_l v[198:199], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[72:79], s[40:43] ; F0900A00 0152C61F image_sample_l v[200:201], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[0:7], s[88:91] ; F0900A00 02C0C81F image_sample_l v[202:203], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[24:31], s[8:11] ; F0900A00 0046CA1F s_mov_b32 s12, s8 ; BE8C0308 s_mov_b32 s13, s9 ; BE8D0309 s_mov_b32 s14, s10 ; BE8E030A s_mov_b32 s15, s11 ; BE8F030B image_sample_l v[31:32], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[16:23], s[92:95] ; F0900A00 02E41F1F s_mov_b32 s8, s92 ; BE88035C s_mov_b32 s9, s93 ; BE89035D s_mov_b32 s10, s94 ; BE8A035E s_mov_b32 s11, s95 ; BE8B035F s_waitcnt vmcnt(9) ; BF8C0779 image_sample_l v[204:205], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[80:87], s[48:51] ; F0900A00 0194CC22 image_sample_l v[206:207], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[72:79], s[40:43] ; F0900A00 0152CE22 image_sample_l v[208:209], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[0:7], s[88:91] ; F0900A00 02C0D022 image_sample_l v[210:211], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[24:31], s[12:15] ; F0900A00 0066D222 image_sample_l v[33:34], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[16:23], s[8:11] ; F0900A00 00442122 image_sample_l v[35:36], 10, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[80:87], s[48:51] ; F0900A00 01942335 image_sample_l v[212:213], 10, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[72:79], s[40:43] ; F0900A00 0152D435 image_sample_l v[214:215], 10, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[0:7], s[88:91] ; F0900A00 02C0D635 image_sample_l v[216:217], 10, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[24:31], s[12:15] ; F0900A00 0066D835 image_sample_l v[53:54], 10, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[16:23], s[8:11] ; F0900A00 00443535 s_waitcnt vmcnt(10) ; BF8C077A image_sample_l v[55:56], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[80:87], s[48:51] ; F0900A00 0194371C image_sample_l v[218:219], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[72:79], s[40:43] ; F0900A00 0152DA1C image_sample_l v[220:221], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[0:7], s[88:91] ; F0900A00 02C0DC1C image_sample_l v[222:223], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[24:31], s[12:15] ; F0900A00 0066DE1C image_sample_l v[28:29], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[16:23], s[8:11] ; F0900A00 00441C1C s_waitcnt vmcnt(5) ; BF8C0775 image_sample_l v[224:225], 10, 0, 0, 0, 0, 0, 0, 0, v[50:53], s[80:87], s[48:51] ; F0900A00 0194E032 image_sample_l v[226:227], 10, 0, 0, 0, 0, 0, 0, 0, v[50:53], s[72:79], s[40:43] ; F0900A00 0152E232 image_sample_l v[228:229], 10, 0, 0, 0, 0, 0, 0, 0, v[50:53], s[0:7], s[88:91] ; F0900A00 02C0E432 image_sample_l v[230:231], 10, 0, 0, 0, 0, 0, 0, 0, v[50:53], s[24:31], s[12:15] ; F0900A00 0066E632 image_sample_l v[50:51], 10, 0, 0, 0, 0, 0, 0, 0, v[50:53], s[16:23], s[8:11] ; F0900A00 00443232 image_sample_l v[232:233], 10, 0, 0, 0, 0, 0, 0, 0, v[46:49], s[80:87], s[48:51] ; F0900A00 0194E82E image_sample_l v[234:235], 10, 0, 0, 0, 0, 0, 0, 0, v[46:49], s[72:79], s[40:43] ; F0900A00 0152EA2E image_sample_l v[236:237], 10, 0, 0, 0, 0, 0, 0, 0, v[46:49], s[0:7], s[88:91] ; F0900A00 02C0EC2E image_sample_l v[238:239], 10, 0, 0, 0, 0, 0, 0, 0, v[46:49], s[24:31], s[12:15] ; F0900A00 0066EE2E image_sample_l v[46:47], 10, 0, 0, 0, 0, 0, 0, 0, v[46:49], s[16:23], s[8:11] ; F0900A00 00442E2E s_waitcnt vmcnt(10) ; BF8C077A image_sample_l v[48:49], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[80:87], s[48:51] ; F0900A00 01943019 image_sample_l v[240:241], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[72:79], s[40:43] ; F0900A00 0152F019 image_sample_l v[242:243], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[0:7], s[88:91] ; F0900A00 02C0F219 image_sample_l v[244:245], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[24:31], s[12:15] ; F0900A00 0066F419 image_sample_l v[25:26], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[16:23], s[8:11] ; F0900A00 00441919 image_sample_l v[246:247], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[80:87], s[48:51] ; F0900A00 0194F627 image_sample_l v[248:249], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[72:79], s[40:43] ; F0900A00 0152F827 image_sample_l v[250:251], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[0:7], s[88:91] ; F0900A00 02C0FA27 v_cndmask_b32_e64 v27, 0, 1.0, vcc ; D200001B 01A9E480 v_mov_b32_e32 v30, 0x40400000 ; 7E3C02FF 40400000 v_cmp_eq_f32_e64 s[4:5], v21, v30 ; D0040004 00023D15 v_cndmask_b32_e64 v21, 0, 1.0, s[4:5] ; D2000015 0011E480 v_readlane_b32 s0, v254, 22 ; 02012DFE v_readlane_b32 s1, v254, 23 ; 02032FFE s_nop 2 ; BF800002 v_cndmask_b32_e64 v52, 0, 1.0, s[0:1] ; D2000034 0001E480 v_readlane_b32 s0, v254, 18 ; 020125FE v_readlane_b32 s1, v254, 19 ; 020327FE s_nop 2 ; BF800002 v_cndmask_b32_e64 v252, 0, 1.0, s[0:1] ; D20000FC 0001E480 v_readlane_b32 s0, v254, 20 ; 020129FE v_readlane_b32 s1, v254, 21 ; 02032BFE s_nop 2 ; BF800002 v_cndmask_b32_e64 v253, 0, 1.0, s[0:1] ; D20000FD 0001E480 v_readlane_b32 s0, v254, 24 ; 020131FE v_readlane_b32 s1, v254, 25 ; 020333FE s_nop 2 ; BF800002 v_cndmask_b32_e64 v1, 0, 1.0, s[0:1] ; D2000001 0001E480 v_mul_f32_e32 v69, v253, v69 ; 108A8BFD v_mul_f32_e32 v70, v253, v70 ; 108C8DFD v_mul_f32_e32 v71, v253, v71 ; 108E8FFD v_mac_f32_e32 v69, v252, v66 ; 3E8A85FC v_mac_f32_e32 v70, v252, v67 ; 3E8C87FC v_mac_f32_e32 v71, v252, v68 ; 3E8E89FC v_mac_f32_e32 v69, v52, v63 ; 3E8A7F34 v_mac_f32_e32 v70, v52, v64 ; 3E8C8134 v_mac_f32_e32 v71, v52, v65 ; 3E8E8334 v_mac_f32_e32 v69, v21, v60 ; 3E8A7915 v_mac_f32_e32 v70, v21, v61 ; 3E8C7B15 v_mac_f32_e32 v71, v21, v62 ; 3E8E7D15 v_mac_f32_e32 v69, v27, v57 ; 3E8A731B v_mac_f32_e32 v70, v27, v58 ; 3E8C751B v_mac_f32_e32 v71, v27, v59 ; 3E8E771B v_mul_f32_e32 v57, v253, v84 ; 1072A9FD v_mul_f32_e32 v58, v253, v85 ; 1074ABFD v_mul_f32_e32 v59, v253, v86 ; 1076ADFD v_mac_f32_e32 v57, v252, v81 ; 3E72A3FC v_mac_f32_e32 v58, v252, v82 ; 3E74A5FC v_mac_f32_e32 v59, v252, v83 ; 3E76A7FC v_mac_f32_e32 v57, v52, v78 ; 3E729D34 v_mac_f32_e32 v58, v52, v79 ; 3E749F34 v_mac_f32_e32 v59, v52, v80 ; 3E76A134 v_mac_f32_e32 v57, v21, v75 ; 3E729715 v_mac_f32_e32 v58, v21, v76 ; 3E749915 v_mac_f32_e32 v59, v21, v77 ; 3E769B15 v_mac_f32_e32 v57, v27, v72 ; 3E72911B v_mac_f32_e32 v58, v27, v73 ; 3E74931B v_mac_f32_e32 v59, v27, v74 ; 3E76951B v_mul_f32_e32 v60, v253, v99 ; 1078C7FD v_mul_f32_e32 v61, v253, v100 ; 107AC9FD v_mul_f32_e32 v62, v253, v101 ; 107CCBFD v_mac_f32_e32 v60, v252, v96 ; 3E78C1FC v_mac_f32_e32 v61, v252, v97 ; 3E7AC3FC v_mac_f32_e32 v62, v252, v98 ; 3E7CC5FC v_mac_f32_e32 v60, v52, v93 ; 3E78BB34 v_mac_f32_e32 v61, v52, v94 ; 3E7ABD34 v_mac_f32_e32 v62, v52, v95 ; 3E7CBF34 v_mac_f32_e32 v60, v21, v90 ; 3E78B515 v_mac_f32_e32 v61, v21, v91 ; 3E7AB715 v_mac_f32_e32 v62, v21, v92 ; 3E7CB915 v_mac_f32_e32 v60, v27, v87 ; 3E78AF1B v_mac_f32_e32 v61, v27, v88 ; 3E7AB11B v_mac_f32_e32 v62, v27, v89 ; 3E7CB31B v_mul_f32_e32 v63, v1, v114 ; 107EE501 v_mul_f32_e32 v64, v1, v115 ; 1080E701 v_mul_f32_e32 v65, v1, v116 ; 1082E901 v_readlane_b32 s0, v254, 16 ; 020121FE v_readlane_b32 s1, v254, 17 ; 020323FE s_nop 2 ; BF800002 v_cndmask_b32_e64 v66, 0, 1.0, s[0:1] ; D2000042 0001E480 v_mac_f32_e32 v63, v66, v111 ; 3E7EDF42 v_mac_f32_e32 v64, v66, v112 ; 3E80E142 v_mac_f32_e32 v65, v66, v113 ; 3E82E342 v_readlane_b32 s0, v254, 14 ; 02011DFE v_readlane_b32 s1, v254, 15 ; 02031FFE s_nop 2 ; BF800002 v_cndmask_b32_e64 v67, 0, 1.0, s[0:1] ; D2000043 0001E480 v_mac_f32_e32 v63, v67, v108 ; 3E7ED943 v_mac_f32_e32 v64, v67, v109 ; 3E80DB43 v_mac_f32_e32 v65, v67, v110 ; 3E82DD43 v_cmp_eq_f32_e64 s[4:5], v22, v30 ; D0040004 00023D16 v_cndmask_b32_e64 v22, 0, 1.0, s[4:5] ; D2000016 0011E480 v_mac_f32_e32 v63, v22, v105 ; 3E7ED316 v_mac_f32_e32 v64, v22, v106 ; 3E80D516 v_mac_f32_e32 v65, v22, v107 ; 3E82D716 v_readlane_b32 s0, v254, 12 ; 020119FE v_readlane_b32 s1, v254, 13 ; 02031BFE s_nop 2 ; BF800002 v_cndmask_b32_e64 v68, 0, 1.0, s[0:1] ; D2000044 0001E480 v_mac_f32_e32 v63, v68, v102 ; 3E7ECD44 v_mac_f32_e32 v64, v68, v103 ; 3E80CF44 v_mac_f32_e32 v65, v68, v104 ; 3E82D144 v_mul_f32_e32 v72, v1, v129 ; 10910301 v_mul_f32_e32 v73, v1, v130 ; 10930501 v_mul_f32_e32 v74, v1, v131 ; 10950701 v_mac_f32_e32 v72, v66, v126 ; 3E90FD42 v_mac_f32_e32 v73, v66, v127 ; 3E92FF42 v_mac_f32_e32 v74, v66, v128 ; 3E950142 v_mac_f32_e32 v72, v67, v123 ; 3E90F743 v_mac_f32_e32 v73, v67, v124 ; 3E92F943 v_mac_f32_e32 v74, v67, v125 ; 3E94FB43 v_mac_f32_e32 v72, v22, v120 ; 3E90F116 v_mac_f32_e32 v73, v22, v121 ; 3E92F316 v_mac_f32_e32 v74, v22, v122 ; 3E94F516 v_mac_f32_e32 v72, v68, v117 ; 3E90EB44 v_mac_f32_e32 v73, v68, v118 ; 3E92ED44 v_mac_f32_e32 v74, v68, v119 ; 3E94EF44 v_mul_f32_e32 v75, v1, v144 ; 10972101 v_mul_f32_e32 v76, v1, v145 ; 10992301 v_mul_f32_e32 v77, v1, v146 ; 109B2501 v_mac_f32_e32 v75, v66, v141 ; 3E971B42 v_mac_f32_e32 v76, v66, v142 ; 3E991D42 v_mac_f32_e32 v77, v66, v143 ; 3E9B1F42 v_mac_f32_e32 v75, v67, v138 ; 3E971543 v_mac_f32_e32 v76, v67, v139 ; 3E991743 v_mac_f32_e32 v77, v67, v140 ; 3E9B1943 v_mac_f32_e32 v75, v22, v135 ; 3E970F16 v_mac_f32_e32 v76, v22, v136 ; 3E991116 v_mac_f32_e32 v77, v22, v137 ; 3E9B1316 v_mac_f32_e32 v75, v68, v132 ; 3E970944 v_mac_f32_e32 v76, v68, v133 ; 3E990B44 v_mac_f32_e32 v77, v68, v134 ; 3E9B0D44 v_cmp_eq_f32_e64 s[2:3], 0, v24 ; D0040002 00023080 v_cndmask_b32_e64 v78, 0, 1.0, s[2:3] ; D200004E 0009E480 v_mul_f32_e32 v79, v78, v159 ; 109F3F4E v_mul_f32_e32 v80, v78, v160 ; 10A1414E v_mul_f32_e32 v81, v78, v161 ; 10A3434E v_cmp_eq_f32_e64 s[2:3], 1.0, v24 ; D0040002 000230F2 v_cndmask_b32_e64 v82, 0, 1.0, s[2:3] ; D2000052 0009E480 v_mac_f32_e32 v79, v82, v156 ; 3E9F3952 v_mac_f32_e32 v80, v82, v157 ; 3EA13B52 v_mac_f32_e32 v81, v82, v158 ; 3EA33D52 v_readlane_b32 s0, v254, 10 ; 020115FE v_readlane_b32 s1, v254, 11 ; 020317FE s_nop 2 ; BF800002 v_cndmask_b32_e64 v83, 0, 1.0, s[0:1] ; D2000053 0001E480 v_mac_f32_e32 v79, v83, v153 ; 3E9F3353 v_mac_f32_e32 v80, v83, v154 ; 3EA13553 v_mac_f32_e32 v81, v83, v155 ; 3EA33753 v_cmp_eq_f32_e64 s[0:1], v24, v30 ; D0040000 00023D18 v_cndmask_b32_e64 v24, 0, 1.0, s[0:1] ; D2000018 0001E480 v_mac_f32_e32 v79, v24, v150 ; 3E9F2D18 v_mac_f32_e32 v80, v24, v151 ; 3EA12F18 v_mac_f32_e32 v81, v24, v152 ; 3EA33118 v_readlane_b32 s0, v254, 8 ; 020111FE v_readlane_b32 s1, v254, 9 ; 020313FE s_nop 2 ; BF800002 v_cndmask_b32_e64 v84, 0, 1.0, s[0:1] ; D2000054 0001E480 v_mac_f32_e32 v79, v84, v147 ; 3E9F2754 v_mac_f32_e32 v80, v84, v148 ; 3EA12954 v_mac_f32_e32 v81, v84, v149 ; 3EA32B54 v_mul_f32_e32 v85, v78, v174 ; 10AB5D4E v_mul_f32_e32 v86, v78, v175 ; 10AD5F4E v_mul_f32_e32 v87, v78, v176 ; 10AF614E v_mac_f32_e32 v85, v82, v171 ; 3EAB5752 v_mac_f32_e32 v86, v82, v172 ; 3EAD5952 v_mac_f32_e32 v87, v82, v173 ; 3EAF5B52 v_mac_f32_e32 v85, v83, v168 ; 3EAB5153 v_mac_f32_e32 v86, v83, v169 ; 3EAD5353 v_mac_f32_e32 v87, v83, v170 ; 3EAF5553 v_mac_f32_e32 v85, v24, v165 ; 3EAB4B18 v_mac_f32_e32 v86, v24, v166 ; 3EAD4D18 v_mac_f32_e32 v87, v24, v167 ; 3EAF4F18 v_mac_f32_e32 v85, v84, v162 ; 3EAB4554 v_mac_f32_e32 v86, v84, v163 ; 3EAD4754 v_mac_f32_e32 v87, v84, v164 ; 3EAF4954 v_mul_f32_e32 v88, v78, v189 ; 10B17B4E v_mul_f32_e32 v89, v78, v190 ; 10B37D4E v_mul_f32_e32 v90, v78, v191 ; 10B57F4E v_mac_f32_e32 v88, v82, v186 ; 3EB17552 v_mac_f32_e32 v89, v82, v187 ; 3EB37752 v_mac_f32_e32 v90, v82, v188 ; 3EB57952 v_mac_f32_e32 v88, v83, v183 ; 3EB16F53 v_mac_f32_e32 v89, v83, v184 ; 3EB37153 v_mac_f32_e32 v90, v83, v185 ; 3EB57353 v_mac_f32_e32 v88, v24, v180 ; 3EB16918 v_mac_f32_e32 v89, v24, v181 ; 3EB36B18 v_mac_f32_e32 v90, v24, v182 ; 3EB56D18 v_mac_f32_e32 v88, v84, v177 ; 3EB16354 v_mac_f32_e32 v89, v84, v178 ; 3EB36554 v_mac_f32_e32 v90, v84, v179 ; 3EB56754 v_mul_f32_e32 v42, v253, v42 ; 105455FD v_mul_f32_e32 v43, v253, v43 ; 105657FD v_mac_f32_e32 v42, v252, v196 ; 3E5589FC v_mac_f32_e32 v43, v252, v197 ; 3E578BFC v_mul_f32_e32 v31, v253, v31 ; 103E3FFD v_mul_f32_e32 v32, v253, v32 ; 104041FD v_mac_f32_e32 v31, v252, v202 ; 3E3F95FC v_mac_f32_e32 v32, v252, v203 ; 3E4197FC v_mul_f32_e32 v33, v253, v33 ; 104243FD v_mul_f32_e32 v34, v253, v34 ; 104445FD v_mac_f32_e32 v33, v252, v210 ; 3E43A5FC v_mac_f32_e32 v34, v252, v211 ; 3E45A7FC v_mac_f32_e32 v42, v52, v194 ; 3E558534 v_mac_f32_e32 v43, v52, v195 ; 3E578734 v_mac_f32_e32 v31, v52, v200 ; 3E3F9134 v_mac_f32_e32 v32, v52, v201 ; 3E419334 v_mac_f32_e32 v33, v52, v208 ; 3E43A134 v_mac_f32_e32 v34, v52, v209 ; 3E45A334 v_mac_f32_e32 v42, v21, v192 ; 3E558115 v_mac_f32_e32 v43, v21, v193 ; 3E578315 v_mac_f32_e32 v31, v21, v198 ; 3E3F8D15 v_mac_f32_e32 v32, v21, v199 ; 3E418F15 v_mac_f32_e32 v33, v21, v206 ; 3E439D15 v_mac_f32_e32 v34, v21, v207 ; 3E459F15 v_mac_f32_e32 v42, v27, v37 ; 3E544B1B v_mac_f32_e32 v43, v27, v38 ; 3E564D1B v_mac_f32_e32 v31, v27, v44 ; 3E3E591B v_mac_f32_e32 v32, v27, v45 ; 3E405B1B v_mac_f32_e32 v33, v27, v204 ; 3E43991B v_mac_f32_e32 v34, v27, v205 ; 3E459B1B v_mul_f32_e32 v21, v1, v53 ; 102A6B01 v_mul_f32_e32 v27, v1, v54 ; 10366D01 v_mac_f32_e32 v21, v66, v216 ; 3E2BB142 v_mac_f32_e32 v27, v66, v217 ; 3E37B342 v_mul_f32_e32 v28, v1, v28 ; 10383901 v_mul_f32_e32 v29, v1, v29 ; 103A3B01 v_mac_f32_e32 v28, v66, v222 ; 3E39BD42 v_mac_f32_e32 v29, v66, v223 ; 3E3BBF42 s_waitcnt vmcnt(13) ; BF8C077D v_mul_f32_e32 v37, v1, v50 ; 104A6501 v_mul_f32_e32 v1, v1, v51 ; 10026701 v_mac_f32_e32 v37, v66, v230 ; 3E4BCD42 v_mac_f32_e32 v1, v66, v231 ; 3E03CF42 v_mac_f32_e32 v21, v67, v214 ; 3E2BAD43 v_mac_f32_e32 v27, v67, v215 ; 3E37AF43 v_mac_f32_e32 v28, v67, v220 ; 3E39B943 v_mac_f32_e32 v29, v67, v221 ; 3E3BBB43 v_mac_f32_e32 v37, v67, v228 ; 3E4BC943 v_mac_f32_e32 v1, v67, v229 ; 3E03CB43 v_mac_f32_e32 v21, v22, v212 ; 3E2BA916 v_mac_f32_e32 v27, v22, v213 ; 3E37AB16 v_mac_f32_e32 v28, v22, v218 ; 3E39B516 v_mac_f32_e32 v29, v22, v219 ; 3E3BB716 v_mac_f32_e32 v37, v22, v226 ; 3E4BC516 v_mac_f32_e32 v1, v22, v227 ; 3E03C716 v_mac_f32_e32 v21, v68, v35 ; 3E2A4744 v_mac_f32_e32 v27, v68, v36 ; 3E364944 v_mac_f32_e32 v28, v68, v55 ; 3E386F44 v_mac_f32_e32 v29, v68, v56 ; 3E3A7144 v_mac_f32_e32 v37, v68, v224 ; 3E4BC144 v_mac_f32_e32 v1, v68, v225 ; 3E03C344 image_sample_l v[35:36], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[24:31], s[12:15] ; F0900A00 00662327 image_sample_l v[38:39], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[16:23], s[8:11] ; F0900A00 00442627 s_waitcnt vmcnt(10) ; BF8C077A v_mul_f32_e32 v22, v78, v46 ; 102C5D4E v_mul_f32_e32 v40, v78, v47 ; 10505F4E v_mac_f32_e32 v22, v82, v238 ; 3E2DDD52 v_mac_f32_e32 v40, v82, v239 ; 3E51DF52 s_waitcnt vmcnt(5) ; BF8C0775 v_mul_f32_e32 v25, v78, v25 ; 1032334E v_mul_f32_e32 v26, v78, v26 ; 1034354E v_mac_f32_e32 v25, v82, v244 ; 3E33E952 v_mac_f32_e32 v26, v82, v245 ; 3E35EB52 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v38, v78, v38 ; 104C4D4E v_mul_f32_e32 v39, v78, v39 ; 104E4F4E v_mac_f32_e32 v38, v82, v35 ; 3E4C4752 v_mac_f32_e32 v39, v82, v36 ; 3E4E4952 v_mac_f32_e32 v22, v83, v236 ; 3E2DD953 v_mac_f32_e32 v40, v83, v237 ; 3E51DB53 v_mac_f32_e32 v25, v83, v242 ; 3E33E553 v_mac_f32_e32 v26, v83, v243 ; 3E35E753 v_mac_f32_e32 v38, v83, v250 ; 3E4DF553 v_mac_f32_e32 v39, v83, v251 ; 3E4FF753 v_mac_f32_e32 v22, v24, v234 ; 3E2DD518 v_mac_f32_e32 v40, v24, v235 ; 3E51D718 v_mac_f32_e32 v25, v24, v240 ; 3E33E118 v_mac_f32_e32 v26, v24, v241 ; 3E35E318 v_mac_f32_e32 v38, v24, v248 ; 3E4DF118 v_mac_f32_e32 v39, v24, v249 ; 3E4FF318 s_mov_b32 s0, SCRATCH_RSRC_DWORD0 ; BE8003FF 00000000 s_mov_b32 s1, SCRATCH_RSRC_DWORD1 ; BE8103FF 00000000 s_mov_b32 s2, -1 ; BE8203C1 s_mov_b32 s3, 0x80f000 ; BE8303FF 0080F000 buffer_load_dword v24, s[0:3], s12 ; E0300000 0C001800 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v24, v14, v24, v23 ; D2820018 045E310E v_mad_f32 v12, v14, v12, v23 ; D282000C 045E190E v_mac_f32_e32 v23, v14, v13 ; 3E2E1B0E v_mac_f32_e32 v22, v84, v232 ; 3E2DD154 v_mac_f32_e32 v40, v84, v233 ; 3E51D354 v_mov_b32_e32 v13, 0x40e00000 ; 7E1A02FF 40E00000 v_mul_f32_e32 v14, v13, v24 ; 101C310D v_mul_f32_e32 v12, v13, v12 ; 1018190D v_mul_f32_e32 v13, v13, v23 ; 101A2F0D v_mov_b32_e32 v23, 0x3c23d70a ; 7E2E02FF 3C23D70A v_max_f32_e32 v14, v23, v14 ; 201C1D17 v_max_f32_e32 v12, v23, v12 ; 20181917 v_max_f32_e32 v13, v23, v13 ; 201A1B17 v_add_f32_e32 v23, v12, v14 ; 062E1D0C v_add_f32_e32 v23, v13, v23 ; 062E2F0D v_rcp_f32_e32 v23, v23 ; 7E2E5517 v_mac_f32_e32 v25, v84, v48 ; 3E326154 v_mac_f32_e32 v26, v84, v49 ; 3E346354 v_mac_f32_e32 v38, v84, v246 ; 3E4DED54 v_mac_f32_e32 v39, v84, v247 ; 3E4FEF54 v_mul_f32_e32 v14, v23, v14 ; 101C1D17 v_mul_f32_e32 v12, v23, v12 ; 10181917 v_mul_f32_e32 v13, v23, v13 ; 101A1B17 v_mul_f32_e32 v23, v13, v63 ; 102E7F0D v_mac_f32_e32 v23, v14, v72 ; 3E2E910E v_mul_f32_e32 v24, v13, v64 ; 1030810D v_mac_f32_e32 v24, v14, v73 ; 3E30930E v_mul_f32_e32 v35, v13, v65 ; 1046830D v_mac_f32_e32 v35, v14, v74 ; 3E46950E v_mac_f32_e32 v23, v12, v75 ; 3E2E970C v_mac_f32_e32 v24, v12, v76 ; 3E30990C v_mac_f32_e32 v35, v12, v77 ; 3E469B0C v_mul_f32_e32 v36, v13, v69 ; 10488B0D v_mac_f32_e32 v36, v14, v57 ; 3E48730E v_mul_f32_e32 v41, v13, v70 ; 10528D0D v_mac_f32_e32 v41, v14, v58 ; 3E52750E v_mul_f32_e32 v44, v13, v71 ; 10588F0D v_mac_f32_e32 v44, v14, v59 ; 3E58770E v_mac_f32_e32 v36, v12, v60 ; 3E48790C v_mac_f32_e32 v41, v12, v61 ; 3E527B0C v_mac_f32_e32 v44, v12, v62 ; 3E587D0C v_mul_f32_e32 v36, v36, v10 ; 10481524 v_mac_f32_e32 v36, v23, v11 ; 3E481717 v_mul_f32_e32 v23, v41, v10 ; 102E1529 v_mac_f32_e32 v23, v24, v11 ; 3E2E1718 v_mul_f32_e32 v24, v44, v10 ; 1030152C v_mac_f32_e32 v24, v35, v11 ; 3E301723 v_mad_f32 v35, 2.0, v42, -1.0 ; D2820023 03CE54F4 v_mad_f32 v32, 2.0, v32, -1.0 ; D2820020 03CE40F4 v_mul_f32_e32 v35, v14, v35 ; 1046470E v_mac_f32_e32 v35, v12, v32 ; 3E46410C v_mad_f32 v31, 2.0, v31, -1.0 ; D282001F 03CE3EF4 v_mad_f32 v32, 2.0, v34, -1.0 ; D2820020 03CE44F4 v_mul_f32_e32 v34, 0, v14 ; 10441C80 v_mad_f32 v31, v12, v31, v34 ; D282001F 048A3F0C v_mac_f32_e32 v31, v13, v32 ; 3E3E410D v_mad_f32 v32, 2.0, v43, -1.0 ; D2820020 03CE56F4 v_mad_f32 v33, 2.0, v33, -1.0 ; D2820021 03CE42F4 v_mul_f32_e32 v32, v14, v32 ; 1040410E v_mac_f32_e32 v32, 0, v12 ; 3E401880 v_mac_f32_e32 v32, v13, v33 ; 3E40430D v_mad_f32 v21, 2.0, v21, -1.0 ; D2820015 03CE2AF4 v_mad_f32 v29, 2.0, v29, -1.0 ; D282001D 03CE3AF4 v_mul_f32_e32 v21, v14, v21 ; 102A2B0E v_mac_f32_e32 v21, v12, v29 ; 3E2A3B0C v_mad_f32 v28, 2.0, v28, -1.0 ; D282001C 03CE38F4 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mad_f32 v28, v12, v28, v34 ; D282001C 048A390C v_mac_f32_e32 v28, v13, v1 ; 3E38030D v_mad_f32 v1, 2.0, v27, -1.0 ; D2820001 03CE36F4 v_mad_f32 v27, 2.0, v37, -1.0 ; D282001B 03CE4AF4 v_mul_f32_e32 v1, v14, v1 ; 1002030E v_mac_f32_e32 v1, 0, v12 ; 3E021880 v_mac_f32_e32 v1, v13, v27 ; 3E02370D v_mac_f32_e32 v35, 0, v13 ; 3E461A80 v_mul_f32_e32 v27, v31, v10 ; 1036151F v_mul_f32_e32 v29, v32, v10 ; 103A1520 v_mul_f32_e32 v10, v35, v10 ; 10141523 v_mac_f32_e32 v27, v28, v11 ; 3E36171C v_mac_f32_e32 v29, v1, v11 ; 3E3A1701 v_mac_f32_e32 v21, 0, v13 ; 3E2A1A80 v_mac_f32_e32 v10, v21, v11 ; 3E141715 v_mul_f32_e32 v1, v13, v79 ; 10029F0D v_mac_f32_e32 v1, v14, v85 ; 3E02AB0E v_mul_f32_e32 v11, v13, v80 ; 1016A10D v_mac_f32_e32 v11, v14, v86 ; 3E16AD0E v_mul_f32_e32 v21, v13, v81 ; 102AA30D v_mac_f32_e32 v21, v14, v87 ; 3E2AAF0E v_mac_f32_e32 v1, v12, v88 ; 3E02B10C v_mac_f32_e32 v11, v12, v89 ; 3E16B30C v_mac_f32_e32 v21, v12, v90 ; 3E2AB50C v_mad_f32 v28, 2.0, v40, -1.0 ; D282001C 03CE50F4 v_mad_f32 v22, 2.0, v22, -1.0 ; D2820016 03CE2CF4 v_mul_f32_e32 v28, v14, v28 ; 1038390E v_mul_f32_e32 v14, v14, v22 ; 101C2D0E v_mad_f32 v22, 2.0, v25, -1.0 ; D2820016 03CE32F4 v_mac_f32_e32 v34, v12, v22 ; 3E442D0C v_mad_f32 v22, 2.0, v26, -1.0 ; D2820016 03CE34F4 v_mac_f32_e32 v14, v12, v22 ; 3E1C2D0C v_mac_f32_e32 v28, 0, v12 ; 3E381880 v_mad_f32 v12, 2.0, v39, -1.0 ; D282000C 03CE4EF4 v_mac_f32_e32 v34, v13, v12 ; 3E44190D v_mad_f32 v12, 2.0, v38, -1.0 ; D282000C 03CE4CF4 v_mac_f32_e32 v28, v13, v12 ; 3E38190D v_mac_f32_e32 v14, 0, v13 ; 3E1C1A80 v_mac_f32_e32 v27, v34, v6 ; 3E360D22 v_mac_f32_e32 v29, v28, v6 ; 3E3A0D1C v_mac_f32_e32 v10, v14, v6 ; 3E140D0E v_mul_f32_e32 v12, v29, v29 ; 10183B1D v_mac_f32_e32 v12, v27, v27 ; 3E18371B v_mac_f32_e32 v12, v10, v10 ; 3E18150A v_add_f32_e32 v12, 1.0, v12 ; 061818F2 v_rsq_clamp_f32_e32 v12, v12 ; 7E18590C v_mac_f32_e32 v36, v1, v6 ; 3E480D01 v_mac_f32_e32 v23, v11, v6 ; 3E2E0D0B v_mac_f32_e32 v24, v21, v6 ; 3E300D15 v_mul_f32_e32 v1, v12, v27 ; 1002370C v_mul_f32_e32 v6, v12, v29 ; 100C3B0C v_mul_f32_e32 v10, v12, v10 ; 1014150C v_mad_f32 v1, -v1, v5, v7 ; D2820001 241E0B01 v_mad_f32 v6, -v6, v5, v8 ; D2820006 24220B06 v_mad_f32 v5, -v10, v5, v9 ; D2820005 24260B0A v_mul_f32_e32 v7, v1, v1 ; 100E0301 v_mac_f32_e32 v7, v6, v6 ; 3E0E0D06 v_mac_f32_e32 v7, v5, v5 ; 3E0E0B05 v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_mul_f32_e32 v8, v20, v20 ; 10102914 v_mac_f32_e32 v8, v18, v18 ; 3E102512 v_mac_f32_e32 v8, v19, v19 ; 3E102713 v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_mul_f32_e32 v6, v7, v6 ; 100C0D07 v_mul_f32_e32 v5, v7, v5 ; 100A0B07 v_mul_f32_e32 v7, v8, v20 ; 100E2908 v_mul_f32_e32 v9, v8, v18 ; 10122508 v_mul_f32_e32 v8, v8, v19 ; 10102708 v_mul_f32_e32 v7, v7, v1 ; 100E0307 v_mac_f32_e32 v7, v9, v6 ; 3E0E0D09 v_mac_f32_e32 v7, v8, v5 ; 3E0E0B08 v_mul_f32_e32 v1, v17, v1 ; 10020311 v_max_f32_e32 v7, 0x38d1b717, v7 ; 200E0EFF 38D1B717 v_log_f32_e32 v7, v7 ; 7E0E4F07 v_mac_f32_e32 v1, v16, v6 ; 3E020D10 v_mac_f32_e32 v1, v15, v5 ; 3E020B0F v_mul_f32_e32 v5, 0x42000000, v2 ; 100A04FF 42000000 v_mul_legacy_f32_e32 v5, v5, v7 ; 0E0A0F05 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_mac_f32_e32 v30, -2.0, v5 ; 3E3C0AF5 v_mul_f32_e32 v6, v30, v5 ; 100C0B1E v_mul_f32_e32 v5, v6, v5 ; 100A0B06 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_readlane_b32 s2, v254, 38 ; 02054DFE s_nop 2 ; BF800002 v_mul_f32_e32 v5, s2, v36 ; 100A4802 v_readlane_b32 s0, v254, 7 ; 02010FFE s_nop 2 ; BF800002 v_add_f32_e64 v6, s0, s0 ; D2060006 00000000 v_readlane_b32 s0, v254, 4 ; 020109FE s_nop 2 ; BF800002 v_max_f32_e32 v6, s0, v6 ; 200C0C00 v_readlane_b32 s3, v254, 39 ; 02074FFE s_nop 2 ; BF800002 v_mul_f32_e32 v7, s3, v23 ; 100E2E03 v_readlane_b32 s0, v254, 6 ; 02010DFE s_nop 2 ; BF800002 v_add_f32_e64 v8, s0, s0 ; D2060008 00000000 v_readlane_b32 s0, v254, 3 ; 020107FE s_nop 2 ; BF800002 v_max_f32_e32 v8, s0, v8 ; 20101000 v_readlane_b32 s0, v254, 1 ; 020103FE s_nop 2 ; BF800002 v_mul_f32_e32 v9, s0, v24 ; 10123000 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_readlane_b32 s1, v254, 5 ; 02030BFE s_nop 2 ; BF800002 v_add_f32_e64 v10, s1, s1 ; D206000A 00000201 v_readlane_b32 s1, v254, 2 ; 020305FE s_nop 2 ; BF800002 v_max_f32_e32 v10, s1, v10 ; 20141401 v_min_f32_e32 v6, 1.0, v6 ; 1E0C0CF2 v_min_f32_e32 v8, 1.0, v8 ; 1E1010F2 v_min_f32_e32 v10, 1.0, v10 ; 1E1414F2 v_mul_f32_e32 v6, v36, v6 ; 100C0D24 v_mul_f32_e32 v8, v23, v8 ; 10101117 v_mul_f32_e32 v10, v24, v10 ; 10141518 v_mac_f32_e32 v6, v1, v5 ; 3E0C0B01 v_mac_f32_e32 v8, v1, v7 ; 3E100F01 v_mac_f32_e32 v10, v1, v9 ; 3E141301 v_mac_f32_e32 v6, s2, v2 ; 3E0C0402 v_mac_f32_e32 v8, s3, v2 ; 3E100403 v_mac_f32_e32 v10, s0, v2 ; 3E140400 v_mul_f32_e32 v1, 0.5, v6 ; 10020CF0 v_mul_f32_e32 v2, 0.5, v8 ; 100410F0 v_mul_f32_e32 v5, 0.5, v10 ; 100A14F0 v_mac_f32_e32 v1, v3, v36 ; 3E024903 v_mac_f32_e32 v2, v4, v23 ; 3E042F04 v_mac_f32_e32 v5, v0, v24 ; 3E0A3100 v_cvt_pkrtz_f16_f32_e32 v0, v1, v2 ; 5E000501 v_cvt_pkrtz_f16_f32_e64 v1, v5, 1.0 ; D25E0001 0001E505 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 104 VGPRS: 256 Code Size: 4864 bytes LDS: 0 blocks Scratch: 16384 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..7] DCL TEMP[0..3], LOCAL IMM[0] INT32 {0, 1, 2, 3} IMM[1] FLT32 { 0.1000, 0.0000, 0.0000, 0.0000} 0: F2I TEMP[0].x, IN[0].zzzz 1: USEQ TEMP[1], TEMP[0].xxxx, IMM[0] 2: UCMP TEMP[2].x, TEMP[1].xxxx, CONST[0].xxxx, TEMP[2].xxxx 3: UCMP TEMP[2].x, TEMP[1].yyyy, CONST[0].yyyy, TEMP[2].xxxx 4: UCMP TEMP[2].x, TEMP[1].zzzz, CONST[0].zzzz, TEMP[2].xxxx 5: UCMP TEMP[2].x, TEMP[1].wwww, CONST[0].wwww, TEMP[2].xxxx 6: MOV TEMP[1].x, TEMP[2].xxxx 7: USEQ TEMP[2], TEMP[0].xxxx, IMM[0] 8: UCMP TEMP[3].x, TEMP[2].xxxx, CONST[1].xxxx, TEMP[3].xxxx 9: UCMP TEMP[3].x, TEMP[2].yyyy, CONST[1].yyyy, TEMP[3].xxxx 10: UCMP TEMP[3].x, TEMP[2].zzzz, CONST[1].zzzz, TEMP[3].xxxx 11: UCMP TEMP[3].x, TEMP[2].wwww, CONST[1].wwww, TEMP[3].xxxx 12: MOV TEMP[1].y, TEMP[3].xxxx 13: USEQ TEMP[0], TEMP[0].xxxx, IMM[0] 14: UCMP TEMP[2].x, TEMP[0].xxxx, CONST[2].xxxx, TEMP[2].xxxx 15: UCMP TEMP[2].x, TEMP[0].yyyy, CONST[2].yyyy, TEMP[2].xxxx 16: UCMP TEMP[2].x, TEMP[0].zzzz, CONST[2].zzzz, TEMP[2].xxxx 17: UCMP TEMP[2].x, TEMP[0].wwww, CONST[2].wwww, TEMP[2].xxxx 18: MOV TEMP[1].z, TEMP[2].xxxx 19: MOV TEMP[0].xyz, TEMP[1].xyzx 20: MOV TEMP[0].w, IN[0].zzzz 21: MUL TEMP[1], CONST[4], IN[0].xxxx 22: MAD TEMP[1], CONST[5], IN[0].yyyy, TEMP[1] 23: MAD TEMP[1], CONST[6], IMM[1].xxxx, TEMP[1] 24: MAD TEMP[1], CONST[7], IN[0].wwww, TEMP[1] 25: MOV OUT[2], IN[1].xyxy 26: MOV OUT[1], TEMP[0] 27: MOV OUT[0], TEMP[1] 28: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = extractelement <4 x float> %44, i32 3 %49 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 %51 = add i32 %5, %7 %52 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %51) %53 = extractelement <4 x float> %52, i32 0 %54 = extractelement <4 x float> %52, i32 1 %55 = fptosi float %47 to i32 %56 = icmp eq i32 %55, 0 %57 = icmp eq i32 %55, 1 %58 = icmp eq i32 %55, 2 %59 = icmp eq i32 %55, 3 %60 = select i1 %56, float %13, float 0.000000e+00 %61 = select i1 %57, float %14, float %60 %62 = select i1 %58, float %15, float %61 %63 = select i1 %59, float %16, float %62 %64 = icmp eq i32 %55, 0 %65 = sext i1 %64 to i32 %66 = icmp eq i32 %55, 1 %67 = icmp eq i32 %55, 2 %68 = icmp eq i32 %55, 3 %69 = bitcast i32 %65 to float %70 = select i1 %64, float %17, float 0.000000e+00 %71 = select i1 %66, float %18, float %70 %72 = select i1 %67, float %19, float %71 %73 = select i1 %68, float %20, float %72 %74 = icmp eq i32 %55, 0 %75 = icmp eq i32 %55, 1 %76 = icmp eq i32 %55, 2 %77 = icmp eq i32 %55, 3 %78 = select i1 %74, float %21, float %69 %79 = select i1 %75, float %22, float %78 %80 = select i1 %76, float %23, float %79 %81 = select i1 %77, float %24, float %80 %82 = fmul float %25, %45 %83 = fmul float %26, %45 %84 = fmul float %27, %45 %85 = fmul float %28, %45 %86 = fmul float %29, %46 %87 = fadd float %86, %82 %88 = fmul float %30, %46 %89 = fadd float %88, %83 %90 = fmul float %31, %46 %91 = fadd float %90, %84 %92 = fmul float %32, %46 %93 = fadd float %92, %85 %94 = fmul float %33, 0x3FB99999A0000000 %95 = fadd float %94, %87 %96 = fmul float %34, 0x3FB99999A0000000 %97 = fadd float %96, %89 %98 = fmul float %35, 0x3FB99999A0000000 %99 = fadd float %98, %91 %100 = fmul float %36, 0x3FB99999A0000000 %101 = fadd float %100, %93 %102 = fmul float %37, %48 %103 = fadd float %102, %95 %104 = fmul float %38, %48 %105 = fadd float %104, %97 %106 = fmul float %39, %48 %107 = fadd float %106, %99 %108 = fmul float %40, %48 %109 = fadd float %108, %101 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %63, float %73, float %81, float %47) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %53, float %54, float %53, float %54) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %103, float %105, float %107, float %109) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s11, s[0:3], 0x3 ; C2058103 s_buffer_load_dword s16, s[0:3], 0x4 ; C2080104 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x7 ; C2030107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s8 ; 7E000208 s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v7, s9 ; 7E0E0209 s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 v_mov_b32_e32 v8, s10 ; 7E10020A s_buffer_load_dword s9, s[0:3], 0xa ; C204810A v_mov_b32_e32 v9, s11 ; 7E12020B s_buffer_load_dword s10, s[0:3], 0xb ; C205010B v_mov_b32_e32 v10, s16 ; 7E140210 s_buffer_load_dword s11, s[0:3], 0x10 ; C2058110 s_buffer_load_dword s12, s[0:3], 0x11 ; C2060111 s_buffer_load_dword s13, s[0:3], 0x12 ; C2068112 v_mov_b32_e32 v11, s4 ; 7E160204 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_buffer_load_dword s14, s[0:3], 0x14 ; C2070114 s_buffer_load_dword s15, s[0:3], 0x15 ; C2078115 s_buffer_load_dword s16, s[0:3], 0x16 ; C2080116 s_buffer_load_dword s17, s[0:3], 0x17 ; C2088117 s_buffer_load_dword s18, s[0:3], 0x18 ; C2090118 s_buffer_load_dword s19, s[0:3], 0x19 ; C2098119 s_buffer_load_dword s20, s[0:3], 0x1a ; C20A011A s_buffer_load_dword s21, s[0:3], 0x1b ; C20A811B s_buffer_load_dword s22, s[0:3], 0x1c ; C20B011C v_cvt_i32_f32_e32 v12, v3 ; 7E181103 s_buffer_load_dword s23, s[0:3], 0x1d ; C20B811D s_buffer_load_dword s24, s[0:3], 0x1e ; C20C011E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v13, s7 ; 7E1A0207 v_cmp_eq_i32_e32 vcc, 0, v12 ; 7D041880 v_cndmask_b32_e64 v14, 0, -1, vcc ; D200000E 01A98280 v_cndmask_b32_e32 v13, v14, v13 ; 001A1B0E v_cndmask_b32_e32 v0, 0, v0 ; 00000080 v_cndmask_b32_e32 v10, 0, v10 ; 00141480 v_cmp_eq_i32_e32 vcc, 1, v12 ; 7D041881 v_cndmask_b32_e32 v0, v0, v7 ; 00000F00 v_cndmask_b32_e32 v7, v10, v11 ; 000E170A v_mov_b32_e32 v10, s8 ; 7E140208 v_cndmask_b32_e32 v10, v13, v10 ; 0014150D v_cmp_eq_i32_e32 vcc, 2, v12 ; 7D041882 v_cndmask_b32_e32 v0, v0, v8 ; 00001100 v_mov_b32_e32 v8, s5 ; 7E100205 v_cndmask_b32_e32 v7, v7, v8 ; 000E1107 v_mov_b32_e32 v8, s9 ; 7E100209 v_cndmask_b32_e32 v8, v10, v8 ; 0010110A v_cmp_eq_i32_e32 vcc, 3, v12 ; 7D041883 v_cndmask_b32_e32 v0, v0, v9 ; 00001300 v_mov_b32_e32 v9, s6 ; 7E120206 v_cndmask_b32_e32 v7, v7, v9 ; 000E1307 v_mov_b32_e32 v9, s10 ; 7E12020A v_cndmask_b32_e32 v8, v8, v9 ; 00101308 exp 15, 32, 0, 0, 0, v0, v7, v8, v3 ; F800020F 03080700 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s11, v1 ; 1000020B v_mac_f32_e32 v0, s14, v2 ; 3E00040E v_mul_f32_e32 v3, s12, v1 ; 1006020C v_mac_f32_e32 v3, s15, v2 ; 3E06040F v_mul_f32_e32 v7, s13, v1 ; 100E020D v_mac_f32_e32 v7, s16, v2 ; 3E0E0410 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mac_f32_e32 v1, s17, v2 ; 3E020411 v_mov_b32_e32 v2, 0x3dcccccd ; 7E0402FF 3DCCCCCD v_mac_f32_e32 v0, s18, v2 ; 3E000412 v_mac_f32_e32 v3, s19, v2 ; 3E060413 v_mac_f32_e32 v7, s20, v2 ; 3E0E0414 v_mac_f32_e32 v1, s21, v2 ; 3E020415 v_mac_f32_e32 v0, s22, v4 ; 3E000816 v_mac_f32_e32 v3, s23, v4 ; 3E060817 v_mac_f32_e32 v7, s24, v4 ; 3E0E0818 v_mac_f32_e32 v1, s0, v4 ; 3E020800 exp 15, 33, 0, 0, 0, v5, v6, v5, v6 ; F800021F 06050605 exp 15, 12, 0, 1, 0, v0, v3, v7, v1 ; F80008CF 01070300 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 16 Code Size: 388 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0..1] DCL CONST[4..8] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { 1.0000, 2.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 3, 0} IMM[2] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xy, IN[1].zwww 3: TEX TEMP[1].x, TEMP[1], SAMP[1], 2D 4: MAD TEMP[2].x, CONST[0].xxxx, TEMP[1].xxxx, CONST[0].yyyy 5: RCP TEMP[2].x, TEMP[2].xxxx 6: MUL TEMP[2].xyz, TEMP[2].xxxx, IN[0] 7: MUL TEMP[3].xyz, CONST[4].wwww, TEMP[2].xyzz 8: ADD TEMP[4].y, CONST[8].xyzz, TEMP[2].xyzz 9: ADD TEMP[4].x, TEMP[4].yyyy, -CONST[4].xxxx 10: MUL TEMP[5].x, IMM[0].yyyy, CONST[4].zzzz 11: ADD TEMP[5].x, IMM[0].xxxx, -TEMP[5].xxxx 12: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[4].xxxx 13: MIN TEMP[5].x, TEMP[5].xxxx, IMM[0].zzzz 14: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[3].xyzz 15: SQRT TEMP[3].x, TEMP[3].xxxx 16: ADD TEMP[4].x, TEMP[4].xxxx, CONST[4].yyyy 17: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx 18: ADD TEMP[2].x, TEMP[2].yyyy, IMM[0].wwww 19: ABS TEMP[2].x, TEMP[2].xxxx 20: RCP TEMP[2].x, TEMP[2].xxxx 21: MUL TEMP[2].x, TEMP[5].xxxx, TEMP[2].xxxx 22: MAD TEMP[2].x, CONST[4].zzzz, TEMP[4].xxxx, -TEMP[2].xxxx 23: MAD TEMP[2].x, -TEMP[3].xxxx, TEMP[2].xxxx, CONST[5].xxxx 24: MAX TEMP[2].x, IMM[0].zzzz, TEMP[2].xxxx 25: MOV TEMP[3].x, IMM[0].zzzz 26: USEQ TEMP[4].x, CONST[6].xxxx, IMM[1].xxxx 27: UIF TEMP[4].xxxx :0 28: MAD TEMP[3].x, TEMP[2].xxxx, CONST[7].zzzz, CONST[7].wwww 29: ENDIF 30: USEQ TEMP[4].x, CONST[6].xxxx, IMM[1].yyyy 31: UIF TEMP[4].xxxx :0 32: MUL TEMP[4].x, CONST[7].yyyy, TEMP[2].xxxx 33: EX2 TEMP[3].x, -TEMP[4].xxxx 34: ENDIF 35: USEQ TEMP[4].x, CONST[6].xxxx, IMM[1].zzzz 36: UIF TEMP[4].xxxx :0 37: MUL TEMP[2].x, CONST[7].xxxx, TEMP[2].xxxx 38: MUL TEMP[2].x, -TEMP[2].xxxx, TEMP[2].xxxx 39: EX2 TEMP[3].x, TEMP[2].xxxx 40: ENDIF 41: MOV_SAT TEMP[2].x, TEMP[3].xxxx 42: FSGE TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx 43: UIF TEMP[1].xxxx :0 44: MOV TEMP[2].x, IMM[0].xxxx 45: ENDIF 46: LRP TEMP[0], TEMP[2].xxxx, TEMP[0], CONST[1] 47: MOV OUT[0], TEMP[0] 48: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %40 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %41 = load <32 x i8>, <32 x i8> addrspace(2)* %40, align 32, !tbaa !0 %42 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %43 = load <16 x i8>, <16 x i8> addrspace(2)* %42, align 16, !tbaa !0 %44 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %45 = bitcast <8 x i32> addrspace(2)* %44 to <32 x i8> addrspace(2)* %46 = load <32 x i8>, <32 x i8> addrspace(2)* %45, align 32, !tbaa !0 %47 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %48 = bitcast <4 x i32> addrspace(2)* %47 to <16 x i8> addrspace(2)* %49 = load <16 x i8>, <16 x i8> addrspace(2)* %48, align 16, !tbaa !0 %50 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %55 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %56 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %57 = bitcast float %53 to i32 %58 = bitcast float %54 to i32 %59 = insertelement <2 x i32> undef, i32 %57, i32 0 %60 = insertelement <2 x i32> %59, i32 %58, i32 1 %61 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %60, <32 x i8> %41, <16 x i8> %43, i32 2) %62 = extractelement <4 x float> %61, i32 0 %63 = extractelement <4 x float> %61, i32 1 %64 = extractelement <4 x float> %61, i32 2 %65 = extractelement <4 x float> %61, i32 3 %66 = bitcast float %55 to i32 %67 = bitcast float %56 to i32 %68 = insertelement <2 x i32> undef, i32 %66, i32 0 %69 = insertelement <2 x i32> %68, i32 %67, i32 1 %70 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %69, <32 x i8> %46, <16 x i8> %49, i32 2) %71 = extractelement <4 x float> %70, i32 0 %72 = fmul float %24, %71 %73 = fadd float %72, %25 %74 = fdiv float 1.000000e+00, %73 %75 = fmul float %74, %50 %76 = fmul float %74, %51 %77 = fmul float %74, %52 %78 = fmul float %33, %75 %79 = fmul float %33, %76 %80 = fmul float %33, %77 %81 = fadd float %39, %76 %82 = fsub float %81, %30 %83 = fmul float %32, 2.000000e+00 %84 = fsub float 1.000000e+00, %83 %85 = fmul float %84, %82 %86 = call float @llvm.minnum.f32(float %85, float 0.000000e+00) %87 = fmul float %78, %78 %88 = fmul float %79, %79 %89 = fadd float %88, %87 %90 = fmul float %80, %80 %91 = fadd float %89, %90 %92 = call float @llvm.sqrt.f32(float %91) %93 = fadd float %82, %31 %94 = fmul float %86, %86 %95 = fadd float %76, 0x3EE4F8B580000000 %96 = call float @llvm.fabs.f32(float %95) %97 = fdiv float 1.000000e+00, %96 %98 = fmul float %94, %97 %99 = fmul float %32, %93 %100 = fsub float %99, %98 %101 = fmul float %92, %100 %102 = fsub float %34, %101 %103 = call float @llvm.maxnum.f32(float %102, float 0.000000e+00) %104 = bitcast float %35 to i32 %105 = icmp eq i32 %104, 1 %106 = fmul float %103, %37 %107 = fadd float %106, %38 %temp12.0 = select i1 %105, float %107, float 0.000000e+00 %108 = bitcast float %35 to i32 %109 = icmp eq i32 %108, 2 br i1 %109, label %IF25, label %ENDIF24 IF25: ; preds = %main_body %110 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %111 = fmul float %110, %103 %112 = fsub float -0.000000e+00, %111 %113 = call float @llvm.AMDIL.exp.(float %112) br label %ENDIF24 ENDIF24: ; preds = %main_body, %IF25 %temp12.1 = phi float [ %113, %IF25 ], [ %temp12.0, %main_body ] %114 = bitcast float %35 to i32 %115 = icmp eq i32 %114, 3 br i1 %115, label %IF28, label %ENDIF27 IF28: ; preds = %ENDIF24 %116 = fmul float %36, %103 %117 = fmul float %116, %116 %118 = fsub float -0.000000e+00, %117 %119 = call float @llvm.AMDIL.exp.(float %118) br label %ENDIF27 ENDIF27: ; preds = %ENDIF24, %IF28 %temp12.2 = phi float [ %119, %IF28 ], [ %temp12.1, %ENDIF24 ] %120 = call float @llvm.AMDIL.clamp.(float %temp12.2, float 0.000000e+00, float 1.000000e+00) %121 = fcmp oge float %71, 0x3FEFFFFDE0000000 %. = select i1 %121, float 1.000000e+00, float %120 %122 = call float @llvm.AMDGPU.lrp(float %., float %62, float %26) %123 = call float @llvm.AMDGPU.lrp(float %., float %63, float %27) %124 = call float @llvm.AMDGPU.lrp(float %., float %64, float %28) %125 = call float @llvm.AMDGPU.lrp(float %., float %65, float %29) %126 = call i32 @llvm.SI.packf16(float %122, float %123) %127 = bitcast i32 %126 to float %128 = call i32 @llvm.SI.packf16(float %124, float %125) %129 = bitcast i32 %128 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %127, float %129, float %127, float %129) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000 v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001 v_interp_p1_f32 v6, v0, 1, 0, [m0] ; C8180100 v_interp_p2_f32 v6, [v6], v1, 1, 0, [m0] ; C8190101 v_interp_p1_f32 v7, v0, 2, 0, [m0] ; C81C0200 v_interp_p2_f32 v7, [v7], v1, 2, 0, [m0] ; C81D0201 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800F00 00440002 image_sample v4, 1, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[24:31], s[12:15] ; F0800100 00660408 s_buffer_load_dword s6, s[0:3], 0x21 ; C2030121 s_buffer_load_dword s7, s[0:3], 0x10 ; C2038110 s_buffer_load_dword s8, s[0:3], 0x12 ; C2040112 s_buffer_load_dword s9, s[0:3], 0x11 ; C2048111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s5 ; 7E100205 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v8, s4, v4 ; 3E100804 v_rcp_f32_e32 v8, v8 ; 7E105508 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 v_mul_f32_e32 v5, v5, v8 ; 100A1105 v_mad_f32 v9, v8, v6, s6 ; D2820009 001A0D08 v_subrev_f32_e32 v9, s7, v9 ; 0A121207 v_mad_f32 v10, -2.0, s8, 1.0 ; D282000A 03C810F5 v_mul_f32_e32 v10, v9, v10 ; 10141509 v_add_f32_e32 v9, s9, v9 ; 06121209 v_mul_f32_e32 v11, v6, v8 ; 10161106 v_mul_f32_e32 v7, v7, v8 ; 100E1107 v_madak_f32_e32 v6, v8, v6, 0x3727c5ac ; 420C0D08 3727C5AC v_rcp_f32_e64 v6, |v6| ; D3540106 00000106 s_buffer_load_dword s5, s[0:3], 0x1f ; C202811F v_min_f32_e32 v8, 0, v10 ; 1E101480 v_mul_f32_e32 v8, v8, v8 ; 10101108 v_mul_f32_e32 v6, v6, v8 ; 100C1106 v_mad_f32 v6, s8, v9, -v6 ; D2820006 841A1208 s_buffer_load_dword s6, s[0:3], 0x14 ; C2030114 s_buffer_load_dword s8, s[0:3], 0x18 ; C2040118 s_buffer_load_dword s7, s[0:3], 0x1e ; C203811E s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v5 ; 100A0A04 v_mul_f32_e32 v8, s4, v11 ; 10101604 v_mul_f32_e32 v7, s4, v7 ; 100E0E04 v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_mac_f32_e32 v5, v8, v8 ; 3E0A1108 v_mac_f32_e32 v5, v7, v7 ; 3E0A0F07 v_sqrt_f32_e32 v5, v5 ; 7E0A6705 v_mad_f32 v5, -v5, v6, s6 ; D2820005 201A0D05 v_max_f32_e32 v6, 0, v5 ; 200C0A80 v_mov_b32_e32 v5, s5 ; 7E0A0205 v_mac_f32_e32 v5, s7, v6 ; 3E0A0C07 v_cmp_eq_i32_e64 vcc, 1, s8 ; D104006A 00001081 v_cndmask_b32_e32 v5, 0, v5 ; 000A0A80 v_cmp_eq_i32_e64 s[4:5], 2, s8 ; D1040004 00001082 s_and_saveexec_b64 s[10:11], s[4:5] ; BE8A2404 s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v6 ; 100A0C04 v_exp_f32_e64 v5, -v5 ; D34A0005 20000105 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 v_cmp_eq_i32_e64 s[8:9], 3, s8 ; D1040008 00001083 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[8:9], s[8:9] ; BE882408 s_xor_b64 s[8:9], exec, s[8:9] ; 8988087E s_buffer_load_dword s10, s[0:3], 0x1c ; C205011C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s10, v6 ; 100A0C0A v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_exp_f32_e64 v5, -v5 ; D34A0005 20000105 s_or_b64 exec, exec, s[8:9] ; 88FE087E v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_mov_b32_e32 v6, 0x3f7fffef ; 7E0C02FF 3F7FFFEF v_cmp_le_f32_e32 vcc, v6, v4 ; 7C060906 v_cndmask_b32_e64 v4, v5, 1.0, vcc ; D2000004 01A9E505 v_sub_f32_e32 v5, 1.0, v4 ; 080A08F2 v_mul_f32_e32 v6, s7, v5 ; 100C0A07 v_mac_f32_e32 v6, v0, v4 ; 3E0C0900 v_mul_f32_e32 v0, s6, v5 ; 10000A06 v_mac_f32_e32 v0, v1, v4 ; 3E000901 v_mul_f32_e32 v1, s5, v5 ; 10020A05 v_mac_f32_e32 v1, v2, v4 ; 3E020902 v_mul_f32_e32 v2, s4, v5 ; 10040A04 v_mac_f32_e32 v2, v3, v4 ; 3E040903 v_cvt_pkrtz_f16_f32_e32 v0, v6, v0 ; 5E000106 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 12 Code Size: 496 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..4] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { -0.5000, 0.5000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[1], IN[0].xxxx 1: MAD TEMP[0], CONST[2], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[3], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[4], IN[0].wwww, TEMP[0] 4: ADD TEMP[1].xy, IN[1].xyyy, CONST[0].xyyy 5: MAD TEMP[2].xy, CONST[0].xyyy, IMM[0].xxxx, IN[1].xyyy 6: MOV TEMP[1].zw, TEMP[2].yyxy 7: MAD TEMP[2].xy, CONST[0].xyyy, IMM[0].yxxx, IN[1].xyyy 8: MAD TEMP[3].xy, CONST[0].xyyy, IMM[0].xyyy, IN[1].xyyy 9: MOV TEMP[2].zw, TEMP[3].yyxy 10: MOV OUT[1], TEMP[1] 11: MOV OUT[2], TEMP[2] 12: MOV OUT[0], TEMP[0] 13: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 %33 = add i32 %5, %7 %34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %32, i32 0, i32 %33) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 %41 = add i32 %5, %7 %42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %41) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = fmul float %15, %35 %46 = fmul float %16, %35 %47 = fmul float %17, %35 %48 = fmul float %18, %35 %49 = fmul float %19, %36 %50 = fadd float %49, %45 %51 = fmul float %20, %36 %52 = fadd float %51, %46 %53 = fmul float %21, %36 %54 = fadd float %53, %47 %55 = fmul float %22, %36 %56 = fadd float %55, %48 %57 = fmul float %23, %37 %58 = fadd float %57, %50 %59 = fmul float %24, %37 %60 = fadd float %59, %52 %61 = fmul float %25, %37 %62 = fadd float %61, %54 %63 = fmul float %26, %37 %64 = fadd float %63, %56 %65 = fmul float %27, %38 %66 = fadd float %65, %58 %67 = fmul float %28, %38 %68 = fadd float %67, %60 %69 = fmul float %29, %38 %70 = fadd float %69, %62 %71 = fmul float %30, %38 %72 = fadd float %71, %64 %73 = fadd float %43, %13 %74 = fadd float %44, %14 %75 = fmul float %13, -5.000000e-01 %76 = fadd float %75, %43 %77 = fmul float %14, -5.000000e-01 %78 = fadd float %77, %44 %79 = fmul float %13, 5.000000e-01 %80 = fadd float %79, %43 %81 = fmul float %14, -5.000000e-01 %82 = fadd float %81, %44 %83 = fmul float %13, -5.000000e-01 %84 = fadd float %83, %43 %85 = fmul float %14, 5.000000e-01 %86 = fadd float %85, %44 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %73, float %74, float %76, float %78) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %80, float %82, float %84, float %86) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %66, float %68, float %70, float %72) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s4, v5 ; 06000A04 v_add_f32_e32 v7, s5, v6 ; 060E0C05 v_mad_f32 v8, -0.5, s4, v5 ; D2820008 041408F1 v_mad_f32 v9, -0.5, s5, v6 ; D2820009 04180AF1 exp 15, 32, 0, 0, 0, v0, v7, v8, v9 ; F800020F 09080700 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, 0.5, s4, v5 ; D2820000 041408F0 v_mac_f32_e64 v6, 0.5, s5 ; D23E0006 00000AF0 exp 15, 33, 0, 0, 0, v0, v9, v8, v6 ; F800021F 06080900 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108 s_buffer_load_dword s9, s[0:3], 0x9 ; C2048109 s_buffer_load_dword s10, s[0:3], 0xa ; C205010A s_buffer_load_dword s11, s[0:3], 0xb ; C205810B s_buffer_load_dword s12, s[0:3], 0xc ; C206010C s_buffer_load_dword s13, s[0:3], 0xd ; C206810D s_buffer_load_dword s14, s[0:3], 0xe ; C207010E s_buffer_load_dword s15, s[0:3], 0xf ; C207810F s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110 s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111 s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112 s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s6, v1 ; 10000206 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s5, v2 ; 3E000405 v_mul_f32_e32 v5, s7, v1 ; 100A0207 v_mac_f32_e32 v5, s9, v2 ; 3E0A0409 v_mul_f32_e32 v6, s8, v1 ; 100C0208 v_mac_f32_e32 v6, s10, v2 ; 3E0C040A v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mac_f32_e32 v1, s11, v2 ; 3E02040B v_mac_f32_e32 v0, s12, v3 ; 3E00060C v_mac_f32_e32 v5, s13, v3 ; 3E0A060D v_mac_f32_e32 v6, s14, v3 ; 3E0C060E v_mac_f32_e32 v1, s15, v3 ; 3E02060F v_mac_f32_e32 v0, s16, v4 ; 3E000810 v_mac_f32_e32 v5, s17, v4 ; 3E0A0811 v_mac_f32_e32 v6, s18, v4 ; 3E0C0812 v_mac_f32_e32 v1, s0, v4 ; 3E020800 exp 15, 12, 0, 1, 0, v0, v5, v6, v1 ; F80008CF 01060500 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 260 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.2500, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xy, IN[0].zwww 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D 4: ADD TEMP[0], TEMP[0], TEMP[1] 5: MOV TEMP[1].xy, IN[1].xyyy 6: TEX TEMP[1], TEMP[1], SAMP[0], 2D 7: MOV TEMP[2].xy, IN[1].zwww 8: TEX TEMP[2], TEMP[2], SAMP[0], 2D 9: ADD TEMP[1], TEMP[1], TEMP[2] 10: ADD TEMP[0], TEMP[0], TEMP[1] 11: MUL TEMP[0], TEMP[0], IMM[0].xxxx 12: MOV OUT[0], TEMP[0] 13: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %23 = load <8 x i32>, <8 x i32> addrspace(2)* %22, align 32, !tbaa !0 %24 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %34 = bitcast float %26 to i32 %35 = bitcast float %27 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = bitcast <8 x i32> %23 to <32 x i8> %39 = bitcast <4 x i32> %25 to <16 x i8> %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = bitcast float %28 to i32 %46 = bitcast float %29 to i32 %47 = insertelement <2 x i32> undef, i32 %45, i32 0 %48 = insertelement <2 x i32> %47, i32 %46, i32 1 %49 = bitcast <8 x i32> %23 to <32 x i8> %50 = bitcast <4 x i32> %25 to <16 x i8> %51 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %48, <32 x i8> %49, <16 x i8> %50, i32 2) %52 = extractelement <4 x float> %51, i32 0 %53 = extractelement <4 x float> %51, i32 1 %54 = extractelement <4 x float> %51, i32 2 %55 = extractelement <4 x float> %51, i32 3 %56 = fadd float %41, %52 %57 = fadd float %42, %53 %58 = fadd float %43, %54 %59 = fadd float %44, %55 %60 = bitcast float %30 to i32 %61 = bitcast float %31 to i32 %62 = insertelement <2 x i32> undef, i32 %60, i32 0 %63 = insertelement <2 x i32> %62, i32 %61, i32 1 %64 = bitcast <8 x i32> %23 to <32 x i8> %65 = bitcast <4 x i32> %25 to <16 x i8> %66 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %63, <32 x i8> %64, <16 x i8> %65, i32 2) %67 = extractelement <4 x float> %66, i32 0 %68 = extractelement <4 x float> %66, i32 1 %69 = extractelement <4 x float> %66, i32 2 %70 = extractelement <4 x float> %66, i32 3 %71 = bitcast float %32 to i32 %72 = bitcast float %33 to i32 %73 = insertelement <2 x i32> undef, i32 %71, i32 0 %74 = insertelement <2 x i32> %73, i32 %72, i32 1 %75 = bitcast <8 x i32> %23 to <32 x i8> %76 = bitcast <4 x i32> %25 to <16 x i8> %77 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %74, <32 x i8> %75, <16 x i8> %76, i32 2) %78 = extractelement <4 x float> %77, i32 0 %79 = extractelement <4 x float> %77, i32 1 %80 = extractelement <4 x float> %77, i32 2 %81 = extractelement <4 x float> %77, i32 3 %82 = fadd float %67, %78 %83 = fadd float %68, %79 %84 = fadd float %69, %80 %85 = fadd float %70, %81 %86 = fadd float %56, %82 %87 = fadd float %57, %83 %88 = fadd float %58, %84 %89 = fadd float %59, %85 %90 = fmul float %86, 2.500000e-01 %91 = fmul float %87, 2.500000e-01 %92 = fmul float %88, 2.500000e-01 %93 = fmul float %89, 2.500000e-01 %94 = call i32 @llvm.SI.packf16(float %90, float %91) %95 = bitcast i32 %94 to float %96 = call i32 @llvm.SI.packf16(float %92, float %93) %97 = bitcast i32 %96 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %95, float %97, float %95, float %97) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[8:15], s[0:3] ; F0800F00 00020002 image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[8:15], s[0:3] ; F0800F00 00020A04 image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800F00 00020406 image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[8:15], s[0:3] ; F0800F00 00020E08 s_waitcnt vmcnt(2) ; BF8C0772 v_add_f32_e32 v0, v10, v0 ; 0600010A v_add_f32_e32 v1, v11, v1 ; 0602030B v_add_f32_e32 v2, v12, v2 ; 0604050C v_add_f32_e32 v3, v13, v3 ; 0606070D s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, v14, v4 ; 0608090E v_add_f32_e32 v5, v15, v5 ; 060A0B0F v_add_f32_e32 v6, v16, v6 ; 060C0D10 v_add_f32_e32 v7, v17, v7 ; 060E0F11 v_add_f32_e32 v0, v4, v0 ; 06000104 v_add_f32_e32 v1, v5, v1 ; 06020305 v_add_f32_e32 v2, v6, v2 ; 06040506 v_add_f32_e32 v3, v7, v3 ; 06060707 v_mov_b32_e32 v4, 0x3e800000 ; 7E0802FF 3E800000 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_mul_f32_e32 v3, v4, v3 ; 10060704 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 20 Code Size: 216 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: MOV TEMP[0].xy, IN[1].xyxx 2: MUL TEMP[1], CONST[2], IN[0].xxxx 3: MAD TEMP[1], CONST[3], IN[0].yyyy, TEMP[1] 4: MAD TEMP[1], CONST[4], IN[0].zzzz, TEMP[1] 5: MAD TEMP[1], CONST[5], IN[0].wwww, TEMP[1] 6: MUL TEMP[2].xy, CONST[0].xyyy, IMM[0].yxxx 7: MUL TEMP[2].xy, TEMP[2].xyyy, CONST[1].xxxx 8: MOV OUT[1], TEMP[0] 9: MOV OUT[2], TEMP[2] 10: MOV OUT[0], TEMP[1] 11: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 %34 = add i32 %5, %7 %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %34) %36 = extractelement <4 x float> %35, i32 0 %37 = extractelement <4 x float> %35, i32 1 %38 = extractelement <4 x float> %35, i32 2 %39 = extractelement <4 x float> %35, i32 3 %40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = fmul float %16, %36 %47 = fmul float %17, %36 %48 = fmul float %18, %36 %49 = fmul float %19, %36 %50 = fmul float %20, %37 %51 = fadd float %50, %46 %52 = fmul float %21, %37 %53 = fadd float %52, %47 %54 = fmul float %22, %37 %55 = fadd float %54, %48 %56 = fmul float %23, %37 %57 = fadd float %56, %49 %58 = fmul float %24, %38 %59 = fadd float %58, %51 %60 = fmul float %25, %38 %61 = fadd float %60, %53 %62 = fmul float %26, %38 %63 = fadd float %62, %55 %64 = fmul float %27, %38 %65 = fadd float %64, %57 %66 = fmul float %28, %39 %67 = fadd float %66, %59 %68 = fmul float %29, %39 %69 = fadd float %68, %61 %70 = fmul float %30, %39 %71 = fadd float %70, %63 %72 = fmul float %31, %39 %73 = fadd float %72, %65 %74 = fmul float %13, 0.000000e+00 %75 = fmul float %74, %15 %76 = fmul float %14, %15 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %44, float %45, float 1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %75, float %76, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %67, float %69, float %71, float %73) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108 s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109 s_buffer_load_dword s7, s[0:3], 0xa ; C203810A s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_buffer_load_dword s9, s[0:3], 0xc ; C204810C s_buffer_load_dword s10, s[0:3], 0xd ; C205010D s_buffer_load_dword s11, s[0:3], 0xe ; C205810E s_buffer_load_dword s14, s[0:3], 0xf ; C207010F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706 s_buffer_load_dword s15, s[0:3], 0x10 ; C2078110 s_buffer_load_dword s16, s[0:3], 0x11 ; C2080111 s_buffer_load_dword s17, s[0:3], 0x12 ; C2088112 s_buffer_load_dword s18, s[0:3], 0x13 ; C2090113 s_buffer_load_dword s19, s[0:3], 0x14 ; C2098114 s_buffer_load_dword s20, s[0:3], 0x15 ; C20A0115 s_buffer_load_dword s21, s[0:3], 0x16 ; C20A8116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 v_mul_f32_e32 v0, s5, v2 ; 10000405 v_mac_f32_e32 v0, s9, v3 ; 3E000609 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, s6, v2 ; 10020406 v_mac_f32_e32 v1, s10, v3 ; 3E02060A v_mul_f32_e32 v6, s7, v2 ; 100C0407 v_mac_f32_e32 v6, s11, v3 ; 3E0C060B v_mul_f32_e32 v2, s8, v2 ; 10040408 v_mac_f32_e32 v2, s14, v3 ; 3E04060E s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s15, v4 ; 3E00080F v_mac_f32_e32 v1, s16, v4 ; 3E020810 v_mac_f32_e32 v6, s17, v4 ; 3E0C0811 v_mac_f32_e32 v2, s18, v4 ; 3E040812 v_mac_f32_e32 v0, s19, v5 ; 3E000A13 v_mac_f32_e32 v1, s20, v5 ; 3E020A14 v_mac_f32_e32 v6, s21, v5 ; 3E0C0A15 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 v_mul_f32_e64 v3, 0, s12 ; D2100003 00001880 v_mul_f32_e32 v3, s4, v3 ; 10060604 v_mov_b32_e32 v4, s4 ; 7E080204 v_mul_f32_e32 v4, s13, v4 ; 1008080D v_mov_b32_e32 v5, 0 ; 7E0A0280 exp 15, 33, 0, 0, 0, v3, v4, v5, v5 ; F800021F 05050403 exp 15, 12, 0, 1, 0, v0, v1, v6, v2 ; F80008CF 02060100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 244 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 3.0000, 0.0205, 0.0000, 0.0855} IMM[1] FLT32 { 0.2320, 0.0000, 0.3240, 1.0000} 0: MUL TEMP[0].xy, IN[1].xyyy, IMM[0].xxxx 1: ADD TEMP[0].xy, IN[0].xyyy, -TEMP[0].xyyy 2: MOV TEMP[1].xy, TEMP[0].xyyy 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D 4: MUL TEMP[1], TEMP[1], IMM[0].yyyz 5: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 6: MOV TEMP[2].xy, TEMP[0].xyyy 7: TEX TEMP[2], TEMP[2], SAMP[0], 2D 8: MAD TEMP[1], TEMP[2], IMM[0].wwwz, TEMP[1] 9: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 10: MOV TEMP[2].xy, TEMP[0].xyyy 11: TEX TEMP[2], TEMP[2], SAMP[0], 2D 12: MAD TEMP[1], TEMP[2], IMM[1].xxxy, TEMP[1] 13: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 14: MOV TEMP[2].xy, TEMP[0].xyyy 15: TEX TEMP[2], TEMP[2], SAMP[0], 2D 16: MAD TEMP[1], TEMP[2], IMM[1].zzzw, TEMP[1] 17: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 18: MOV TEMP[2].xy, TEMP[0].xyyy 19: TEX TEMP[2], TEMP[2], SAMP[0], 2D 20: MAD TEMP[1], TEMP[2], IMM[1].xxxy, TEMP[1] 21: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 22: MOV TEMP[2].xy, TEMP[0].xyyy 23: TEX TEMP[2], TEMP[2], SAMP[0], 2D 24: MAD TEMP[1], TEMP[2], IMM[0].wwwz, TEMP[1] 25: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 26: MOV TEMP[0].xy, TEMP[0].xyyy 27: TEX TEMP[0], TEMP[0], SAMP[0], 2D 28: MAD TEMP[1], TEMP[0], IMM[0].yyyz, TEMP[1] 29: MOV OUT[0], TEMP[1] 30: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %23 = load <8 x i32>, <8 x i32> addrspace(2)* %22, align 32, !tbaa !0 %24 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %30 = fmul float %28, 3.000000e+00 %31 = fmul float %29, 3.000000e+00 %32 = fsub float %26, %30 %33 = fsub float %27, %31 %34 = bitcast float %32 to i32 %35 = bitcast float %33 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = bitcast <8 x i32> %23 to <32 x i8> %39 = bitcast <4 x i32> %25 to <16 x i8> %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, 0x3F94FDF3C0000000 %46 = fmul float %42, 0x3F94FDF3C0000000 %47 = fmul float %43, 0x3F94FDF3C0000000 %48 = fmul float %44, 0.000000e+00 %49 = fadd float %32, %28 %50 = fadd float %33, %29 %51 = bitcast float %49 to i32 %52 = bitcast float %50 to i32 %53 = insertelement <2 x i32> undef, i32 %51, i32 0 %54 = insertelement <2 x i32> %53, i32 %52, i32 1 %55 = bitcast <8 x i32> %23 to <32 x i8> %56 = bitcast <4 x i32> %25 to <16 x i8> %57 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %54, <32 x i8> %55, <16 x i8> %56, i32 2) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = extractelement <4 x float> %57, i32 3 %62 = fmul float %58, 0x3FB5E35400000000 %63 = fadd float %62, %45 %64 = fmul float %59, 0x3FB5E35400000000 %65 = fadd float %64, %46 %66 = fmul float %60, 0x3FB5E35400000000 %67 = fadd float %66, %47 %68 = fmul float %61, 0.000000e+00 %69 = fadd float %68, %48 %70 = fadd float %49, %28 %71 = fadd float %50, %29 %72 = bitcast float %70 to i32 %73 = bitcast float %71 to i32 %74 = insertelement <2 x i32> undef, i32 %72, i32 0 %75 = insertelement <2 x i32> %74, i32 %73, i32 1 %76 = bitcast <8 x i32> %23 to <32 x i8> %77 = bitcast <4 x i32> %25 to <16 x i8> %78 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %75, <32 x i8> %76, <16 x i8> %77, i32 2) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 1 %81 = extractelement <4 x float> %78, i32 2 %82 = extractelement <4 x float> %78, i32 3 %83 = fmul float %79, 0x3FCDB22D00000000 %84 = fadd float %83, %63 %85 = fmul float %80, 0x3FCDB22D00000000 %86 = fadd float %85, %65 %87 = fmul float %81, 0x3FCDB22D00000000 %88 = fadd float %87, %67 %89 = fmul float %82, 0.000000e+00 %90 = fadd float %89, %69 %91 = fadd float %70, %28 %92 = fadd float %71, %29 %93 = bitcast float %91 to i32 %94 = bitcast float %92 to i32 %95 = insertelement <2 x i32> undef, i32 %93, i32 0 %96 = insertelement <2 x i32> %95, i32 %94, i32 1 %97 = bitcast <8 x i32> %23 to <32 x i8> %98 = bitcast <4 x i32> %25 to <16 x i8> %99 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %97, <16 x i8> %98, i32 2) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = extractelement <4 x float> %99, i32 2 %103 = extractelement <4 x float> %99, i32 3 %104 = fmul float %100, 0x3FD4BC6A80000000 %105 = fadd float %104, %84 %106 = fmul float %101, 0x3FD4BC6A80000000 %107 = fadd float %106, %86 %108 = fmul float %102, 0x3FD4BC6A80000000 %109 = fadd float %108, %88 %110 = fadd float %103, %90 %111 = fadd float %91, %28 %112 = fadd float %92, %29 %113 = bitcast float %111 to i32 %114 = bitcast float %112 to i32 %115 = insertelement <2 x i32> undef, i32 %113, i32 0 %116 = insertelement <2 x i32> %115, i32 %114, i32 1 %117 = bitcast <8 x i32> %23 to <32 x i8> %118 = bitcast <4 x i32> %25 to <16 x i8> %119 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %116, <32 x i8> %117, <16 x i8> %118, i32 2) %120 = extractelement <4 x float> %119, i32 0 %121 = extractelement <4 x float> %119, i32 1 %122 = extractelement <4 x float> %119, i32 2 %123 = extractelement <4 x float> %119, i32 3 %124 = fmul float %120, 0x3FCDB22D00000000 %125 = fadd float %124, %105 %126 = fmul float %121, 0x3FCDB22D00000000 %127 = fadd float %126, %107 %128 = fmul float %122, 0x3FCDB22D00000000 %129 = fadd float %128, %109 %130 = fmul float %123, 0.000000e+00 %131 = fadd float %130, %110 %132 = fadd float %111, %28 %133 = fadd float %112, %29 %134 = bitcast float %132 to i32 %135 = bitcast float %133 to i32 %136 = insertelement <2 x i32> undef, i32 %134, i32 0 %137 = insertelement <2 x i32> %136, i32 %135, i32 1 %138 = bitcast <8 x i32> %23 to <32 x i8> %139 = bitcast <4 x i32> %25 to <16 x i8> %140 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %137, <32 x i8> %138, <16 x i8> %139, i32 2) %141 = extractelement <4 x float> %140, i32 0 %142 = extractelement <4 x float> %140, i32 1 %143 = extractelement <4 x float> %140, i32 2 %144 = extractelement <4 x float> %140, i32 3 %145 = fmul float %141, 0x3FB5E35400000000 %146 = fadd float %145, %125 %147 = fmul float %142, 0x3FB5E35400000000 %148 = fadd float %147, %127 %149 = fmul float %143, 0x3FB5E35400000000 %150 = fadd float %149, %129 %151 = fmul float %144, 0.000000e+00 %152 = fadd float %151, %131 %153 = fadd float %132, %28 %154 = fadd float %133, %29 %155 = bitcast float %153 to i32 %156 = bitcast float %154 to i32 %157 = insertelement <2 x i32> undef, i32 %155, i32 0 %158 = insertelement <2 x i32> %157, i32 %156, i32 1 %159 = bitcast <8 x i32> %23 to <32 x i8> %160 = bitcast <4 x i32> %25 to <16 x i8> %161 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %159, <16 x i8> %160, i32 2) %162 = extractelement <4 x float> %161, i32 0 %163 = extractelement <4 x float> %161, i32 1 %164 = extractelement <4 x float> %161, i32 2 %165 = extractelement <4 x float> %161, i32 3 %166 = fmul float %162, 0x3F94FDF3C0000000 %167 = fadd float %166, %146 %168 = fmul float %163, 0x3F94FDF3C0000000 %169 = fadd float %168, %148 %170 = fmul float %164, 0x3F94FDF3C0000000 %171 = fadd float %170, %150 %172 = fmul float %165, 0.000000e+00 %173 = fadd float %172, %152 %174 = call i32 @llvm.SI.packf16(float %167, float %169) %175 = bitcast i32 %174 to float %176 = call i32 @llvm.SI.packf16(float %171, float %173) %177 = bitcast i32 %176 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %175, float %177, float %175, float %177) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v0, v0, 1, 1, [m0] ; C8000500 v_interp_p2_f32 v0, [v0], v1, 1, 1, [m0] ; C8010501 v_mov_b32_e32 v1, 0x40400000 ; 7E0202FF 40400000 v_mad_f32 v5, -v4, v1, v2 ; D2820005 240A0304 v_mad_f32 v6, -v0, v1, v3 ; D2820006 240E0300 v_mov_b32_e32 v1, 0x3ca7ef9e ; 7E0202FF 3CA7EF9E s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[8:15], s[0:3] ; F0800F00 00020705 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v1, v7 ; 10040F01 v_mul_f32_e32 v3, v1, v8 ; 10061101 v_mul_f32_e32 v7, v1, v9 ; 100E1301 v_mul_f32_e32 v8, 0, v10 ; 10101480 v_add_f32_e32 v9, v4, v5 ; 06120B04 v_add_f32_e32 v10, v0, v6 ; 06140D00 v_mov_b32_e32 v5, 0x3daf1aa0 ; 7E0A02FF 3DAF1AA0 image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020B09 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v5, v11 ; 3E041705 v_mac_f32_e32 v3, v5, v12 ; 3E061905 v_mac_f32_e32 v7, v5, v13 ; 3E0E1B05 v_mac_f32_e32 v8, 0, v14 ; 3E101C80 v_add_f32_e32 v11, v4, v9 ; 06161304 v_add_f32_e32 v12, v0, v10 ; 06181500 v_mov_b32_e32 v6, 0x3e6d9168 ; 7E0C02FF 3E6D9168 image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[8:15], s[0:3] ; F0800F00 00020D0B s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v6, v13 ; 3E041B06 v_mac_f32_e32 v3, v6, v14 ; 3E061D06 v_mac_f32_e32 v7, v6, v15 ; 3E0E1F06 v_mac_f32_e32 v8, 0, v16 ; 3E102080 v_add_f32_e32 v9, v4, v11 ; 06121704 v_add_f32_e32 v10, v0, v12 ; 06141900 v_mov_b32_e32 v11, 0x3ea5e354 ; 7E1602FF 3EA5E354 image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020C09 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v11, v12 ; 3E04190B v_mac_f32_e32 v3, v11, v13 ; 3E061B0B v_mac_f32_e32 v7, v11, v14 ; 3E0E1D0B v_add_f32_e32 v8, v8, v15 ; 06101F08 v_add_f32_e32 v11, v4, v9 ; 06161304 v_add_f32_e32 v12, v0, v10 ; 06181500 v_add_f32_e32 v9, v4, v11 ; 06121704 v_add_f32_e32 v10, v0, v12 ; 06141900 v_add_f32_e32 v13, v4, v9 ; 061A1304 v_add_f32_e32 v14, v0, v10 ; 061C1500 image_sample v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[8:15], s[0:3] ; F0800F00 00020F0B image_sample v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020909 image_sample v[19:22], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[8:15], s[0:3] ; F0800F00 0002130D s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v2, v6, v15 ; 3E041F06 v_mac_f32_e32 v3, v6, v16 ; 3E062106 v_mac_f32_e32 v7, v6, v17 ; 3E0E2306 v_mac_f32_e32 v8, 0, v18 ; 3E102480 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v2, v5, v9 ; 3E041305 v_mac_f32_e32 v3, v5, v10 ; 3E061505 v_mac_f32_e32 v7, v5, v11 ; 3E0E1705 v_mac_f32_e32 v8, 0, v12 ; 3E101880 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v1, v19 ; 3E042701 v_mac_f32_e32 v3, v1, v20 ; 3E062901 v_mac_f32_e32 v7, v1, v21 ; 3E0E2B01 v_mac_f32_e32 v8, 0, v22 ; 3E102C80 v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702 v_cvt_pkrtz_f16_f32_e32 v1, v7, v8 ; 5E021107 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 24 Code Size: 372 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].xxxx 1: MOV TEMP[0].xy, IN[1].xyxx 2: MUL TEMP[1], CONST[2], IN[0].xxxx 3: MAD TEMP[1], CONST[3], IN[0].yyyy, TEMP[1] 4: MAD TEMP[1], CONST[4], IN[0].zzzz, TEMP[1] 5: MAD TEMP[1], CONST[5], IN[0].wwww, TEMP[1] 6: MUL TEMP[2].xy, CONST[0].xyyy, IMM[0].xyyy 7: MUL TEMP[2].xy, TEMP[2].xyyy, CONST[1].xxxx 8: MOV OUT[1], TEMP[0] 9: MOV OUT[2], TEMP[2] 10: MOV OUT[0], TEMP[1] 11: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 %34 = add i32 %5, %7 %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %34) %36 = extractelement <4 x float> %35, i32 0 %37 = extractelement <4 x float> %35, i32 1 %38 = extractelement <4 x float> %35, i32 2 %39 = extractelement <4 x float> %35, i32 3 %40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = fmul float %16, %36 %47 = fmul float %17, %36 %48 = fmul float %18, %36 %49 = fmul float %19, %36 %50 = fmul float %20, %37 %51 = fadd float %50, %46 %52 = fmul float %21, %37 %53 = fadd float %52, %47 %54 = fmul float %22, %37 %55 = fadd float %54, %48 %56 = fmul float %23, %37 %57 = fadd float %56, %49 %58 = fmul float %24, %38 %59 = fadd float %58, %51 %60 = fmul float %25, %38 %61 = fadd float %60, %53 %62 = fmul float %26, %38 %63 = fadd float %62, %55 %64 = fmul float %27, %38 %65 = fadd float %64, %57 %66 = fmul float %28, %39 %67 = fadd float %66, %59 %68 = fmul float %29, %39 %69 = fadd float %68, %61 %70 = fmul float %30, %39 %71 = fadd float %70, %63 %72 = fmul float %31, %39 %73 = fadd float %72, %65 %74 = fmul float %14, 0.000000e+00 %75 = fmul float %13, %15 %76 = fmul float %74, %15 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %44, float %45, float 1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %75, float %76, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %67, float %69, float %71, float %73) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108 s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109 s_buffer_load_dword s7, s[0:3], 0xa ; C203810A s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_buffer_load_dword s9, s[0:3], 0xc ; C204810C s_buffer_load_dword s10, s[0:3], 0xd ; C205010D s_buffer_load_dword s11, s[0:3], 0xe ; C205810E s_buffer_load_dword s14, s[0:3], 0xf ; C207010F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706 s_buffer_load_dword s15, s[0:3], 0x10 ; C2078110 s_buffer_load_dword s16, s[0:3], 0x11 ; C2080111 s_buffer_load_dword s17, s[0:3], 0x12 ; C2088112 s_buffer_load_dword s18, s[0:3], 0x13 ; C2090113 s_buffer_load_dword s19, s[0:3], 0x14 ; C2098114 s_buffer_load_dword s20, s[0:3], 0x15 ; C20A0115 s_buffer_load_dword s21, s[0:3], 0x16 ; C20A8116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 v_mul_f32_e32 v0, s5, v2 ; 10000405 v_mac_f32_e32 v0, s9, v3 ; 3E000609 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, s6, v2 ; 10020406 v_mac_f32_e32 v1, s10, v3 ; 3E02060A v_mul_f32_e32 v6, s7, v2 ; 100C0407 v_mac_f32_e32 v6, s11, v3 ; 3E0C060B v_mul_f32_e32 v2, s8, v2 ; 10040408 v_mac_f32_e32 v2, s14, v3 ; 3E04060E s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s15, v4 ; 3E00080F v_mac_f32_e32 v1, s16, v4 ; 3E020810 v_mac_f32_e32 v6, s17, v4 ; 3E0C0811 v_mac_f32_e32 v2, s18, v4 ; 3E040812 v_mac_f32_e32 v0, s19, v5 ; 3E000A13 v_mac_f32_e32 v1, s20, v5 ; 3E020A14 v_mac_f32_e32 v6, s21, v5 ; 3E0C0A15 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 v_mul_f32_e64 v3, 0, s13 ; D2100003 00001A80 v_mul_f32_e32 v3, s4, v3 ; 10060604 v_mov_b32_e32 v4, s4 ; 7E080204 v_mul_f32_e32 v4, s12, v4 ; 1008080C v_mov_b32_e32 v5, 0 ; 7E0A0280 exp 15, 33, 0, 0, 0, v4, v3, v5, v5 ; F800021F 05050304 exp 15, 12, 0, 1, 0, v0, v1, v6, v2 ; F80008CF 02060100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 244 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 3.0000, 0.0205, 0.0000, 0.0855} IMM[1] FLT32 { 0.2320, 0.0000, 0.3240, 1.0000} 0: MUL TEMP[0].xy, IN[1].xyyy, IMM[0].xxxx 1: ADD TEMP[0].xy, IN[0].xyyy, -TEMP[0].xyyy 2: MOV TEMP[1].xy, TEMP[0].xyyy 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D 4: MUL TEMP[1], TEMP[1], IMM[0].yyyz 5: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 6: MOV TEMP[2].xy, TEMP[0].xyyy 7: TEX TEMP[2], TEMP[2], SAMP[0], 2D 8: MAD TEMP[1], TEMP[2], IMM[0].wwwz, TEMP[1] 9: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 10: MOV TEMP[2].xy, TEMP[0].xyyy 11: TEX TEMP[2], TEMP[2], SAMP[0], 2D 12: MAD TEMP[1], TEMP[2], IMM[1].xxxy, TEMP[1] 13: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 14: MOV TEMP[2].xy, TEMP[0].xyyy 15: TEX TEMP[2], TEMP[2], SAMP[0], 2D 16: MAD TEMP[1], TEMP[2], IMM[1].zzzw, TEMP[1] 17: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 18: MOV TEMP[2].xy, TEMP[0].xyyy 19: TEX TEMP[2], TEMP[2], SAMP[0], 2D 20: MAD TEMP[1], TEMP[2], IMM[1].xxxy, TEMP[1] 21: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 22: MOV TEMP[2].xy, TEMP[0].xyyy 23: TEX TEMP[2], TEMP[2], SAMP[0], 2D 24: MAD TEMP[1], TEMP[2], IMM[0].wwwz, TEMP[1] 25: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy 26: MOV TEMP[0].xy, TEMP[0].xyyy 27: TEX TEMP[0], TEMP[0], SAMP[0], 2D 28: MAD TEMP[1], TEMP[0], IMM[0].yyyz, TEMP[1] 29: MOV OUT[0], TEMP[1] 30: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %23 = load <8 x i32>, <8 x i32> addrspace(2)* %22, align 32, !tbaa !0 %24 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %30 = fmul float %28, 3.000000e+00 %31 = fmul float %29, 3.000000e+00 %32 = fsub float %26, %30 %33 = fsub float %27, %31 %34 = bitcast float %32 to i32 %35 = bitcast float %33 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = bitcast <8 x i32> %23 to <32 x i8> %39 = bitcast <4 x i32> %25 to <16 x i8> %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, 0x3F94FDF3C0000000 %46 = fmul float %42, 0x3F94FDF3C0000000 %47 = fmul float %43, 0x3F94FDF3C0000000 %48 = fmul float %44, 0.000000e+00 %49 = fadd float %32, %28 %50 = fadd float %33, %29 %51 = bitcast float %49 to i32 %52 = bitcast float %50 to i32 %53 = insertelement <2 x i32> undef, i32 %51, i32 0 %54 = insertelement <2 x i32> %53, i32 %52, i32 1 %55 = bitcast <8 x i32> %23 to <32 x i8> %56 = bitcast <4 x i32> %25 to <16 x i8> %57 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %54, <32 x i8> %55, <16 x i8> %56, i32 2) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = extractelement <4 x float> %57, i32 3 %62 = fmul float %58, 0x3FB5E35400000000 %63 = fadd float %62, %45 %64 = fmul float %59, 0x3FB5E35400000000 %65 = fadd float %64, %46 %66 = fmul float %60, 0x3FB5E35400000000 %67 = fadd float %66, %47 %68 = fmul float %61, 0.000000e+00 %69 = fadd float %68, %48 %70 = fadd float %49, %28 %71 = fadd float %50, %29 %72 = bitcast float %70 to i32 %73 = bitcast float %71 to i32 %74 = insertelement <2 x i32> undef, i32 %72, i32 0 %75 = insertelement <2 x i32> %74, i32 %73, i32 1 %76 = bitcast <8 x i32> %23 to <32 x i8> %77 = bitcast <4 x i32> %25 to <16 x i8> %78 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %75, <32 x i8> %76, <16 x i8> %77, i32 2) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 1 %81 = extractelement <4 x float> %78, i32 2 %82 = extractelement <4 x float> %78, i32 3 %83 = fmul float %79, 0x3FCDB22D00000000 %84 = fadd float %83, %63 %85 = fmul float %80, 0x3FCDB22D00000000 %86 = fadd float %85, %65 %87 = fmul float %81, 0x3FCDB22D00000000 %88 = fadd float %87, %67 %89 = fmul float %82, 0.000000e+00 %90 = fadd float %89, %69 %91 = fadd float %70, %28 %92 = fadd float %71, %29 %93 = bitcast float %91 to i32 %94 = bitcast float %92 to i32 %95 = insertelement <2 x i32> undef, i32 %93, i32 0 %96 = insertelement <2 x i32> %95, i32 %94, i32 1 %97 = bitcast <8 x i32> %23 to <32 x i8> %98 = bitcast <4 x i32> %25 to <16 x i8> %99 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %97, <16 x i8> %98, i32 2) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = extractelement <4 x float> %99, i32 2 %103 = extractelement <4 x float> %99, i32 3 %104 = fmul float %100, 0x3FD4BC6A80000000 %105 = fadd float %104, %84 %106 = fmul float %101, 0x3FD4BC6A80000000 %107 = fadd float %106, %86 %108 = fmul float %102, 0x3FD4BC6A80000000 %109 = fadd float %108, %88 %110 = fadd float %103, %90 %111 = fadd float %91, %28 %112 = fadd float %92, %29 %113 = bitcast float %111 to i32 %114 = bitcast float %112 to i32 %115 = insertelement <2 x i32> undef, i32 %113, i32 0 %116 = insertelement <2 x i32> %115, i32 %114, i32 1 %117 = bitcast <8 x i32> %23 to <32 x i8> %118 = bitcast <4 x i32> %25 to <16 x i8> %119 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %116, <32 x i8> %117, <16 x i8> %118, i32 2) %120 = extractelement <4 x float> %119, i32 0 %121 = extractelement <4 x float> %119, i32 1 %122 = extractelement <4 x float> %119, i32 2 %123 = extractelement <4 x float> %119, i32 3 %124 = fmul float %120, 0x3FCDB22D00000000 %125 = fadd float %124, %105 %126 = fmul float %121, 0x3FCDB22D00000000 %127 = fadd float %126, %107 %128 = fmul float %122, 0x3FCDB22D00000000 %129 = fadd float %128, %109 %130 = fmul float %123, 0.000000e+00 %131 = fadd float %130, %110 %132 = fadd float %111, %28 %133 = fadd float %112, %29 %134 = bitcast float %132 to i32 %135 = bitcast float %133 to i32 %136 = insertelement <2 x i32> undef, i32 %134, i32 0 %137 = insertelement <2 x i32> %136, i32 %135, i32 1 %138 = bitcast <8 x i32> %23 to <32 x i8> %139 = bitcast <4 x i32> %25 to <16 x i8> %140 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %137, <32 x i8> %138, <16 x i8> %139, i32 2) %141 = extractelement <4 x float> %140, i32 0 %142 = extractelement <4 x float> %140, i32 1 %143 = extractelement <4 x float> %140, i32 2 %144 = extractelement <4 x float> %140, i32 3 %145 = fmul float %141, 0x3FB5E35400000000 %146 = fadd float %145, %125 %147 = fmul float %142, 0x3FB5E35400000000 %148 = fadd float %147, %127 %149 = fmul float %143, 0x3FB5E35400000000 %150 = fadd float %149, %129 %151 = fmul float %144, 0.000000e+00 %152 = fadd float %151, %131 %153 = fadd float %132, %28 %154 = fadd float %133, %29 %155 = bitcast float %153 to i32 %156 = bitcast float %154 to i32 %157 = insertelement <2 x i32> undef, i32 %155, i32 0 %158 = insertelement <2 x i32> %157, i32 %156, i32 1 %159 = bitcast <8 x i32> %23 to <32 x i8> %160 = bitcast <4 x i32> %25 to <16 x i8> %161 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %159, <16 x i8> %160, i32 2) %162 = extractelement <4 x float> %161, i32 0 %163 = extractelement <4 x float> %161, i32 1 %164 = extractelement <4 x float> %161, i32 2 %165 = extractelement <4 x float> %161, i32 3 %166 = fmul float %162, 0x3F94FDF3C0000000 %167 = fadd float %166, %146 %168 = fmul float %163, 0x3F94FDF3C0000000 %169 = fadd float %168, %148 %170 = fmul float %164, 0x3F94FDF3C0000000 %171 = fadd float %170, %150 %172 = fmul float %165, 0.000000e+00 %173 = fadd float %172, %152 %174 = call i32 @llvm.SI.packf16(float %167, float %169) %175 = bitcast i32 %174 to float %176 = call i32 @llvm.SI.packf16(float %171, float %173) %177 = bitcast i32 %176 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %175, float %177, float %175, float %177) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v0, v0, 1, 1, [m0] ; C8000500 v_interp_p2_f32 v0, [v0], v1, 1, 1, [m0] ; C8010501 v_mov_b32_e32 v1, 0x40400000 ; 7E0202FF 40400000 v_mad_f32 v5, -v4, v1, v2 ; D2820005 240A0304 v_mad_f32 v6, -v0, v1, v3 ; D2820006 240E0300 v_mov_b32_e32 v1, 0x3ca7ef9e ; 7E0202FF 3CA7EF9E s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[8:15], s[0:3] ; F0800F00 00020705 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v1, v7 ; 10040F01 v_mul_f32_e32 v3, v1, v8 ; 10061101 v_mul_f32_e32 v7, v1, v9 ; 100E1301 v_mul_f32_e32 v8, 0, v10 ; 10101480 v_add_f32_e32 v9, v4, v5 ; 06120B04 v_add_f32_e32 v10, v0, v6 ; 06140D00 v_mov_b32_e32 v5, 0x3daf1aa0 ; 7E0A02FF 3DAF1AA0 image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020B09 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v5, v11 ; 3E041705 v_mac_f32_e32 v3, v5, v12 ; 3E061905 v_mac_f32_e32 v7, v5, v13 ; 3E0E1B05 v_mac_f32_e32 v8, 0, v14 ; 3E101C80 v_add_f32_e32 v11, v4, v9 ; 06161304 v_add_f32_e32 v12, v0, v10 ; 06181500 v_mov_b32_e32 v6, 0x3e6d9168 ; 7E0C02FF 3E6D9168 image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[8:15], s[0:3] ; F0800F00 00020D0B s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v6, v13 ; 3E041B06 v_mac_f32_e32 v3, v6, v14 ; 3E061D06 v_mac_f32_e32 v7, v6, v15 ; 3E0E1F06 v_mac_f32_e32 v8, 0, v16 ; 3E102080 v_add_f32_e32 v9, v4, v11 ; 06121704 v_add_f32_e32 v10, v0, v12 ; 06141900 v_mov_b32_e32 v11, 0x3ea5e354 ; 7E1602FF 3EA5E354 image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020C09 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v11, v12 ; 3E04190B v_mac_f32_e32 v3, v11, v13 ; 3E061B0B v_mac_f32_e32 v7, v11, v14 ; 3E0E1D0B v_add_f32_e32 v8, v8, v15 ; 06101F08 v_add_f32_e32 v11, v4, v9 ; 06161304 v_add_f32_e32 v12, v0, v10 ; 06181500 v_add_f32_e32 v9, v4, v11 ; 06121704 v_add_f32_e32 v10, v0, v12 ; 06141900 v_add_f32_e32 v13, v4, v9 ; 061A1304 v_add_f32_e32 v14, v0, v10 ; 061C1500 image_sample v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[8:15], s[0:3] ; F0800F00 00020F0B image_sample v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020909 image_sample v[19:22], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[8:15], s[0:3] ; F0800F00 0002130D s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v2, v6, v15 ; 3E041F06 v_mac_f32_e32 v3, v6, v16 ; 3E062106 v_mac_f32_e32 v7, v6, v17 ; 3E0E2306 v_mac_f32_e32 v8, 0, v18 ; 3E102480 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v2, v5, v9 ; 3E041305 v_mac_f32_e32 v3, v5, v10 ; 3E061505 v_mac_f32_e32 v7, v5, v11 ; 3E0E1705 v_mac_f32_e32 v8, 0, v12 ; 3E101880 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v1, v19 ; 3E042701 v_mac_f32_e32 v3, v1, v20 ; 3E062901 v_mac_f32_e32 v7, v1, v21 ; 3E0E2B01 v_mac_f32_e32 v8, 0, v22 ; 3E102C80 v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702 v_cvt_pkrtz_f16_f32_e32 v1, v7, v8 ; 5E021107 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 24 Code Size: 372 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL CONST[0..17] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: FSNE TEMP[0].x, CONST[1].zzzz, IMM[0].xxxx 1: UIF TEMP[0].xxxx :0 2: MUL TEMP[0], CONST[2], IN[0].xxxx 3: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0] 4: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0] 5: ADD TEMP[0].xyz, TEMP[0], CONST[5] 6: MOV TEMP[1].x, CONST[6].xxxx 7: MOV TEMP[1].y, CONST[7].xxxx 8: MOV TEMP[1].z, CONST[8].xxxx 9: MOV TEMP[2].x, CONST[6].yyyy 10: MOV TEMP[2].y, CONST[7].yyyy 11: MOV TEMP[2].z, CONST[8].yyyy 12: MOV TEMP[3].x, CONST[6].zzzz 13: MOV TEMP[3].y, CONST[7].zzzz 14: MOV TEMP[3].z, CONST[8].zzzz 15: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[1].xxxx 16: MAD TEMP[1].xyz, TEMP[2].xyzz, IN[1].yyyy, TEMP[1].xyzz 17: MAD TEMP[1].xyz, TEMP[3].xyzz, IN[1].zzzz, TEMP[1].xyzz 18: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 19: RSQ TEMP[2].x, TEMP[2].xxxx 20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 21: MUL TEMP[2].xyz, TEMP[0].xyzz, CONST[0].wwww 22: ADD TEMP[2].xyz, CONST[0].xyzz, -TEMP[2].xyzz 23: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 24: RSQ TEMP[3].x, TEMP[3].xxxx 25: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 26: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[2].xyzz 27: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 28: ADD TEMP[2].x, IMM[0].yyyy, -TEMP[2].xxxx 29: SQRT TEMP[2].x, TEMP[2].xxxx 30: MUL TEMP[2].x, CONST[1].zzzz, TEMP[2].xxxx 31: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 32: ADD TEMP[0].xyz, TEMP[0].xyzz, -TEMP[1].xyzz 33: MUL TEMP[1], CONST[10], TEMP[0].xxxx 34: MAD TEMP[1], CONST[11], TEMP[0].yyyy, TEMP[1] 35: MAD TEMP[0], CONST[12], TEMP[0].zzzz, TEMP[1] 36: ADD TEMP[0], TEMP[0], CONST[13] 37: ELSE :0 38: MUL TEMP[1], CONST[14], IN[0].xxxx 39: MAD TEMP[1], CONST[15], IN[0].yyyy, TEMP[1] 40: MAD TEMP[1], CONST[16], IN[0].zzzz, TEMP[1] 41: ADD TEMP[0], TEMP[1], CONST[17] 42: ENDIF 43: MOV TEMP[1].xyw, TEMP[0].xyxw 44: RCP TEMP[2].x, TEMP[0].wwww 45: MUL TEMP[2].x, CONST[1].xxxx, TEMP[2].xxxx 46: MOV_SAT TEMP[2].x, TEMP[2].xxxx 47: ADD TEMP[2].x, TEMP[0].zzzz, TEMP[2].xxxx 48: MAX TEMP[0].x, TEMP[2].xxxx, -TEMP[0].wwww 49: LRP TEMP[0].x, CONST[1].yyyy, TEMP[0].xxxx, TEMP[2].xxxx 50: MOV TEMP[1].z, TEMP[0].xxxx 51: MOV OUT[0], TEMP[1] 52: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = extractelement <4 x float> %20, i32 2 %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = add i32 %5, %7 %27 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %26) %28 = extractelement <4 x float> %27, i32 0 %29 = extractelement <4 x float> %27, i32 1 %30 = extractelement <4 x float> %27, i32 2 %31 = fcmp une float %16, 0.000000e+00 br i1 %31, label %IF, label %ELSE IF: ; preds = %main_body %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %72 = fmul float %68, %21 %73 = fmul float %67, %21 %74 = fmul float %66, %21 %75 = fmul float %65, %22 %76 = fadd float %75, %72 %77 = fmul float %64, %22 %78 = fadd float %77, %73 %79 = fmul float %63, %22 %80 = fadd float %79, %74 %81 = fmul float %62, %23 %82 = fadd float %81, %76 %83 = fmul float %61, %23 %84 = fadd float %83, %78 %85 = fmul float %60, %23 %86 = fadd float %85, %80 %87 = fadd float %82, %59 %88 = fadd float %84, %58 %89 = fadd float %86, %57 %90 = fmul float %56, %28 %91 = fmul float %53, %28 %92 = fmul float %50, %28 %93 = fmul float %55, %29 %94 = fadd float %93, %90 %95 = fmul float %52, %29 %96 = fadd float %95, %91 %97 = fmul float %49, %29 %98 = fadd float %97, %92 %99 = fmul float %54, %30 %100 = fadd float %99, %94 %101 = fmul float %51, %30 %102 = fadd float %101, %96 %103 = fmul float %48, %30 %104 = fadd float %103, %98 %105 = fmul float %100, %100 %106 = fmul float %102, %102 %107 = fadd float %106, %105 %108 = fmul float %104, %104 %109 = fadd float %107, %108 %110 = call float @llvm.AMDGPU.rsq.clamped.f32(float %109) %111 = fmul float %100, %110 %112 = fmul float %102, %110 %113 = fmul float %104, %110 %114 = fmul float %87, %13 %115 = fmul float %88, %13 %116 = fmul float %89, %13 %117 = fsub float %71, %114 %118 = fsub float %70, %115 %119 = fsub float %69, %116 %120 = fmul float %117, %117 %121 = fmul float %118, %118 %122 = fadd float %121, %120 %123 = fmul float %119, %119 %124 = fadd float %122, %123 %125 = call float @llvm.AMDGPU.rsq.clamped.f32(float %124) %126 = fmul float %117, %125 %127 = fmul float %118, %125 %128 = fmul float %119, %125 %129 = fmul float %111, %126 %130 = fmul float %112, %127 %131 = fadd float %130, %129 %132 = fmul float %113, %128 %133 = fadd float %131, %132 %134 = fmul float %133, %133 %135 = fsub float 1.000000e+00, %134 %136 = call float @llvm.sqrt.f32(float %135) %137 = fmul float %16, %136 %138 = fmul float %111, %137 %139 = fmul float %112, %137 %140 = fmul float %113, %137 %141 = fsub float %87, %138 %142 = fsub float %88, %139 %143 = fsub float %89, %140 %144 = fmul float %47, %141 %145 = fmul float %46, %141 %146 = fmul float %45, %141 %147 = fmul float %44, %141 %148 = fmul float %43, %142 %149 = fadd float %148, %144 %150 = fmul float %42, %142 %151 = fadd float %150, %145 %152 = fmul float %41, %142 %153 = fadd float %152, %146 %154 = fmul float %40, %142 %155 = fadd float %154, %147 %156 = fmul float %39, %143 %157 = fadd float %156, %149 %158 = fmul float %38, %143 %159 = fadd float %158, %151 %160 = fmul float %37, %143 %161 = fadd float %160, %153 %162 = fmul float %36, %143 %163 = fadd float %162, %155 %164 = fadd float %157, %35 %165 = fadd float %159, %34 %166 = fadd float %161, %33 %167 = fadd float %163, %32 br label %ENDIF ELSE: ; preds = %main_body %168 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %169 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %170 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %171 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %172 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %173 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %174 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %175 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %176 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %177 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %178 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %179 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %180 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %181 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %182 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %183 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %184 = fmul float %183, %21 %185 = fmul float %182, %21 %186 = fmul float %181, %21 %187 = fmul float %180, %21 %188 = fmul float %179, %22 %189 = fadd float %188, %184 %190 = fmul float %178, %22 %191 = fadd float %190, %185 %192 = fmul float %177, %22 %193 = fadd float %192, %186 %194 = fmul float %176, %22 %195 = fadd float %194, %187 %196 = fmul float %175, %23 %197 = fadd float %196, %189 %198 = fmul float %174, %23 %199 = fadd float %198, %191 %200 = fmul float %173, %23 %201 = fadd float %200, %193 %202 = fmul float %172, %23 %203 = fadd float %202, %195 %204 = fadd float %197, %171 %205 = fadd float %199, %170 %206 = fadd float %201, %169 %207 = fadd float %203, %168 br label %ENDIF ENDIF: ; preds = %ELSE, %IF %temp.0 = phi float [ %164, %IF ], [ %204, %ELSE ] %temp1.0 = phi float [ %165, %IF ], [ %205, %ELSE ] %temp2.0 = phi float [ %166, %IF ], [ %206, %ELSE ] %temp3.0 = phi float [ %167, %IF ], [ %207, %ELSE ] %208 = fdiv float 1.000000e+00, %temp3.0 %209 = fmul float %14, %208 %210 = call float @llvm.AMDIL.clamp.(float %209, float 0.000000e+00, float 1.000000e+00) %211 = fadd float %temp2.0, %210 %212 = fsub float -0.000000e+00, %temp3.0 %213 = call float @llvm.maxnum.f32(float %211, float %212) %214 = call float @llvm.AMDGPU.lrp(float %15, float %213, float %211) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %temp.0, float %temp1.0, float %214, float %temp3.0) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v4, s10, v0 ; 4A08000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v4, s[4:7], 0 idxen ; E00C2000 80010004 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[3:6], v4, s[8:11], 0 idxen ; E00C2000 80020304 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_cmp_eq_f32_e64 s[6:7], 0, s4 ; D0040006 00000880 s_and_saveexec_b64 s[6:7], s[6:7] ; BE862406 s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E s_cbranch_execz BB0_1 ; BF880000 s_buffer_load_dword s5, s[0:3], 0x47 ; C2028147 s_buffer_load_dword s8, s[0:3], 0x42 ; C2040142 s_buffer_load_dword s9, s[0:3], 0x43 ; C2048143 s_buffer_load_dword s10, s[0:3], 0x44 ; C2050144 s_buffer_load_dword s11, s[0:3], 0x45 ; C2058145 s_buffer_load_dword s12, s[0:3], 0x46 ; C2060146 s_buffer_load_dword s13, s[0:3], 0x3d ; C206813D s_buffer_load_dword s14, s[0:3], 0x3e ; C207013E s_buffer_load_dword s15, s[0:3], 0x3f ; C207813F s_buffer_load_dword s16, s[0:3], 0x40 ; C2080140 s_buffer_load_dword s17, s[0:3], 0x41 ; C2088141 s_buffer_load_dword s18, s[0:3], 0x38 ; C2090138 s_buffer_load_dword s19, s[0:3], 0x39 ; C2098139 s_buffer_load_dword s20, s[0:3], 0x3a ; C20A013A s_buffer_load_dword s21, s[0:3], 0x3b ; C20A813B s_buffer_load_dword s22, s[0:3], 0x3c ; C20B013C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s18, v0 ; 100C0012 v_mul_f32_e32 v7, s19, v0 ; 100E0013 v_mul_f32_e32 v9, s20, v0 ; 10120014 v_mul_f32_e32 v10, s21, v0 ; 10140015 v_mac_f32_e32 v6, s22, v1 ; 3E0C0216 v_mac_f32_e32 v7, s13, v1 ; 3E0E020D v_mac_f32_e32 v9, s14, v1 ; 3E12020E v_mac_f32_e32 v10, s15, v1 ; 3E14020F v_mac_f32_e32 v6, s16, v2 ; 3E0C0410 v_mac_f32_e32 v7, s17, v2 ; 3E0E0411 v_mac_f32_e32 v9, s8, v2 ; 3E120408 v_mac_f32_e32 v10, s9, v2 ; 3E140409 v_add_f32_e32 v6, s10, v6 ; 060C0C0A v_add_f32_e32 v8, s11, v7 ; 06100E0B v_add_f32_e32 v9, s12, v9 ; 0612120C v_add_f32_e32 v7, s5, v10 ; 060E1405 s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F s_xor_b64 exec, exec, s[6:7] ; 89FE067E s_cbranch_execz BB0_4 ; BF880000 s_buffer_load_dword s9, s[0:3], 0x37 ; C2048137 s_buffer_load_dword s10, s[0:3], 0x32 ; C2050132 s_buffer_load_dword s11, s[0:3], 0x33 ; C2058133 s_buffer_load_dword s12, s[0:3], 0x34 ; C2060134 s_buffer_load_dword s13, s[0:3], 0x35 ; C2068135 s_buffer_load_dword s14, s[0:3], 0x36 ; C2070136 s_buffer_load_dword s15, s[0:3], 0x2d ; C207812D s_buffer_load_dword s16, s[0:3], 0x2e ; C208012E s_buffer_load_dword s17, s[0:3], 0x2f ; C208812F s_buffer_load_dword s18, s[0:3], 0x30 ; C2090130 s_buffer_load_dword s19, s[0:3], 0x31 ; C2098131 s_buffer_load_dword s20, s[0:3], 0x28 ; C20A0128 s_buffer_load_dword s21, s[0:3], 0x29 ; C20A8129 s_buffer_load_dword s22, s[0:3], 0x2a ; C20B012A s_buffer_load_dword s23, s[0:3], 0x2b ; C20B812B s_buffer_load_dword s24, s[0:3], 0x2c ; C20C012C s_buffer_load_dword s25, s[0:3], 0x1d ; C20C811D s_buffer_load_dword s26, s[0:3], 0x1e ; C20D011E s_buffer_load_dword s27, s[0:3], 0x20 ; C20D8120 s_buffer_load_dword s28, s[0:3], 0x21 ; C20E0121 s_buffer_load_dword s29, s[0:3], 0x22 ; C20E8122 s_buffer_load_dword s30, s[0:3], 0x16 ; C20F0116 s_buffer_load_dword s31, s[0:3], 0x18 ; C20F8118 s_buffer_load_dword s32, s[0:3], 0x19 ; C2100119 s_buffer_load_dword s33, s[0:3], 0x1a ; C210811A s_buffer_load_dword s34, s[0:3], 0x1c ; C211011C s_buffer_load_dword s35, s[0:3], 0x10 ; C2118110 s_buffer_load_dword s36, s[0:3], 0x11 ; C2120111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s27, v3 ; 100C061B v_mac_f32_e32 v6, s28, v4 ; 3E0C081C v_mac_f32_e32 v6, s29, v5 ; 3E0C0A1D s_buffer_load_dword s27, s[0:3], 0x12 ; C20D8112 v_mul_f32_e32 v7, s31, v3 ; 100E061F v_mac_f32_e32 v7, s32, v4 ; 3E0E0820 v_mac_f32_e32 v7, s33, v5 ; 3E0E0A21 v_mul_f32_e32 v3, s34, v3 ; 10060622 v_mac_f32_e32 v3, s25, v4 ; 3E060819 v_mac_f32_e32 v3, s26, v5 ; 3E060A1A s_buffer_load_dword s25, s[0:3], 0x14 ; C20C8114 s_buffer_load_dword s26, s[0:3], 0x15 ; C20D0115 s_buffer_load_dword s28, s[0:3], 0x9 ; C20E0109 s_buffer_load_dword s29, s[0:3], 0xa ; C20E810A s_buffer_load_dword s31, s[0:3], 0xc ; C20F810C s_buffer_load_dword s32, s[0:3], 0xd ; C210010D s_buffer_load_dword s33, s[0:3], 0xe ; C210810E s_buffer_load_dword s34, s[0:3], 0x0 ; C2110100 v_mul_f32_e32 v4, v7, v7 ; 10080F07 v_mac_f32_e32 v4, v3, v3 ; 3E080703 v_mac_f32_e32 v4, v6, v6 ; 3E080D06 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s28, v0 ; 100A001C v_mul_f32_e32 v8, s29, v0 ; 1010001D s_buffer_load_dword s28, s[0:3], 0x1 ; C20E0101 v_mac_f32_e32 v5, s32, v1 ; 3E0A0220 v_mac_f32_e32 v8, s33, v1 ; 3E100221 v_mac_f32_e32 v5, s36, v2 ; 3E0A0424 v_mac_f32_e32 v8, s27, v2 ; 3E10041B v_add_f32_e32 v5, s26, v5 ; 060A0A1A v_add_f32_e32 v8, s30, v8 ; 0610101E s_buffer_load_dword s26, s[0:3], 0x2 ; C20D0102 s_buffer_load_dword s27, s[0:3], 0x3 ; C20D8103 s_buffer_load_dword s29, s[0:3], 0x8 ; C20E8108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v9, s27 ; 7E12021B v_mad_f32 v10, -v5, v9, s28 ; D282000A 20721305 v_mul_f32_e32 v0, s29, v0 ; 1000001D v_mac_f32_e32 v0, s31, v1 ; 3E00021F v_mac_f32_e32 v0, s35, v2 ; 3E000423 v_add_f32_e32 v0, s25, v0 ; 06000019 v_mad_f32 v1, -v0, v9, s34 ; D2820001 208A1300 v_mad_f32 v2, -v8, v9, s26 ; D2820002 206A1308 v_mul_f32_e32 v9, v1, v1 ; 10120301 v_mac_f32_e32 v9, v10, v10 ; 3E12150A v_mac_f32_e32 v9, v2, v2 ; 3E120502 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mul_f32_e32 v7, v4, v7 ; 100E0F04 v_mul_f32_e32 v3, v4, v3 ; 10060704 v_mul_f32_e32 v4, v4, v6 ; 10080D04 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mul_f32_e32 v6, v9, v10 ; 100C1509 v_mul_f32_e32 v2, v9, v2 ; 10040509 v_mul_f32_e32 v1, v1, v7 ; 10020F01 v_mac_f32_e32 v1, v6, v3 ; 3E020706 v_mac_f32_e32 v1, v2, v4 ; 3E020902 v_mad_f32 v1, -v1, v1, 1.0 ; D2820001 23CA0301 v_sqrt_f32_e32 v1, v1 ; 7E026701 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mad_f32 v0, -v7, v1, v0 ; D2820000 24020307 v_mad_f32 v2, -v3, v1, v5 ; D2820002 24160303 v_mad_f32 v1, -v4, v1, v8 ; D2820001 24220304 v_mul_f32_e32 v3, s20, v0 ; 10060014 v_mul_f32_e32 v4, s21, v0 ; 10080015 v_mul_f32_e32 v5, s22, v0 ; 100A0016 v_mul_f32_e32 v0, s23, v0 ; 10000017 v_mac_f32_e32 v3, s24, v2 ; 3E060418 v_mac_f32_e32 v4, s15, v2 ; 3E08040F v_mac_f32_e32 v5, s16, v2 ; 3E0A0410 v_mac_f32_e32 v0, s17, v2 ; 3E000411 v_mac_f32_e32 v3, s18, v1 ; 3E060212 v_mac_f32_e32 v4, s19, v1 ; 3E080213 v_mac_f32_e32 v5, s10, v1 ; 3E0A020A v_mac_f32_e32 v0, s11, v1 ; 3E00020B v_add_f32_e32 v6, s12, v3 ; 060C060C v_add_f32_e32 v8, s13, v4 ; 0610080D v_add_f32_e32 v9, s14, v5 ; 06120A0E v_add_f32_e32 v7, s9, v0 ; 060E0009 s_or_b64 exec, exec, s[6:7] ; 88FE067E v_rcp_f32_e32 v0, v7 ; 7E005507 v_sub_f32_e64 v1, 1.0, s5 ; D2080001 00000AF2 v_mul_f32_e32 v0, s8, v0 ; 10000008 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_add_f32_e32 v0, v0, v9 ; 06001300 v_max_f32_e64 v2, v0, -v7 ; D2200002 40020F00 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_mac_f32_e32 v0, s5, v2 ; 3E000405 exp 15, 12, 0, 1, 0, v6, v8, v0, v7 ; F80008CF 07000806 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 12 Code Size: 744 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL OUT[0], COLOR IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxx 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %23 = bitcast i32 %22 to float %24 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %25 = bitcast i32 %24 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25) ret void } ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080 exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 20 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..7] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[4], IN[0].xxxx 1: MAD TEMP[0], CONST[5], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[6], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[7], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[0], IN[0].xxxx 5: MAD TEMP[1], CONST[1], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[2], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1].xyz, CONST[3], IN[0].wwww, TEMP[1] 8: MOV TEMP[1].xyz, TEMP[1].xyzx 9: MOV OUT[1], TEMP[0] 10: MOV OUT[2], TEMP[1] 11: MOV OUT[0], TEMP[0] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %44 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0 %46 = add i32 %5, %7 %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %45, i32 0, i32 %46) %48 = extractelement <4 x float> %47, i32 0 %49 = extractelement <4 x float> %47, i32 1 %50 = extractelement <4 x float> %47, i32 2 %51 = extractelement <4 x float> %47, i32 3 %52 = fmul float %28, %48 %53 = fmul float %29, %48 %54 = fmul float %30, %48 %55 = fmul float %31, %48 %56 = fmul float %32, %49 %57 = fadd float %56, %52 %58 = fmul float %33, %49 %59 = fadd float %58, %53 %60 = fmul float %34, %49 %61 = fadd float %60, %54 %62 = fmul float %35, %49 %63 = fadd float %62, %55 %64 = fmul float %36, %50 %65 = fadd float %64, %57 %66 = fmul float %37, %50 %67 = fadd float %66, %59 %68 = fmul float %38, %50 %69 = fadd float %68, %61 %70 = fmul float %39, %50 %71 = fadd float %70, %63 %72 = fmul float %40, %51 %73 = fadd float %72, %65 %74 = fmul float %41, %51 %75 = fadd float %74, %67 %76 = fmul float %42, %51 %77 = fadd float %76, %69 %78 = fmul float %43, %51 %79 = fadd float %78, %71 %80 = fmul float %13, %48 %81 = fmul float %14, %48 %82 = fmul float %15, %48 %83 = fmul float %16, %48 %84 = fmul float %17, %49 %85 = fadd float %84, %80 %86 = fmul float %18, %49 %87 = fadd float %86, %81 %88 = fmul float %19, %49 %89 = fadd float %88, %82 %90 = fmul float %20, %49 %91 = fadd float %90, %83 %92 = fmul float %21, %50 %93 = fadd float %92, %85 %94 = fmul float %22, %50 %95 = fadd float %94, %87 %96 = fmul float %23, %50 %97 = fadd float %96, %89 %98 = fmul float %24, %50 %99 = fadd float %98, %91 %100 = fmul float %25, %51 %101 = fadd float %100, %93 %102 = fmul float %26, %51 %103 = fadd float %102, %95 %104 = fmul float %27, %51 %105 = fadd float %104, %97 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %73, float %75, float %77, float %79) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %101, float %103, float %105, float %99) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %73, float %75, float %77, float %79) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0x10 ; C2098110 s_buffer_load_dword s20, s[0:3], 0x11 ; C20A0111 s_buffer_load_dword s21, s[0:3], 0x12 ; C20A8112 s_buffer_load_dword s22, s[0:3], 0x13 ; C20B0113 s_buffer_load_dword s23, s[0:3], 0x14 ; C20B8114 s_buffer_load_dword s24, s[0:3], 0x15 ; C20C0115 s_buffer_load_dword s25, s[0:3], 0x16 ; C20C8116 s_buffer_load_dword s26, s[0:3], 0x17 ; C20D0117 s_buffer_load_dword s27, s[0:3], 0x18 ; C20D8118 s_buffer_load_dword s28, s[0:3], 0x19 ; C20E0119 s_buffer_load_dword s29, s[0:3], 0x1a ; C20E811A s_buffer_load_dword s30, s[0:3], 0x1b ; C20F011B s_buffer_load_dword s31, s[0:3], 0x1c ; C20F811C s_buffer_load_dword s32, s[0:3], 0x1d ; C210011D s_buffer_load_dword s33, s[0:3], 0x1e ; C210811E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s19, v0 ; 10080013 v_mac_f32_e32 v4, s23, v1 ; 3E080217 v_mul_f32_e32 v5, s20, v0 ; 100A0014 v_mac_f32_e32 v5, s24, v1 ; 3E0A0218 v_mul_f32_e32 v6, s21, v0 ; 100C0015 v_mac_f32_e32 v6, s25, v1 ; 3E0C0219 v_mul_f32_e32 v7, s22, v0 ; 100E0016 v_mac_f32_e32 v7, s26, v1 ; 3E0E021A v_mul_f32_e32 v8, s4, v0 ; 10100004 v_mac_f32_e32 v8, s8, v1 ; 3E100208 v_mul_f32_e32 v9, s5, v0 ; 10120005 v_mac_f32_e32 v9, s9, v1 ; 3E120209 v_mul_f32_e32 v10, s6, v0 ; 10140006 v_mac_f32_e32 v10, s10, v1 ; 3E14020A v_mul_f32_e32 v0, s7, v0 ; 10000007 v_mac_f32_e32 v0, s11, v1 ; 3E00020B v_mac_f32_e32 v4, s27, v2 ; 3E08041B v_mac_f32_e32 v5, s28, v2 ; 3E0A041C v_mac_f32_e32 v6, s29, v2 ; 3E0C041D v_mac_f32_e32 v7, s30, v2 ; 3E0E041E v_mac_f32_e32 v8, s12, v2 ; 3E10040C v_mac_f32_e32 v9, s13, v2 ; 3E12040D v_mac_f32_e32 v10, s14, v2 ; 3E14040E v_mac_f32_e32 v0, s15, v2 ; 3E00040F v_mac_f32_e32 v4, s31, v3 ; 3E08061F v_mac_f32_e32 v5, s32, v3 ; 3E0A0620 v_mac_f32_e32 v6, s33, v3 ; 3E0C0621 v_mac_f32_e32 v7, s0, v3 ; 3E0E0600 v_mac_f32_e32 v8, s16, v3 ; 3E100610 v_mac_f32_e32 v9, s17, v3 ; 3E120611 v_mac_f32_e32 v10, s18, v3 ; 3E140612 exp 15, 32, 0, 0, 0, v4, v5, v6, v7 ; F800020F 07060504 exp 15, 33, 0, 0, 0, v8, v9, v10, v0 ; F800021F 000A0908 exp 15, 12, 0, 1, 0, v4, v5, v6, v7 ; F80008CF 07060504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 12 Code Size: 308 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0..2] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.5000, 0.0000, 1.0000, 0.3000} IMM[1] FLT32 { 0.8000, 0.0500, 0.0000, 0.0000} 0: MUL TEMP[0].xyw, IN[0], IMM[0].xxxx 1: MOV TEMP[1].x, TEMP[0].xxxx 2: MUL TEMP[2].x, TEMP[0].yyyy, CONST[1].xxxx 3: MOV TEMP[1].y, TEMP[2].xxxx 4: ADD TEMP[0].xy, TEMP[1].xyyy, TEMP[0].wwww 5: MOV TEMP[1].xyz, IMM[0].yyyy 6: ADD TEMP[2].xyz, IN[1].xyzz, -CONST[0].xyzz 7: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 8: RSQ TEMP[3].x, TEMP[3].xxxx 9: MUL TEMP[2].y, TEMP[2].xyzz, TEMP[3].xxxx 10: MAD TEMP[2].x, TEMP[2].yyyy, IMM[0].wwww, IMM[1].xxxx 11: MOV_SAT TEMP[2].x, TEMP[2].xxxx 12: ADD TEMP[2].x, IMM[0].zzzz, -TEMP[2].xxxx 13: MOV_SAT TEMP[2].x, TEMP[2].xxxx 14: MOV TEMP[0].xy, TEMP[0].xyyy 15: MOV TEMP[0].w, IN[0].wwww 16: TXP TEMP[0].x, TEMP[0], SAMP[0], 2D 17: MAD TEMP[0].x, CONST[2].zzzz, TEMP[0].xxxx, CONST[2].wwww 18: RCP TEMP[0].x, TEMP[0].xxxx 19: ADD TEMP[0].x, TEMP[0].xxxx, -IN[0].wwww 20: MUL TEMP[0].x, TEMP[0].xxxx, IMM[1].yyyy 21: MOV_SAT TEMP[0].x, TEMP[0].xxxx 22: MUL TEMP[0].x, TEMP[2].xxxx, TEMP[0].xxxx 23: MIN TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx 24: MOV_SAT TEMP[0].x, TEMP[0].xxxx 25: MOV TEMP[1].w, TEMP[0].xxxx 26: MOV OUT[0], TEMP[1] 27: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %30 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0 %32 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 %34 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %40 = fmul float %34, 5.000000e-01 %41 = fmul float %35, 5.000000e-01 %42 = fmul float %36, 5.000000e-01 %43 = fmul float %41, %27 %44 = fadd float %40, %42 %45 = fadd float %43, %42 %46 = fsub float %37, %24 %47 = fsub float %38, %25 %48 = fsub float %39, %26 %49 = fmul float %46, %46 %50 = fmul float %47, %47 %51 = fadd float %50, %49 %52 = fmul float %48, %48 %53 = fadd float %51, %52 %54 = call float @llvm.AMDGPU.rsq.clamped.f32(float %53) %55 = fmul float %47, %54 %56 = fmul float %55, 0x3FD3333340000000 %57 = fadd float %56, 0x3FE99999A0000000 %58 = call float @llvm.AMDIL.clamp.(float %57, float 0.000000e+00, float 1.000000e+00) %59 = fsub float 1.000000e+00, %58 %60 = call float @llvm.AMDIL.clamp.(float %59, float 0.000000e+00, float 1.000000e+00) %61 = fdiv float %44, %36 %62 = fdiv float %45, %36 %63 = bitcast float %61 to i32 %64 = bitcast float %62 to i32 %65 = insertelement <2 x i32> undef, i32 %63, i32 0 %66 = insertelement <2 x i32> %65, i32 %64, i32 1 %67 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %66, <32 x i8> %31, <16 x i8> %33, i32 2) %68 = extractelement <4 x float> %67, i32 0 %69 = fmul float %28, %68 %70 = fadd float %69, %29 %71 = fdiv float 1.000000e+00, %70 %72 = fsub float %71, %36 %73 = fmul float %72, 0x3FA99999A0000000 %74 = call float @llvm.AMDIL.clamp.(float %73, float 0.000000e+00, float 1.000000e+00) %75 = fmul float %60, %74 %76 = call float @llvm.minnum.f32(float %75, float 5.000000e-01) %77 = call float @llvm.AMDIL.clamp.(float %76, float 0.000000e+00, float 1.000000e+00) %78 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %79 = bitcast i32 %78 to float %80 = call i32 @llvm.SI.packf16(float 0.000000e+00, float %77) %81 = bitcast i32 %80 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %79, float %81, float %79, float %81) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s11, s[0:3], 0x4 ; C2058104 s_buffer_load_dword s12, s[0:3], 0xa ; C206010A s_buffer_load_dword s0, s[0:3], 0xb ; C200010B v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600 v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601 v_mul_f32_e32 v1, 0.5, v3 ; 100206F0 v_mul_f32_e32 v3, 0.5, v4 ; 100608F0 v_mad_f32 v2, 0.5, v2, v3 ; D2820002 040E04F0 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v3, s11, v1 ; 3E06020B s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500 v_subrev_f32_e32 v1, s8, v5 ; 0A020A08 v_subrev_f32_e32 v5, s9, v6 ; 0A0A0C09 v_subrev_f32_e32 v0, s10, v0 ; 0A00000A v_mov_b32_e32 v6, 0x6f800000 ; 7E0C02FF 6F800000 v_cmp_gt_f32_e64 vcc, |v4|, v6 ; D008016A 00020D04 v_mov_b32_e32 v6, 0x2f800000 ; 7E0C02FF 2F800000 v_cndmask_b32_e32 v6, 1.0, v6 ; 000C0CF2 v_mul_f32_e32 v7, v6, v4 ; 100E0906 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_rcp_f32_e32 v7, v7 ; 7E0E5507 v_mul_f32_e32 v1, v1, v1 ; 10020301 v_mac_f32_e32 v1, v5, v5 ; 3E020B05 v_mac_f32_e32 v1, v0, v0 ; 3E020100 v_mul_f32_e32 v0, v7, v2 ; 10000507 v_mul_f32_e32 v2, v7, v3 ; 10040707 v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 v_mul_f32_e32 v7, v0, v6 ; 100E0D00 v_mul_f32_e32 v8, v2, v6 ; 10100D02 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[4:11], s[16:19] ; F0800100 00810007 v_mul_f32_e32 v1, v1, v5 ; 10020B01 v_mov_b32_e32 v2, 0x3f4ccccd ; 7E0402FF 3F4CCCCD v_madmk_f32_e32 v1, v1, v2, 0x3e99999a ; 40020501 3E99999A v_mov_b32_e32 v2, s0 ; 7E040200 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, s12, v0 ; 3E04000C v_rcp_f32_e32 v0, v2 ; 7E005502 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_sub_f32_e32 v1, 1.0, v1 ; 080202F2 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_subrev_f32_e32 v0, v4, v0 ; 0A000104 v_mul_f32_e32 v0, 0x3d4ccccd, v0 ; 100000FF 3D4CCCCD v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_min_f32_e32 v0, 0.5, v0 ; 1E0000F0 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_cvt_pkrtz_f16_f32_e64 v1, 0, 0 ; D25E0001 00010080 v_cvt_pkrtz_f16_f32_e32 v0, 0, v0 ; 5E000080 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 328 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xyz, IN[1].xyzx 5: MOV OUT[1], TEMP[0] 6: MOV OUT[2], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = fmul float %13, %33 %45 = fmul float %14, %33 %46 = fmul float %15, %33 %47 = fmul float %16, %33 %48 = fmul float %17, %34 %49 = fadd float %48, %44 %50 = fmul float %18, %34 %51 = fadd float %50, %45 %52 = fmul float %19, %34 %53 = fadd float %52, %46 %54 = fmul float %20, %34 %55 = fadd float %54, %47 %56 = fmul float %21, %35 %57 = fadd float %56, %49 %58 = fmul float %22, %35 %59 = fadd float %58, %51 %60 = fmul float %23, %35 %61 = fadd float %60, %53 %62 = fmul float %24, %35 %63 = fadd float %62, %55 %64 = fmul float %25, %36 %65 = fadd float %64, %57 %66 = fmul float %26, %36 %67 = fadd float %66, %59 %68 = fmul float %27, %36 %69 = fadd float %68, %61 %70 = fmul float %28, %36 %71 = fadd float %70, %63 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %65, float %67, float %69, float %71) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %41, float %42, float %43, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %65, float %67, float %69, float %71) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v0, s12, v1 ; 1000020C s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, s6, v2 ; 3E000406 v_mul_f32_e32 v8, s13, v1 ; 1010020D v_mac_f32_e32 v8, s7, v2 ; 3E100407 v_mul_f32_e32 v9, s4, v1 ; 10120204 v_mac_f32_e32 v9, s8, v2 ; 3E120408 v_mul_f32_e32 v1, s5, v1 ; 10020205 v_mac_f32_e32 v1, s9, v2 ; 3E020409 v_mac_f32_e32 v0, s10, v3 ; 3E00060A v_mac_f32_e32 v8, s11, v3 ; 3E10060B v_mac_f32_e32 v9, s14, v3 ; 3E12060E v_mac_f32_e32 v1, s15, v3 ; 3E02060F v_mac_f32_e32 v0, s16, v4 ; 3E000810 v_mac_f32_e32 v8, s17, v4 ; 3E100811 v_mac_f32_e32 v9, s18, v4 ; 3E120812 v_mac_f32_e32 v1, s0, v4 ; 3E020800 exp 15, 32, 0, 0, 0, v0, v8, v9, v1 ; F800020F 01090800 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 33, 0, 0, 0, v5, v6, v7, v2 ; F800021F 02070605 exp 15, 12, 0, 1, 0, v0, v8, v9, v1 ; F80008CF 01090800 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 204 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0] DCL CONST[2] DCL CONST[4] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.1000, 0.5000, 0.0150, 2.0000} IMM[1] FLT32 { -0.9961, 1.0000, 0.0000, 0.0000} 0: MUL TEMP[0].x, CONST[0].xxxx, IMM[0].xxxx 1: MOV TEMP[1].x, TEMP[0].xxxx 2: MUL TEMP[0].x, TEMP[0].xxxx, IMM[0].yyyy 3: MOV TEMP[1].y, TEMP[0].xxxx 4: MAD TEMP[0].xy, IN[1].xyyy, IMM[0].zzzz, TEMP[1].xyyy 5: MOV TEMP[0].xy, TEMP[0].xyyy 6: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D 7: MAD TEMP[0].xyz, TEMP[0].xyzz, IMM[0].wwww, IMM[1].xxxx 8: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 9: RSQ TEMP[1].x, TEMP[1].xxxx 10: MUL TEMP[0].xy, TEMP[0].xyzz, TEMP[1].xxxx 11: MUL TEMP[1].xyw, IN[0], IMM[0].yyyy 12: ADD TEMP[1].xy, TEMP[1].xyyy, TEMP[1].wwww 13: MOV TEMP[2].x, TEMP[0].xxxx 14: RCP TEMP[3].x, CONST[4].wwww 15: MUL TEMP[3].x, CONST[4].zzzz, TEMP[3].xxxx 16: MUL TEMP[3].x, TEMP[0].yyyy, TEMP[3].xxxx 17: MOV TEMP[2].y, TEMP[3].xxxx 18: RCP TEMP[3].xy, IN[0].wwww 19: MUL TEMP[0].xy, TEMP[1].xyyy, TEMP[3].xyyy 20: MUL TEMP[1].xy, TEMP[2].xyyy, CONST[2].xxxx 21: MAD TEMP[0].xy, TEMP[1].xyyy, TEMP[3].xyyy, TEMP[0].xyyy 22: MUL TEMP[0].xy, TEMP[0].xyyy, IN[0].wwww 23: MOV TEMP[1].w, IMM[1].yyyy 24: MOV TEMP[0].xy, TEMP[0].xyyy 25: MOV TEMP[0].w, IN[0].wwww 26: TXP TEMP[0].xyz, TEMP[0], SAMP[1], 2D 27: MOV TEMP[1].xyz, TEMP[0].xyzx 28: MOV OUT[0], TEMP[1] 29: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %28 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, align 32, !tbaa !0 %30 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %33 = bitcast <8 x i32> addrspace(2)* %32 to <32 x i8> addrspace(2)* %34 = load <32 x i8>, <32 x i8> addrspace(2)* %33, align 32, !tbaa !0 %35 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %36 = bitcast <4 x i32> addrspace(2)* %35 to <16 x i8> addrspace(2)* %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %43 = fmul float %24, 0x3FB99999A0000000 %44 = fmul float %43, 5.000000e-01 %45 = fmul float %41, 0x3F8EB851E0000000 %46 = fadd float %45, %43 %47 = fmul float %42, 0x3F8EB851E0000000 %48 = fadd float %47, %44 %49 = bitcast float %46 to i32 %50 = bitcast float %48 to i32 %51 = insertelement <2 x i32> undef, i32 %49, i32 0 %52 = insertelement <2 x i32> %51, i32 %50, i32 1 %53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %52, <32 x i8> %29, <16 x i8> %31, i32 2) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = extractelement <4 x float> %53, i32 2 %57 = fmul float %54, 2.000000e+00 %58 = fadd float %57, 0xBFEFDFDF00000000 %59 = fmul float %55, 2.000000e+00 %60 = fadd float %59, 0xBFEFDFDF00000000 %61 = fmul float %56, 2.000000e+00 %62 = fadd float %61, 0xBFEFDFDF00000000 %63 = fmul float %58, %58 %64 = fmul float %60, %60 %65 = fadd float %64, %63 %66 = fmul float %62, %62 %67 = fadd float %65, %66 %68 = call float @llvm.AMDGPU.rsq.clamped.f32(float %67) %69 = fmul float %58, %68 %70 = fmul float %60, %68 %71 = fmul float %38, 5.000000e-01 %72 = fmul float %39, 5.000000e-01 %73 = fmul float %40, 5.000000e-01 %74 = fadd float %71, %73 %75 = fadd float %72, %73 %76 = fdiv float 1.000000e+00, %27 %77 = fmul float %26, %76 %78 = fmul float %70, %77 %79 = fdiv float 1.000000e+00, %40 %80 = fmul float %74, %79 %81 = fmul float %75, %79 %82 = fmul float %69, %25 %83 = fmul float %78, %25 %84 = fmul float %82, %79 %85 = fadd float %84, %80 %86 = fmul float %83, %79 %87 = fadd float %86, %81 %88 = fmul float %85, %40 %89 = fmul float %87, %40 %90 = fdiv float %88, %40 %91 = fdiv float %89, %40 %92 = bitcast float %90 to i32 %93 = bitcast float %91 to i32 %94 = insertelement <2 x i32> undef, i32 %92, i32 0 %95 = insertelement <2 x i32> %94, i32 %93, i32 1 %96 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %95, <32 x i8> %34, <16 x i8> %37, i32 2) %97 = extractelement <4 x float> %96, i32 0 %98 = extractelement <4 x float> %96, i32 1 %99 = extractelement <4 x float> %96, i32 2 %100 = call i32 @llvm.SI.packf16(float %97, float %98) %101 = bitcast i32 %100 to float %102 = call i32 @llvm.SI.packf16(float %99, float 1.000000e+00) %103 = bitcast i32 %102 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %101, float %103, float %101, float %103) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s16, s[0:3], 0x8 ; C2080108 s_buffer_load_dword s17, s[0:3], 0x12 ; C2088112 s_buffer_load_dword s18, s[0:3], 0x13 ; C2090113 v_interp_p1_f32 v0, v0, 1, 1, [m0] ; C8000500 s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 v_interp_p2_f32 v0, [v0], v1, 1, 1, [m0] ; C8010501 v_mov_b32_e32 v1, 0x3dcccccd ; 7E0202FF 3DCCCCCD s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s8, v1 ; 100C0208 v_mul_f32_e32 v7, 0.5, v6 ; 100E0CF0 v_mov_b32_e32 v1, 0x3c75c28f ; 7E0202FF 3C75C28F v_mac_f32_e32 v6, v1, v5 ; 3E0C0B01 v_mac_f32_e32 v7, v1, v0 ; 3E0E0101 s_load_dwordx4 s[0:3], s[4:5], 0x4 ; C0800504 s_load_dwordx8 s[4:11], s[6:7], 0x8 ; C0C20708 image_sample v[5:7], 7, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[20:27], s[12:15] ; F0800700 00650506 v_mov_b32_e32 v0, 0xbf7efef8 ; 7E0002FF BF7EFEF8 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v1, 2.0, v5, v0 ; D2820001 04020AF4 v_mad_f32 v5, 2.0, v6, v0 ; D2820005 04020CF4 v_mad_f32 v0, 2.0, v7, v0 ; D2820000 04020EF4 v_mul_f32_e32 v6, v1, v1 ; 100C0301 v_mac_f32_e32 v6, v5, v5 ; 3E0C0B05 v_mac_f32_e32 v6, v0, v0 ; 3E0C0100 v_rsq_clamp_f32_e32 v0, v6 ; 7E005906 v_mov_b32_e32 v6, 0x6f800000 ; 7E0C02FF 6F800000 v_cmp_gt_f32_e64 vcc, |v4|, v6 ; D008016A 00020D04 v_mov_b32_e32 v6, 0x2f800000 ; 7E0C02FF 2F800000 v_cndmask_b32_e32 v6, 1.0, v6 ; 000C0CF2 v_mul_f32_e32 v1, v0, v1 ; 10020300 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_rcp_f32_e32 v5, s18 ; 7E0A5412 v_mul_f32_e32 v7, 0.5, v4 ; 100E08F0 v_mad_f32 v2, 0.5, v2, v7 ; D2820002 041E04F0 v_mac_f32_e32 v7, 0.5, v3 ; 3E0E06F0 v_mul_f32_e32 v3, s17, v5 ; 10060A11 v_rcp_f32_e32 v5, v4 ; 7E0A5504 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mul_f32_e32 v1, s16, v1 ; 10020210 v_mul_f32_e32 v0, s16, v0 ; 10000010 v_mul_f32_e32 v2, v5, v2 ; 10040505 v_mac_f32_e32 v2, v5, v1 ; 3E040305 v_mul_f32_e32 v1, v5, v7 ; 10020F05 v_mul_f32_e32 v3, v6, v4 ; 10060906 v_rcp_f32_e32 v3, v3 ; 7E065503 v_mac_f32_e32 v1, v5, v0 ; 3E020105 v_mul_f32_e32 v0, v4, v2 ; 10000504 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_mul_f32_e32 v2, v0, v6 ; 10040D00 v_mul_f32_e32 v3, v1, v6 ; 10060D01 image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800700 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e64 v1, v2, 1.0 ; D25E0001 0001E502 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 344 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0..14] DCL CONST[16..19] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.2500, -1.0000, 10.0000, 0.4999} IMM[1] INT32 {256, 0, 1, 2} IMM[2] FLT32 { 1.0000, 0.0000, 0.1000, 0.0039} IMM[3] FLT32 { 16.0000, -8.0000, 4.0000, -2.0000} IMM[4] INT32 {4, 0, 0, 0} 0: MUL TEMP[0].x, IN[2].xxxx, IMM[0].xxxx 1: F2I TEMP[0].x, TEMP[0].xxxx 2: F2I TEMP[1].x, IN[2].yyyy 3: IDIV TEMP[2].x, TEMP[1].xxxx, IMM[1].xxxx 4: I2F TEMP[3].x, TEMP[0].xxxx 5: I2F TEMP[4].x, TEMP[2].xxxx 6: MOV TEMP[3].y, TEMP[4].xxxx 7: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx 8: INEG TEMP[2].x, TEMP[2].xxxx 9: UADD TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx 10: I2F TEMP[2].x, TEMP[2].xxxx 11: MOV TEMP[3].z, TEMP[2].xxxx 12: ADD TEMP[2].xyz, TEMP[3].xyzz, IMM[0].yyyy 13: I2F TEMP[1].x, TEMP[1].xxxx 14: ADD TEMP[1].x, IN[2].yyyy, -TEMP[1].xxxx 15: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].wwww 16: F2I TEMP[1].x, TEMP[1].xxxx 17: USEQ TEMP[4].x, TEMP[1].xxxx, IMM[1].yyyy 18: AND TEMP[4].x, TEMP[4].xxxx, IMM[2].xxxx 19: USEQ TEMP[5].x, TEMP[1].xxxx, IMM[1].zzzz 20: AND TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx 21: MOV TEMP[4].y, TEMP[5].xxxx 22: USEQ TEMP[1].x, TEMP[1].xxxx, IMM[1].wwww 23: AND TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx 24: MOV TEMP[4].z, TEMP[1].xxxx 25: MOV TEMP[1].xyz, TEMP[4].xyzx 26: MOV TEMP[4].w, IMM[2].yyyy 27: MOV TEMP[4].xyz, TEMP[3].xyzx 28: MOV TEMP[3].y, IMM[2].yzyy 29: DP4 TEMP[4].x, TEMP[1], TEMP[4] 30: MUL TEMP[3].x, TEMP[4].xxxx, IMM[2].wwww 31: MOV TEMP[3].xy, TEMP[3].xyyy 32: MOV TEMP[3].w, IMM[2].yyyy 33: TXL TEMP[3].xy, TEMP[3], SAMP[0], 2D 34: MAD TEMP[4].x, TEMP[3].xxxx, IMM[3].xxxx, IMM[3].yyyy 35: MOV TEMP[2].w, TEMP[4].xxxx 36: MUL TEMP[3].x, TEMP[3].yyyy, IMM[3].zzzz 37: MOV TEMP[1].w, TEMP[3].xxxx 38: UMUL TEMP[0].x, IMM[4].xxxx, TEMP[0].xxxx 39: I2F TEMP[0].x, TEMP[0].xxxx 40: ADD TEMP[0].x, IN[2].xxxx, -TEMP[0].xxxx 41: ADD TEMP[0].x, TEMP[0].xxxx, IMM[3].wwww 42: MUL TEMP[3], CONST[16], IN[0].xxxx 43: MAD TEMP[3], CONST[17], IN[0].yyyy, TEMP[3] 44: MAD TEMP[3], CONST[18], IN[0].zzzz, TEMP[3] 45: MAD TEMP[3], CONST[19], IN[0].wwww, TEMP[3] 46: MOV TEMP[4].x, CONST[11].xxxx 47: MOV TEMP[4].y, CONST[12].xxxx 48: MOV TEMP[4].z, CONST[13].xxxx 49: MOV TEMP[5].x, CONST[11].yyyy 50: MOV TEMP[5].y, CONST[12].yyyy 51: MOV TEMP[5].z, CONST[13].yyyy 52: MOV TEMP[6].x, CONST[11].zzzz 53: MOV TEMP[6].y, CONST[12].zzzz 54: MOV TEMP[6].z, CONST[13].zzzz 55: MUL TEMP[4].xyz, TEMP[4].xyzz, IN[1].xxxx 56: MAD TEMP[4].xyz, TEMP[5].xyzz, IN[1].yyyy, TEMP[4].xyzz 57: MAD TEMP[0].xyz, TEMP[6].xyzz, TEMP[0].xxxx, TEMP[4].xyzz 58: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[0].xyzz 59: RSQ TEMP[4].x, TEMP[4].xxxx 60: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xxxx 61: MOV TEMP[4].w, IMM[2].xxxx 62: MOV TEMP[4].xyz, TEMP[0].xyzx 63: DP4 TEMP[5].x, CONST[0], TEMP[4] 64: DP4 TEMP[6].x, CONST[1], TEMP[4] 65: MOV TEMP[5].y, TEMP[6].xxxx 66: DP4 TEMP[4].x, CONST[2], TEMP[4] 67: MOV TEMP[5].z, TEMP[4].xxxx 68: MUL TEMP[4], TEMP[0].xyzz, TEMP[0].yzzx 69: DP4 TEMP[6].x, CONST[3], TEMP[4] 70: DP4 TEMP[7].x, CONST[4], TEMP[4] 71: MOV TEMP[6].y, TEMP[7].xxxx 72: DP4 TEMP[4].x, CONST[5], TEMP[4] 73: MOV TEMP[6].z, TEMP[4].xxxx 74: MUL TEMP[4].x, TEMP[0].yyyy, TEMP[0].yyyy 75: MAD TEMP[4].x, TEMP[0].xxxx, TEMP[0].xxxx, -TEMP[4].xxxx 76: MAD TEMP[4].xyz, CONST[6].xyzz, TEMP[4].xxxx, TEMP[6].xyzz 77: ADD TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xyzz 78: MOV TEMP[0].yzw, TEMP[0].yxyz 79: MUL TEMP[5], CONST[7], IN[0].xxxx 80: MAD TEMP[5], CONST[8], IN[0].yyyy, TEMP[5] 81: MAD TEMP[5], CONST[9], IN[0].zzzz, TEMP[5] 82: MAD TEMP[5].xyz, CONST[10], IN[0].wwww, TEMP[5] 83: MOV TEMP[5].xyz, TEMP[5].xyzx 84: MOV TEMP[5].w, TEMP[4].xxxx 85: MOV TEMP[4].xy, TEMP[4].yzyy 86: MOV TEMP[0].x, TEMP[3].zzzz 87: MOV OUT[4], TEMP[5] 88: MOV OUT[1], TEMP[2] 89: MOV OUT[2], TEMP[1] 90: MOV OUT[3], TEMP[0] 91: MOV OUT[0], TEMP[3] 92: MOV OUT[5], TEMP[4] 93: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %77 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %78 = load <32 x i8>, <32 x i8> addrspace(2)* %77, align 32, !tbaa !0 %79 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 %83 = add i32 %5, %7 %84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83) %85 = extractelement <4 x float> %84, i32 0 %86 = extractelement <4 x float> %84, i32 1 %87 = extractelement <4 x float> %84, i32 2 %88 = extractelement <4 x float> %84, i32 3 %89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0 %91 = add i32 %5, %7 %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91) %93 = extractelement <4 x float> %92, i32 0 %94 = extractelement <4 x float> %92, i32 1 %95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0 %97 = add i32 %5, %7 %98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97) %99 = extractelement <4 x float> %98, i32 0 %100 = extractelement <4 x float> %98, i32 1 %101 = fmul float %99, 2.500000e-01 %102 = fptosi float %101 to i32 %103 = fptosi float %100 to i32 %104 = sdiv i32 %103, 256 %105 = sitofp i32 %102 to float %106 = sitofp i32 %104 to float %107 = shl nsw i32 %104, 8 %108 = sub i32 %103, %107 %109 = sitofp i32 %108 to float %110 = fadd float %105, -1.000000e+00 %111 = fadd float %106, -1.000000e+00 %112 = fadd float %109, -1.000000e+00 %113 = sitofp i32 %103 to float %114 = fsub float %100, %113 %115 = fmul float %114, 1.000000e+01 %116 = fadd float %115, 0x3FDFFE5CA0000000 %117 = fptosi float %116 to i32 %118 = icmp eq i32 %117, 0 %119 = select i1 %118, float 1.000000e+00, float 0.000000e+00 %120 = icmp eq i32 %117, 1 %121 = select i1 %120, float 1.000000e+00, float 0.000000e+00 %122 = icmp eq i32 %117, 2 %123 = select i1 %122, float 1.000000e+00, float 0.000000e+00 %124 = fmul float %119, %105 %125 = fmul float %121, %106 %126 = fadd float %124, %125 %127 = fmul float %123, %109 %128 = fadd float %126, %127 %129 = fadd float %128, 0.000000e+00 %130 = fmul float %129, 0x3F70101020000000 %131 = bitcast float %130 to i32 %132 = insertelement <4 x i32> undef, i32 %131, i32 0 %133 = insertelement <4 x i32> %132, i32 1036831949, i32 1 %134 = insertelement <4 x i32> %133, i32 0, i32 2 %135 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %134, <32 x i8> %78, <16 x i8> %80, i32 2) %136 = extractelement <4 x float> %135, i32 0 %137 = extractelement <4 x float> %135, i32 1 %138 = fmul float %136, 1.600000e+01 %139 = fadd float %138, -8.000000e+00 %140 = fmul float %137, 4.000000e+00 %141 = shl i32 %102, 2 %142 = sitofp i32 %141 to float %143 = fsub float %99, %142 %144 = fadd float %143, -2.000000e+00 %145 = fmul float %61, %85 %146 = fmul float %62, %85 %147 = fmul float %63, %85 %148 = fmul float %64, %85 %149 = fmul float %65, %86 %150 = fadd float %149, %145 %151 = fmul float %66, %86 %152 = fadd float %151, %146 %153 = fmul float %67, %86 %154 = fadd float %153, %147 %155 = fmul float %68, %86 %156 = fadd float %155, %148 %157 = fmul float %69, %87 %158 = fadd float %157, %150 %159 = fmul float %70, %87 %160 = fadd float %159, %152 %161 = fmul float %71, %87 %162 = fadd float %161, %154 %163 = fmul float %72, %87 %164 = fadd float %163, %156 %165 = fmul float %73, %88 %166 = fadd float %165, %158 %167 = fmul float %74, %88 %168 = fadd float %167, %160 %169 = fmul float %75, %88 %170 = fadd float %169, %162 %171 = fmul float %76, %88 %172 = fadd float %171, %164 %173 = fmul float %52, %93 %174 = fmul float %55, %93 %175 = fmul float %58, %93 %176 = fmul float %53, %94 %177 = fadd float %176, %173 %178 = fmul float %56, %94 %179 = fadd float %178, %174 %180 = fmul float %59, %94 %181 = fadd float %180, %175 %182 = fmul float %54, %144 %183 = fadd float %182, %177 %184 = fmul float %57, %144 %185 = fadd float %184, %179 %186 = fmul float %60, %144 %187 = fadd float %186, %181 %188 = fmul float %183, %183 %189 = fmul float %185, %185 %190 = fadd float %189, %188 %191 = fmul float %187, %187 %192 = fadd float %190, %191 %193 = call float @llvm.AMDGPU.rsq.clamped.f32(float %192) %194 = fmul float %183, %193 %195 = fmul float %185, %193 %196 = fmul float %187, %193 %197 = fmul float %13, %194 %198 = fmul float %14, %195 %199 = fadd float %197, %198 %200 = fmul float %15, %196 %201 = fadd float %199, %200 %202 = fadd float %201, %16 %203 = fmul float %17, %194 %204 = fmul float %18, %195 %205 = fadd float %203, %204 %206 = fmul float %19, %196 %207 = fadd float %205, %206 %208 = fadd float %207, %20 %209 = fmul float %21, %194 %210 = fmul float %22, %195 %211 = fadd float %209, %210 %212 = fmul float %23, %196 %213 = fadd float %211, %212 %214 = fadd float %213, %24 %215 = fmul float %194, %195 %216 = fmul float %195, %196 %217 = fmul float %196, %196 %218 = fmul float %196, %194 %219 = fmul float %25, %215 %220 = fmul float %26, %216 %221 = fadd float %219, %220 %222 = fmul float %27, %217 %223 = fadd float %221, %222 %224 = fmul float %28, %218 %225 = fadd float %223, %224 %226 = fmul float %29, %215 %227 = fmul float %30, %216 %228 = fadd float %226, %227 %229 = fmul float %31, %217 %230 = fadd float %228, %229 %231 = fmul float %32, %218 %232 = fadd float %230, %231 %233 = fmul float %33, %215 %234 = fmul float %34, %216 %235 = fadd float %233, %234 %236 = fmul float %35, %217 %237 = fadd float %235, %236 %238 = fmul float %36, %218 %239 = fadd float %237, %238 %240 = fmul float %195, %195 %241 = fmul float %194, %194 %242 = fsub float %241, %240 %243 = fmul float %37, %242 %244 = fadd float %243, %225 %245 = fmul float %38, %242 %246 = fadd float %245, %232 %247 = fmul float %39, %242 %248 = fadd float %247, %239 %249 = fadd float %244, %202 %250 = fadd float %246, %208 %251 = fadd float %248, %214 %252 = fmul float %40, %85 %253 = fmul float %41, %85 %254 = fmul float %42, %85 %255 = fmul float %43, %86 %256 = fadd float %255, %252 %257 = fmul float %44, %86 %258 = fadd float %257, %253 %259 = fmul float %45, %86 %260 = fadd float %259, %254 %261 = fmul float %46, %87 %262 = fadd float %261, %256 %263 = fmul float %47, %87 %264 = fadd float %263, %258 %265 = fmul float %48, %87 %266 = fadd float %265, %260 %267 = fmul float %49, %88 %268 = fadd float %267, %262 %269 = fmul float %50, %88 %270 = fadd float %269, %264 %271 = fmul float %51, %88 %272 = fadd float %271, %266 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %110, float %111, float %112, float %139) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %119, float %121, float %123, float %140) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %170, float %194, float %195, float %196) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %268, float %270, float %272, float %249) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %250, float %251, float %251, float %218) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %166, float %168, float %170, float %172) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0x3efff2e5 ; 7E0202FF 3EFFF2E5 v_mov_b32_e32 v2, 0xc1000000 ; 7E0402FF C1000000 v_mov_b32_e32 v5, 0 ; 7E0A0280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[60:63], s[2:3], 0x0 ; C09E0300 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s45, s[60:63], 0x26 ; C216BD26 s_buffer_load_dword s44, s[60:63], 0x28 ; C2163D28 buffer_load_format_xyzw v[6:9], v0, s[0:3], 0 idxen ; E00C2000 80000600 buffer_load_format_xyzw v[10:13], v0, s[12:15], 0 idxen ; E00C2000 80030A00 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[12:15], v0, s[8:11], 0 idxen ; E00C2000 80020C00 s_buffer_load_dword s43, s[60:63], 0x29 ; C215BD29 s_buffer_load_dword s42, s[60:63], 0x2a ; C2153D2A s_buffer_load_dword s64, s[60:63], 0x2c ; C2203D2C s_buffer_load_dword s46, s[60:63], 0x2d ; C2173D2D s_buffer_load_dword s39, s[60:63], 0x2e ; C213BD2E s_buffer_load_dword s65, s[60:63], 0x30 ; C220BD30 s_buffer_load_dword s47, s[60:63], 0x31 ; C217BD31 s_buffer_load_dword s40, s[60:63], 0x32 ; C2143D32 s_buffer_load_dword s66, s[60:63], 0x34 ; C2213D34 s_buffer_load_dword s48, s[60:63], 0x35 ; C2183D35 s_buffer_load_dword s41, s[60:63], 0x36 ; C214BD36 s_buffer_load_dword s67, s[60:63], 0x40 ; C221BD40 s_buffer_load_dword s68, s[60:63], 0x41 ; C2223D41 s_buffer_load_dword s1, s[60:63], 0xa ; C200BD0A s_buffer_load_dword s0, s[60:63], 0xb ; C2003D0B s_buffer_load_dword s13, s[60:63], 0xc ; C206BD0C s_buffer_load_dword s16, s[60:63], 0xd ; C2083D0D s_buffer_load_dword s12, s[60:63], 0xe ; C2063D0E s_buffer_load_dword s9, s[60:63], 0x0 ; C204BD00 s_buffer_load_dword s10, s[60:63], 0x1 ; C2053D01 s_buffer_load_dword s8, s[60:63], 0x2 ; C2043D02 s_buffer_load_dword s2, s[60:63], 0x3 ; C2013D03 s_buffer_load_dword s11, s[60:63], 0x4 ; C205BD04 s_buffer_load_dword s3, s[60:63], 0xf ; C201BD0F s_buffer_load_dword s18, s[60:63], 0x10 ; C2093D10 s_buffer_load_dword s27, s[60:63], 0x11 ; C20DBD11 s_buffer_load_dword s14, s[60:63], 0x12 ; C2073D12 s_buffer_load_dword s4, s[60:63], 0x13 ; C2023D13 s_buffer_load_dword s25, s[60:63], 0x14 ; C20CBD14 s_buffer_load_dword s38, s[60:63], 0x15 ; C2133D15 s_buffer_load_dword s17, s[60:63], 0x16 ; C208BD16 s_buffer_load_dword s5, s[60:63], 0x17 ; C202BD17 s_buffer_load_dword s6, s[60:63], 0x18 ; C2033D18 s_buffer_load_dword s69, s[60:63], 0x42 ; C222BD42 s_buffer_load_dword s70, s[60:63], 0x43 ; C2233D43 s_buffer_load_dword s50, s[60:63], 0x44 ; C2193D44 s_buffer_load_dword s51, s[60:63], 0x45 ; C219BD45 s_buffer_load_dword s49, s[60:63], 0x46 ; C218BD46 s_buffer_load_dword s19, s[60:63], 0x5 ; C209BD05 s_buffer_load_dword s15, s[60:63], 0x6 ; C207BD06 s_buffer_load_dword s7, s[60:63], 0x7 ; C203BD07 s_buffer_load_dword s24, s[60:63], 0x8 ; C20C3D08 s_buffer_load_dword s26, s[60:63], 0x9 ; C20D3D09 s_buffer_load_dword s36, s[60:63], 0x19 ; C2123D19 s_buffer_load_dword s37, s[60:63], 0x1a ; C212BD1A s_buffer_load_dword s59, s[60:63], 0x1c ; C21DBD1C s_buffer_load_dword s58, s[60:63], 0x1d ; C21D3D1D s_buffer_load_dword s57, s[60:63], 0x1e ; C21CBD1E s_buffer_load_dword s56, s[60:63], 0x20 ; C21C3D20 s_buffer_load_dword s54, s[60:63], 0x21 ; C21B3D21 s_buffer_load_dword s55, s[60:63], 0x22 ; C21BBD22 s_buffer_load_dword s52, s[60:63], 0x24 ; C21A3D24 s_buffer_load_dword s53, s[60:63], 0x25 ; C21ABD25 s_buffer_load_dword s71, s[60:63], 0x47 ; C223BD47 s_buffer_load_dword s72, s[60:63], 0x48 ; C2243D48 s_buffer_load_dword s73, s[60:63], 0x49 ; C224BD49 s_buffer_load_dword s74, s[60:63], 0x4a ; C2253D4A s_buffer_load_dword s75, s[60:63], 0x4b ; C225BD4B s_buffer_load_dword s76, s[60:63], 0x4c ; C2263D4C s_buffer_load_dword s77, s[60:63], 0x4d ; C226BD4D s_buffer_load_dword s78, s[60:63], 0x4e ; C2273D4E s_buffer_load_dword s60, s[60:63], 0x4f ; C21E3D4F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s67, v6 ; 10000C43 v_mul_f32_e32 v14, s68, v6 ; 101C0C44 v_mul_f32_e32 v15, s69, v6 ; 101E0C45 v_mul_f32_e32 v16, s70, v6 ; 10200C46 v_mul_f32_e32 v17, s64, v10 ; 10221440 v_mul_f32_e32 v18, s65, v10 ; 10241441 v_mul_f32_e32 v10, s66, v10 ; 10141442 v_mul_f32_e32 v19, s59, v6 ; 10260C3B v_mul_f32_e32 v20, s58, v6 ; 10280C3A v_mul_f32_e32 v6, s57, v6 ; 100C0C39 v_mac_f32_e32 v0, s50, v7 ; 3E000E32 v_mac_f32_e32 v14, s51, v7 ; 3E1C0E33 v_mac_f32_e32 v15, s49, v7 ; 3E1E0E31 v_mac_f32_e32 v16, s71, v7 ; 3E200E47 v_mac_f32_e32 v17, s46, v11 ; 3E22162E v_mac_f32_e32 v18, s47, v11 ; 3E24162F v_mac_f32_e32 v10, s48, v11 ; 3E141630 v_mac_f32_e32 v19, s56, v7 ; 3E260E38 v_mac_f32_e32 v20, s54, v7 ; 3E280E36 v_mac_f32_e32 v6, s55, v7 ; 3E0C0E37 v_mac_f32_e32 v0, s72, v8 ; 3E001048 v_mac_f32_e32 v14, s73, v8 ; 3E1C1049 v_mac_f32_e32 v15, s74, v8 ; 3E1E104A v_mac_f32_e32 v16, s75, v8 ; 3E20104B v_mac_f32_e32 v19, s52, v8 ; 3E261034 v_mac_f32_e32 v20, s53, v8 ; 3E281035 v_mac_f32_e32 v6, s45, v8 ; 3E0C102D v_mac_f32_e32 v0, s76, v9 ; 3E00124C v_mac_f32_e32 v14, s77, v9 ; 3E1C124D v_mac_f32_e32 v15, s78, v9 ; 3E1E124E v_mul_f32_e32 v3, 0x3e800000, v12 ; 100618FF 3E800000 v_cvt_i32_f32_e32 v4, v13 ; 7E08110D v_cvt_i32_f32_e32 v3, v3 ; 7E061103 v_mac_f32_e32 v16, s60, v9 ; 3E20123C v_mac_f32_e32 v19, s44, v9 ; 3E26122C v_cvt_f32_i32_e32 v7, v4 ; 7E0E0B04 v_lshlrev_b32_e32 v8, 2, v3 ; 34100682 v_cvt_f32_i32_e32 v8, v8 ; 7E100B08 v_mac_f32_e32 v20, s43, v9 ; 3E28122B v_mac_f32_e32 v6, s42, v9 ; 3E0C122A v_subrev_f32_e32 v7, v7, v13 ; 0A0E1B07 v_subrev_f32_e32 v8, v8, v12 ; 0A101908 v_madmk_f32_e32 v1, v7, v1, 0x41200000 ; 40020307 41200000 v_ashrrev_i32_e32 v7, 31, v4 ; 300E089F v_lshrrev_b32_e32 v7, 24, v7 ; 2C0E0E98 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_add_i32_e32 v7, v4, v7 ; 4A0E0F04 v_and_b32_e32 v9, 0xffffff00, v7 ; 36120EFF FFFFFF00 v_sub_i32_e32 v4, v4, v9 ; 4C081304 v_cmp_eq_i32_e32 vcc, 0, v1 ; 7D040280 v_cndmask_b32_e64 v9, 0, 1.0, vcc ; D2000009 01A9E480 v_cmp_eq_i32_e32 vcc, 1, v1 ; 7D040281 v_cndmask_b32_e64 v11, 0, 1.0, vcc ; D200000B 01A9E480 v_ashrrev_i32_e32 v7, 8, v7 ; 300E0E88 v_cvt_f32_i32_e32 v7, v7 ; 7E0E0B07 v_cvt_f32_i32_e32 v12, v3 ; 7E180B03 v_cvt_f32_i32_e32 v13, v4 ; 7E1A0B04 v_cmp_eq_i32_e32 vcc, 2, v1 ; 7D040282 v_mul_f32_e32 v1, v7, v11 ; 10021707 v_mac_f32_e32 v1, v12, v9 ; 3E02130C v_cndmask_b32_e64 v21, 0, 1.0, vcc ; D2000015 01A9E480 v_mac_f32_e32 v1, v13, v21 ; 3E022B0D v_add_f32_e32 v1, 0, v1 ; 06020280 v_mul_f32_e32 v3, 0x3b808081, v1 ; 100602FF 3B808081 v_mov_b32_e32 v4, 0x3dcccccd ; 7E0802FF 3DCCCCCD image_sample_l v[3:4], 3, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[28:35], s[20:23] ; F0900300 00A70303 v_add_f32_e32 v1, -2.0, v8 ; 060210F5 v_mac_f32_e32 v17, s39, v1 ; 3E220227 v_mac_f32_e32 v18, s40, v1 ; 3E240228 v_mac_f32_e32 v10, s41, v1 ; 3E140229 v_add_f32_e32 v1, -1.0, v12 ; 060218F3 v_add_f32_e32 v5, -1.0, v7 ; 060A0EF3 v_add_f32_e32 v7, -1.0, v13 ; 060E1AF3 v_mul_f32_e32 v8, v17, v17 ; 10102311 v_mac_f32_e32 v8, v18, v18 ; 3E102512 v_mac_f32_e32 v8, v10, v10 ; 3E10150A v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v2, v3, v2, 0x41800000 ; 40040503 41800000 exp 15, 32, 0, 0, 0, v1, v5, v7, v2 ; F800020F 02070501 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, 4.0, v4 ; 100208F6 exp 15, 33, 0, 0, 0, v9, v11, v21, v1 ; F800021F 01150B09 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, v8, v17 ; 10022308 v_mul_f32_e32 v2, v8, v18 ; 10042508 v_mul_f32_e32 v3, v8, v10 ; 10061508 v_mul_f32_e32 v4, v3, v2 ; 10080503 v_mul_f32_e32 v5, s16, v4 ; 100A0810 v_mul_f32_e32 v7, s27, v4 ; 100E081B v_mul_f32_e32 v4, s38, v4 ; 10080826 v_mul_f32_e32 v8, v2, v1 ; 10100302 v_mac_f32_e32 v5, s13, v8 ; 3E0A100D v_mac_f32_e32 v7, s18, v8 ; 3E0E1012 v_mac_f32_e32 v4, s25, v8 ; 3E081019 v_mul_f32_e32 v8, v3, v3 ; 10100703 v_mac_f32_e32 v5, s12, v8 ; 3E0A100C v_mac_f32_e32 v7, s14, v8 ; 3E0E100E v_mac_f32_e32 v4, s17, v8 ; 3E081011 v_mul_f32_e32 v8, s10, v2 ; 1010040A v_mac_f32_e32 v8, s9, v1 ; 3E100209 v_mul_f32_e32 v9, s19, v2 ; 10120413 v_mac_f32_e32 v9, s11, v1 ; 3E12020B v_mul_f32_e32 v10, s26, v2 ; 1014041A v_mac_f32_e32 v10, s24, v1 ; 3E140218 v_mac_f32_e32 v8, s8, v3 ; 3E100608 v_mac_f32_e32 v9, s15, v3 ; 3E12060F v_mac_f32_e32 v10, s1, v3 ; 3E140601 exp 15, 34, 0, 0, 0, v15, v1, v2, v3 ; F800022F 0302010F s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, v1, v3 ; 10060701 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_mad_f32 v1, v1, v1, -v2 ; D2820001 840A0301 v_add_f32_e32 v2, s2, v8 ; 06041002 v_mac_f32_e32 v5, s3, v3 ; 3E0A0603 v_mac_f32_e32 v7, s4, v3 ; 3E0E0604 v_mac_f32_e32 v4, s5, v3 ; 3E080605 v_mac_f32_e32 v5, s6, v1 ; 3E0A0206 v_mac_f32_e32 v7, s36, v1 ; 3E0E0224 v_mac_f32_e32 v4, s37, v1 ; 3E080225 v_add_f32_e32 v1, v2, v5 ; 06020B02 exp 15, 35, 0, 0, 0, v19, v20, v6, v1 ; F800023F 01061413 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v1, s7, v9 ; 06021207 v_add_f32_e32 v1, v1, v7 ; 06020F01 v_add_f32_e32 v2, s0, v10 ; 06041400 v_add_f32_e32 v2, v2, v4 ; 06040902 exp 15, 36, 0, 0, 0, v1, v2, v2, v3 ; F800024F 03020201 exp 15, 12, 0, 1, 0, v0, v14, v15, v16 ; F80008CF 100F0E00 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 88 VGPRS: 24 Code Size: 928 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL SVIEW[6], 2D, FLOAT DCL SVIEW[7], 2D, FLOAT DCL SVIEW[8], 2D, FLOAT DCL SVIEW[9], 2D, FLOAT DCL CONST[0..4] DCL CONST[15..23] DCL TEMP[0..37], LOCAL IMM[0] FLT32 { -0.2000, 7.0000, 0.0100, 0.5000} IMM[1] FLT32 { 64.0000, -64.0000, 4.0000, 0.6931} IMM[2] FLT32 { 0.0039, 0.0020, 1.0000, 2.0000} IMM[3] FLT32 { 3.0000, 0.0000, -1.0000, 0.0001} IMM[4] FLT32 { 32.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].x, IN[3].wwww 1: MOV TEMP[0].yz, IN[4].yxyy 2: DP3 TEMP[1].x, CONST[1].xyzz, CONST[1].xyzz 3: RSQ TEMP[1].x, TEMP[1].xxxx 4: MUL TEMP[1].xyz, CONST[1].xyzz, TEMP[1].xxxx 5: ADD TEMP[2].xyz, CONST[0].xyzz, -IN[3].xyzz 6: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 7: RSQ TEMP[3].x, TEMP[3].xxxx 8: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 9: ABS TEMP[3].xyz, IN[2].yzww 10: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 11: RSQ TEMP[4].x, TEMP[4].xxxx 12: MAD TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx, IMM[0].xxxx 13: MUL TEMP[3].xyz, TEMP[3].xyzz, IMM[0].yyyy 14: MAX TEMP[3].xyz, TEMP[3].xyzz, IMM[0].zzzz 15: ADD TEMP[4].x, TEMP[3].xxxx, TEMP[3].yyyy 16: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[3].zzzz 17: RCP TEMP[4].xyz, TEMP[4].xxxx 18: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xyzz 19: ADD TEMP[4], IN[0], IMM[0].wwww 20: FLR TEMP[4].xyz, TEMP[4] 21: MOV TEMP[5].x, CONST[15].xxxx 22: MUL TEMP[6].x, TEMP[4].xxxx, CONST[15].xxxx 23: MOV TEMP[7].x, TEMP[6].xxxx 24: FLR TEMP[6].x, TEMP[6].xxxx 25: MUL TEMP[6].x, TEMP[6].xxxx, CONST[15].xxxx 26: FSGE TEMP[8].x, TEMP[4].xxxx, IMM[1].xxxx 27: UIF TEMP[8].xxxx :0 28: MOV TEMP[5].x, CONST[16].xxxx 29: ADD TEMP[8].x, TEMP[4].xxxx, IMM[1].yyyy 30: MUL TEMP[8].x, TEMP[8].xxxx, CONST[16].xxxx 31: MOV TEMP[7].x, TEMP[8].xxxx 32: FLR TEMP[9].x, TEMP[8].xxxx 33: MUL TEMP[9].x, TEMP[9].xxxx, CONST[16].xxxx 34: MOV TEMP[6].x, TEMP[9].xxxx 35: FRC TEMP[8].x, TEMP[8].xxxx 36: FRC TEMP[10].x, TEMP[9].xxxx 37: MOV TEMP[8].y, TEMP[10].xxxx 38: FLR TEMP[9].x, TEMP[9].xxxx 39: ADD TEMP[9].x, TEMP[9].xxxx, IMM[1].zzzz 40: MOV TEMP[8].z, TEMP[9].xxxx 41: MOV TEMP[8].xyz, TEMP[8].xyzx 42: ELSE :0 43: FRC TEMP[7].x, TEMP[7].xxxx 44: FRC TEMP[9].x, TEMP[6].xxxx 45: MOV TEMP[7].y, TEMP[9].xxxx 46: FLR TEMP[6].x, TEMP[6].xxxx 47: MOV TEMP[7].z, TEMP[6].xxxx 48: MOV TEMP[8].xyz, TEMP[7].xyzx 49: ENDIF 50: MOV TEMP[6].x, CONST[15].xxxx 51: MUL TEMP[7].x, TEMP[4].yyyy, CONST[15].xxxx 52: MOV TEMP[9].x, TEMP[7].xxxx 53: FLR TEMP[7].x, TEMP[7].xxxx 54: MUL TEMP[7].x, TEMP[7].xxxx, CONST[15].xxxx 55: FSGE TEMP[10].x, TEMP[4].yyyy, IMM[1].xxxx 56: UIF TEMP[10].xxxx :0 57: MOV TEMP[6].x, CONST[16].xxxx 58: ADD TEMP[10].x, TEMP[4].yyyy, IMM[1].yyyy 59: MUL TEMP[10].x, TEMP[10].xxxx, CONST[16].xxxx 60: MOV TEMP[9].x, TEMP[10].xxxx 61: FLR TEMP[11].x, TEMP[10].xxxx 62: MUL TEMP[11].x, TEMP[11].xxxx, CONST[16].xxxx 63: MOV TEMP[7].x, TEMP[11].xxxx 64: FRC TEMP[10].x, TEMP[10].xxxx 65: FRC TEMP[12].x, TEMP[11].xxxx 66: MOV TEMP[10].y, TEMP[12].xxxx 67: FLR TEMP[11].x, TEMP[11].xxxx 68: ADD TEMP[11].x, TEMP[11].xxxx, IMM[1].zzzz 69: MOV TEMP[10].z, TEMP[11].xxxx 70: MOV TEMP[10].xyz, TEMP[10].xyzx 71: ELSE :0 72: FRC TEMP[9].x, TEMP[9].xxxx 73: FRC TEMP[11].x, TEMP[7].xxxx 74: MOV TEMP[9].y, TEMP[11].xxxx 75: FLR TEMP[7].x, TEMP[7].xxxx 76: MOV TEMP[9].z, TEMP[7].xxxx 77: MOV TEMP[10].xyz, TEMP[9].xyzx 78: ENDIF 79: MOV TEMP[7].x, CONST[15].xxxx 80: MUL TEMP[9].x, TEMP[4].zzzz, CONST[15].xxxx 81: MOV TEMP[11].x, TEMP[9].xxxx 82: FLR TEMP[9].x, TEMP[9].xxxx 83: MUL TEMP[9].x, TEMP[9].xxxx, CONST[15].xxxx 84: FSGE TEMP[12].x, TEMP[4].zzzz, IMM[1].xxxx 85: UIF TEMP[12].xxxx :0 86: MOV TEMP[7].x, CONST[16].xxxx 87: ADD TEMP[4].x, TEMP[4].zzzz, IMM[1].yyyy 88: MUL TEMP[4].x, TEMP[4].xxxx, CONST[16].xxxx 89: MOV TEMP[11].x, TEMP[4].xxxx 90: FLR TEMP[12].x, TEMP[4].xxxx 91: MUL TEMP[12].x, TEMP[12].xxxx, CONST[16].xxxx 92: MOV TEMP[9].x, TEMP[12].xxxx 93: FRC TEMP[4].x, TEMP[4].xxxx 94: FRC TEMP[13].x, TEMP[12].xxxx 95: MOV TEMP[4].y, TEMP[13].xxxx 96: FLR TEMP[12].x, TEMP[12].xxxx 97: ADD TEMP[12].x, TEMP[12].xxxx, IMM[1].zzzz 98: MOV TEMP[4].z, TEMP[12].xxxx 99: MOV TEMP[4].xyz, TEMP[4].xyzx 100: ELSE :0 101: FRC TEMP[11].x, TEMP[11].xxxx 102: FRC TEMP[12].x, TEMP[9].xxxx 103: MOV TEMP[11].y, TEMP[12].xxxx 104: FLR TEMP[9].x, TEMP[9].xxxx 105: MOV TEMP[11].z, TEMP[9].xxxx 106: MOV TEMP[4].xyz, TEMP[11].xyzx 107: ENDIF 108: ADD TEMP[9].xyz, IN[3].xyzz, -CONST[0].xyzz 109: DP3 TEMP[9].x, TEMP[9].xyzz, TEMP[9].xyzz 110: MUL TEMP[9].x, CONST[21].xxxx, TEMP[9].xxxx 111: LG2 TEMP[9].x, TEMP[9].xxxx 112: MUL TEMP[9].x, TEMP[9].xxxx, IMM[1].wwww 113: MUL TEMP[9].x, TEMP[9].xxxx, CONST[20].xxxx 114: MOV TEMP[11].xy, IN[3].xyxx 115: MOV TEMP[12].x, IMM[2].xxxx 116: FSNE TEMP[13].x, CONST[15].xxxx, TEMP[5].xxxx 117: UIF TEMP[13].xxxx :0 118: MOV TEMP[12].x, IMM[2].yyyy 119: RCP TEMP[13].x, CONST[18].xxxx 120: MUL TEMP[11].xy, IN[3].xyyy, TEMP[13].xxxx 121: ELSE :0 122: RCP TEMP[13].x, CONST[17].xxxx 123: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx 124: ENDIF 125: FRC TEMP[11].xy, TEMP[11].xyyy 126: MUL TEMP[13].x, CONST[19].xxxx, IMM[2].wwww 127: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx 128: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx 129: MUL TEMP[12].x, TEMP[12].xxxx, CONST[19].xxxx 130: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx 131: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[5].xxxx, TEMP[8].xyyy 132: MOV TEMP[12].xy, TEMP[11].xyyy 133: MOV TEMP[12].w, TEMP[9].xxxx 134: TXL TEMP[12], TEMP[12], SAMP[8], 2D 135: FSEQ TEMP[13].x, TEMP[8].zzzz, IMM[1].zzzz 136: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 137: MOV TEMP[14].xy, TEMP[11].xyyy 138: MOV TEMP[14].w, TEMP[9].xxxx 139: TXL TEMP[14], TEMP[14], SAMP[6], 2D 140: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[3].xxxx 141: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 142: MOV TEMP[16].xy, TEMP[11].xyyy 143: MOV TEMP[16].w, TEMP[9].xxxx 144: TXL TEMP[16], TEMP[16], SAMP[4], 2D 145: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[2].wwww 146: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 147: MOV TEMP[18].xy, TEMP[11].xyyy 148: MOV TEMP[18].w, TEMP[9].xxxx 149: TXL TEMP[18], TEMP[18], SAMP[2], 2D 150: FSEQ TEMP[19].x, TEMP[8].zzzz, IMM[2].zzzz 151: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 152: MOV TEMP[11].xy, TEMP[11].xyyy 153: MOV TEMP[11].w, TEMP[9].xxxx 154: TXL TEMP[11], TEMP[11], SAMP[0], 2D 155: FSEQ TEMP[20].x, TEMP[8].zzzz, IMM[3].yyyy 156: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 157: MUL TEMP[11], TEMP[11], TEMP[20].xxxx 158: MAD TEMP[11], TEMP[18], TEMP[19].xxxx, TEMP[11] 159: MAD TEMP[11], TEMP[16], TEMP[17].xxxx, TEMP[11] 160: MAD TEMP[11], TEMP[14], TEMP[15].xxxx, TEMP[11] 161: MAD TEMP[11], TEMP[12], TEMP[13].xxxx, TEMP[11] 162: MOV TEMP[12].xy, IN[3].zyzz 163: MOV TEMP[13].x, IMM[2].xxxx 164: FSNE TEMP[14].x, CONST[15].xxxx, TEMP[5].xxxx 165: UIF TEMP[14].xxxx :0 166: MOV TEMP[13].x, IMM[2].yyyy 167: RCP TEMP[14].x, CONST[18].xxxx 168: MUL TEMP[12].xy, IN[3].zyyy, TEMP[14].xxxx 169: ELSE :0 170: RCP TEMP[14].x, CONST[17].xxxx 171: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx 172: ENDIF 173: FRC TEMP[12].xy, TEMP[12].xyyy 174: MUL TEMP[14].x, CONST[19].xxxx, IMM[2].wwww 175: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx 176: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx 177: MUL TEMP[13].x, TEMP[13].xxxx, CONST[19].xxxx 178: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx 179: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[5].xxxx, TEMP[8].xyyy 180: MOV TEMP[13].xy, TEMP[12].xyyy 181: MOV TEMP[13].w, TEMP[9].xxxx 182: TXL TEMP[13], TEMP[13], SAMP[8], 2D 183: FSEQ TEMP[14].x, TEMP[8].zzzz, IMM[1].zzzz 184: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 185: MOV TEMP[15].xy, TEMP[12].xyyy 186: MOV TEMP[15].w, TEMP[9].xxxx 187: TXL TEMP[15], TEMP[15], SAMP[6], 2D 188: FSEQ TEMP[16].x, TEMP[8].zzzz, IMM[3].xxxx 189: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 190: MOV TEMP[17].xy, TEMP[12].xyyy 191: MOV TEMP[17].w, TEMP[9].xxxx 192: TXL TEMP[17], TEMP[17], SAMP[4], 2D 193: FSEQ TEMP[18].x, TEMP[8].zzzz, IMM[2].wwww 194: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 195: MOV TEMP[19].xy, TEMP[12].xyyy 196: MOV TEMP[19].w, TEMP[9].xxxx 197: TXL TEMP[19], TEMP[19], SAMP[2], 2D 198: FSEQ TEMP[20].x, TEMP[8].zzzz, IMM[2].zzzz 199: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 200: MOV TEMP[12].xy, TEMP[12].xyyy 201: MOV TEMP[12].w, TEMP[9].xxxx 202: TXL TEMP[12], TEMP[12], SAMP[0], 2D 203: FSEQ TEMP[21].x, TEMP[8].zzzz, IMM[3].yyyy 204: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 205: MUL TEMP[12], TEMP[12], TEMP[21].xxxx 206: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12] 207: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12] 208: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12] 209: MAD TEMP[12], TEMP[13], TEMP[14].xxxx, TEMP[12] 210: MOV TEMP[13].xy, IN[3].zxzz 211: MOV TEMP[14].x, IMM[2].xxxx 212: FSNE TEMP[15].x, CONST[15].xxxx, TEMP[5].xxxx 213: UIF TEMP[15].xxxx :0 214: MOV TEMP[14].x, IMM[2].yyyy 215: RCP TEMP[15].x, CONST[18].xxxx 216: MUL TEMP[13].xy, IN[3].zxxx, TEMP[15].xxxx 217: ELSE :0 218: RCP TEMP[15].x, CONST[17].xxxx 219: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx 220: ENDIF 221: FRC TEMP[13].xy, TEMP[13].xyyy 222: MUL TEMP[15].x, CONST[19].xxxx, IMM[2].wwww 223: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx 224: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx 225: MUL TEMP[14].x, TEMP[14].xxxx, CONST[19].xxxx 226: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx 227: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[5].xxxx, TEMP[8].xyyy 228: MOV TEMP[14].xy, TEMP[13].xyyy 229: MOV TEMP[14].w, TEMP[9].xxxx 230: TXL TEMP[14], TEMP[14], SAMP[8], 2D 231: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[1].zzzz 232: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 233: MOV TEMP[16].xy, TEMP[13].xyyy 234: MOV TEMP[16].w, TEMP[9].xxxx 235: TXL TEMP[16], TEMP[16], SAMP[6], 2D 236: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[3].xxxx 237: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 238: MOV TEMP[18].xy, TEMP[13].xyyy 239: MOV TEMP[18].w, TEMP[9].xxxx 240: TXL TEMP[18], TEMP[18], SAMP[4], 2D 241: FSEQ TEMP[19].x, TEMP[8].zzzz, IMM[2].wwww 242: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 243: MOV TEMP[20].xy, TEMP[13].xyyy 244: MOV TEMP[20].w, TEMP[9].xxxx 245: TXL TEMP[20], TEMP[20], SAMP[2], 2D 246: FSEQ TEMP[21].x, TEMP[8].zzzz, IMM[2].zzzz 247: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 248: MOV TEMP[13].xy, TEMP[13].xyyy 249: MOV TEMP[13].w, TEMP[9].xxxx 250: TXL TEMP[13], TEMP[13], SAMP[0], 2D 251: FSEQ TEMP[22].x, TEMP[8].zzzz, IMM[3].yyyy 252: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 253: MUL TEMP[13], TEMP[13], TEMP[22].xxxx 254: MAD TEMP[13], TEMP[20], TEMP[21].xxxx, TEMP[13] 255: MAD TEMP[13], TEMP[18], TEMP[19].xxxx, TEMP[13] 256: MAD TEMP[13], TEMP[16], TEMP[17].xxxx, TEMP[13] 257: MAD TEMP[13], TEMP[14], TEMP[15].xxxx, TEMP[13] 258: MOV TEMP[14].xy, IN[3].xyxx 259: MOV TEMP[15].x, IMM[2].xxxx 260: FSNE TEMP[16].x, CONST[15].xxxx, TEMP[6].xxxx 261: UIF TEMP[16].xxxx :0 262: MOV TEMP[15].x, IMM[2].yyyy 263: RCP TEMP[16].x, CONST[18].xxxx 264: MUL TEMP[14].xy, IN[3].xyyy, TEMP[16].xxxx 265: ELSE :0 266: RCP TEMP[16].x, CONST[17].xxxx 267: MUL TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx 268: ENDIF 269: FRC TEMP[14].xy, TEMP[14].xyyy 270: MUL TEMP[16].x, CONST[19].xxxx, IMM[2].wwww 271: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[15].xxxx 272: ADD TEMP[16].x, IMM[2].zzzz, -TEMP[16].xxxx 273: MUL TEMP[15].x, TEMP[15].xxxx, CONST[19].xxxx 274: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx, TEMP[15].xxxx 275: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[6].xxxx, TEMP[10].xyyy 276: MOV TEMP[15].xy, TEMP[14].xyyy 277: MOV TEMP[15].w, TEMP[9].xxxx 278: TXL TEMP[15], TEMP[15], SAMP[8], 2D 279: FSEQ TEMP[16].x, TEMP[10].zzzz, IMM[1].zzzz 280: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 281: MOV TEMP[17].xy, TEMP[14].xyyy 282: MOV TEMP[17].w, TEMP[9].xxxx 283: TXL TEMP[17], TEMP[17], SAMP[6], 2D 284: FSEQ TEMP[18].x, TEMP[10].zzzz, IMM[3].xxxx 285: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 286: MOV TEMP[19].xy, TEMP[14].xyyy 287: MOV TEMP[19].w, TEMP[9].xxxx 288: TXL TEMP[19], TEMP[19], SAMP[4], 2D 289: FSEQ TEMP[20].x, TEMP[10].zzzz, IMM[2].wwww 290: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 291: MOV TEMP[21].xy, TEMP[14].xyyy 292: MOV TEMP[21].w, TEMP[9].xxxx 293: TXL TEMP[21], TEMP[21], SAMP[2], 2D 294: FSEQ TEMP[22].x, TEMP[10].zzzz, IMM[2].zzzz 295: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 296: MOV TEMP[14].xy, TEMP[14].xyyy 297: MOV TEMP[14].w, TEMP[9].xxxx 298: TXL TEMP[14], TEMP[14], SAMP[0], 2D 299: FSEQ TEMP[23].x, TEMP[10].zzzz, IMM[3].yyyy 300: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz 301: MUL TEMP[14], TEMP[14], TEMP[23].xxxx 302: MAD TEMP[14], TEMP[21], TEMP[22].xxxx, TEMP[14] 303: MAD TEMP[14], TEMP[19], TEMP[20].xxxx, TEMP[14] 304: MAD TEMP[14], TEMP[17], TEMP[18].xxxx, TEMP[14] 305: MAD TEMP[14], TEMP[15], TEMP[16].xxxx, TEMP[14] 306: MOV TEMP[15].xy, IN[3].zyzz 307: MOV TEMP[16].x, IMM[2].xxxx 308: FSNE TEMP[17].x, CONST[15].xxxx, TEMP[6].xxxx 309: UIF TEMP[17].xxxx :0 310: MOV TEMP[16].x, IMM[2].yyyy 311: RCP TEMP[17].x, CONST[18].xxxx 312: MUL TEMP[15].xy, IN[3].zyyy, TEMP[17].xxxx 313: ELSE :0 314: RCP TEMP[17].x, CONST[17].xxxx 315: MUL TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx 316: ENDIF 317: FRC TEMP[15].xy, TEMP[15].xyyy 318: MUL TEMP[17].x, CONST[19].xxxx, IMM[2].wwww 319: MUL TEMP[17].x, TEMP[17].xxxx, TEMP[16].xxxx 320: ADD TEMP[17].x, IMM[2].zzzz, -TEMP[17].xxxx 321: MUL TEMP[16].x, TEMP[16].xxxx, CONST[19].xxxx 322: MAD TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx, TEMP[16].xxxx 323: MAD TEMP[15].xy, TEMP[15].xyyy, TEMP[6].xxxx, TEMP[10].xyyy 324: MOV TEMP[16].xy, TEMP[15].xyyy 325: MOV TEMP[16].w, TEMP[9].xxxx 326: TXL TEMP[16], TEMP[16], SAMP[8], 2D 327: FSEQ TEMP[17].x, TEMP[10].zzzz, IMM[1].zzzz 328: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 329: MOV TEMP[18].xy, TEMP[15].xyyy 330: MOV TEMP[18].w, TEMP[9].xxxx 331: TXL TEMP[18], TEMP[18], SAMP[6], 2D 332: FSEQ TEMP[19].x, TEMP[10].zzzz, IMM[3].xxxx 333: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 334: MOV TEMP[20].xy, TEMP[15].xyyy 335: MOV TEMP[20].w, TEMP[9].xxxx 336: TXL TEMP[20], TEMP[20], SAMP[4], 2D 337: FSEQ TEMP[21].x, TEMP[10].zzzz, IMM[2].wwww 338: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 339: MOV TEMP[22].xy, TEMP[15].xyyy 340: MOV TEMP[22].w, TEMP[9].xxxx 341: TXL TEMP[22], TEMP[22], SAMP[2], 2D 342: FSEQ TEMP[23].x, TEMP[10].zzzz, IMM[2].zzzz 343: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz 344: MOV TEMP[15].xy, TEMP[15].xyyy 345: MOV TEMP[15].w, TEMP[9].xxxx 346: TXL TEMP[15], TEMP[15], SAMP[0], 2D 347: FSEQ TEMP[24].x, TEMP[10].zzzz, IMM[3].yyyy 348: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz 349: MUL TEMP[15], TEMP[15], TEMP[24].xxxx 350: MAD TEMP[15], TEMP[22], TEMP[23].xxxx, TEMP[15] 351: MAD TEMP[15], TEMP[20], TEMP[21].xxxx, TEMP[15] 352: MAD TEMP[15], TEMP[18], TEMP[19].xxxx, TEMP[15] 353: MAD TEMP[15], TEMP[16], TEMP[17].xxxx, TEMP[15] 354: MOV TEMP[16].xy, IN[3].zxzz 355: MOV TEMP[17].x, IMM[2].xxxx 356: FSNE TEMP[18].x, CONST[15].xxxx, TEMP[6].xxxx 357: UIF TEMP[18].xxxx :0 358: MOV TEMP[17].x, IMM[2].yyyy 359: RCP TEMP[18].x, CONST[18].xxxx 360: MUL TEMP[16].xy, IN[3].zxxx, TEMP[18].xxxx 361: ELSE :0 362: RCP TEMP[18].x, CONST[17].xxxx 363: MUL TEMP[16].xy, TEMP[16].xyyy, TEMP[18].xxxx 364: ENDIF 365: FRC TEMP[16].xy, TEMP[16].xyyy 366: MUL TEMP[18].x, CONST[19].xxxx, IMM[2].wwww 367: MUL TEMP[18].x, TEMP[18].xxxx, TEMP[17].xxxx 368: ADD TEMP[18].x, IMM[2].zzzz, -TEMP[18].xxxx 369: MUL TEMP[17].x, TEMP[17].xxxx, CONST[19].xxxx 370: MAD TEMP[16].xy, TEMP[16].xyyy, TEMP[18].xxxx, TEMP[17].xxxx 371: MAD TEMP[16].xy, TEMP[16].xyyy, TEMP[6].xxxx, TEMP[10].xyyy 372: MOV TEMP[17].xy, TEMP[16].xyyy 373: MOV TEMP[17].w, TEMP[9].xxxx 374: TXL TEMP[17], TEMP[17], SAMP[8], 2D 375: FSEQ TEMP[18].x, TEMP[10].zzzz, IMM[1].zzzz 376: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 377: MOV TEMP[19].xy, TEMP[16].xyyy 378: MOV TEMP[19].w, TEMP[9].xxxx 379: TXL TEMP[19], TEMP[19], SAMP[6], 2D 380: FSEQ TEMP[20].x, TEMP[10].zzzz, IMM[3].xxxx 381: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 382: MOV TEMP[21].xy, TEMP[16].xyyy 383: MOV TEMP[21].w, TEMP[9].xxxx 384: TXL TEMP[21], TEMP[21], SAMP[4], 2D 385: FSEQ TEMP[22].x, TEMP[10].zzzz, IMM[2].wwww 386: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 387: MOV TEMP[23].xy, TEMP[16].xyyy 388: MOV TEMP[23].w, TEMP[9].xxxx 389: TXL TEMP[23], TEMP[23], SAMP[2], 2D 390: FSEQ TEMP[24].x, TEMP[10].zzzz, IMM[2].zzzz 391: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz 392: MOV TEMP[16].xy, TEMP[16].xyyy 393: MOV TEMP[16].w, TEMP[9].xxxx 394: TXL TEMP[16], TEMP[16], SAMP[0], 2D 395: FSEQ TEMP[25].x, TEMP[10].zzzz, IMM[3].yyyy 396: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz 397: MUL TEMP[16], TEMP[16], TEMP[25].xxxx 398: MAD TEMP[16], TEMP[23], TEMP[24].xxxx, TEMP[16] 399: MAD TEMP[16], TEMP[21], TEMP[22].xxxx, TEMP[16] 400: MAD TEMP[16], TEMP[19], TEMP[20].xxxx, TEMP[16] 401: MAD TEMP[16], TEMP[17], TEMP[18].xxxx, TEMP[16] 402: MOV TEMP[17].xy, IN[3].xyxx 403: MOV TEMP[18].x, IMM[2].xxxx 404: FSNE TEMP[19].x, CONST[15].xxxx, TEMP[7].xxxx 405: UIF TEMP[19].xxxx :0 406: MOV TEMP[18].x, IMM[2].yyyy 407: RCP TEMP[19].x, CONST[18].xxxx 408: MUL TEMP[17].xy, IN[3].xyyy, TEMP[19].xxxx 409: ELSE :0 410: RCP TEMP[19].x, CONST[17].xxxx 411: MUL TEMP[17].xy, TEMP[17].xyyy, TEMP[19].xxxx 412: ENDIF 413: FRC TEMP[17].xy, TEMP[17].xyyy 414: MUL TEMP[19].x, CONST[19].xxxx, IMM[2].wwww 415: MUL TEMP[19].x, TEMP[19].xxxx, TEMP[18].xxxx 416: ADD TEMP[19].x, IMM[2].zzzz, -TEMP[19].xxxx 417: MUL TEMP[18].x, TEMP[18].xxxx, CONST[19].xxxx 418: MAD TEMP[17].xy, TEMP[17].xyyy, TEMP[19].xxxx, TEMP[18].xxxx 419: MAD TEMP[17].xy, TEMP[17].xyyy, TEMP[7].xxxx, TEMP[4].xyyy 420: MOV TEMP[18].xy, TEMP[17].xyyy 421: MOV TEMP[18].w, TEMP[9].xxxx 422: TXL TEMP[18], TEMP[18], SAMP[8], 2D 423: FSEQ TEMP[19].x, TEMP[4].zzzz, IMM[1].zzzz 424: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 425: MOV TEMP[20].xy, TEMP[17].xyyy 426: MOV TEMP[20].w, TEMP[9].xxxx 427: TXL TEMP[20], TEMP[20], SAMP[6], 2D 428: FSEQ TEMP[21].x, TEMP[4].zzzz, IMM[3].xxxx 429: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 430: MOV TEMP[22].xy, TEMP[17].xyyy 431: MOV TEMP[22].w, TEMP[9].xxxx 432: TXL TEMP[22], TEMP[22], SAMP[4], 2D 433: FSEQ TEMP[23].x, TEMP[4].zzzz, IMM[2].wwww 434: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz 435: MOV TEMP[24].xy, TEMP[17].xyyy 436: MOV TEMP[24].w, TEMP[9].xxxx 437: TXL TEMP[24], TEMP[24], SAMP[2], 2D 438: FSEQ TEMP[25].x, TEMP[4].zzzz, IMM[2].zzzz 439: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz 440: MOV TEMP[17].xy, TEMP[17].xyyy 441: MOV TEMP[17].w, TEMP[9].xxxx 442: TXL TEMP[17], TEMP[17], SAMP[0], 2D 443: FSEQ TEMP[26].x, TEMP[4].zzzz, IMM[3].yyyy 444: AND TEMP[26].x, TEMP[26].xxxx, IMM[2].zzzz 445: MUL TEMP[17], TEMP[17], TEMP[26].xxxx 446: MAD TEMP[17], TEMP[24], TEMP[25].xxxx, TEMP[17] 447: MAD TEMP[17], TEMP[22], TEMP[23].xxxx, TEMP[17] 448: MAD TEMP[17], TEMP[20], TEMP[21].xxxx, TEMP[17] 449: MAD TEMP[17], TEMP[18], TEMP[19].xxxx, TEMP[17] 450: MOV TEMP[18].xy, IN[3].zyzz 451: MOV TEMP[19].x, IMM[2].xxxx 452: FSNE TEMP[20].x, CONST[15].xxxx, TEMP[7].xxxx 453: UIF TEMP[20].xxxx :0 454: MOV TEMP[19].x, IMM[2].yyyy 455: RCP TEMP[20].x, CONST[18].xxxx 456: MUL TEMP[18].xy, IN[3].zyyy, TEMP[20].xxxx 457: ELSE :0 458: RCP TEMP[20].x, CONST[17].xxxx 459: MUL TEMP[18].xy, TEMP[18].xyyy, TEMP[20].xxxx 460: ENDIF 461: FRC TEMP[18].xy, TEMP[18].xyyy 462: MUL TEMP[20].x, CONST[19].xxxx, IMM[2].wwww 463: MUL TEMP[20].x, TEMP[20].xxxx, TEMP[19].xxxx 464: ADD TEMP[20].x, IMM[2].zzzz, -TEMP[20].xxxx 465: MUL TEMP[19].x, TEMP[19].xxxx, CONST[19].xxxx 466: MAD TEMP[18].xy, TEMP[18].xyyy, TEMP[20].xxxx, TEMP[19].xxxx 467: MAD TEMP[18].xy, TEMP[18].xyyy, TEMP[7].xxxx, TEMP[4].xyyy 468: MOV TEMP[19].xy, TEMP[18].xyyy 469: MOV TEMP[19].w, TEMP[9].xxxx 470: TXL TEMP[19], TEMP[19], SAMP[8], 2D 471: FSEQ TEMP[20].x, TEMP[4].zzzz, IMM[1].zzzz 472: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 473: MOV TEMP[21].xy, TEMP[18].xyyy 474: MOV TEMP[21].w, TEMP[9].xxxx 475: TXL TEMP[21], TEMP[21], SAMP[6], 2D 476: FSEQ TEMP[22].x, TEMP[4].zzzz, IMM[3].xxxx 477: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 478: MOV TEMP[23].xy, TEMP[18].xyyy 479: MOV TEMP[23].w, TEMP[9].xxxx 480: TXL TEMP[23], TEMP[23], SAMP[4], 2D 481: FSEQ TEMP[24].x, TEMP[4].zzzz, IMM[2].wwww 482: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz 483: MOV TEMP[25].xy, TEMP[18].xyyy 484: MOV TEMP[25].w, TEMP[9].xxxx 485: TXL TEMP[25], TEMP[25], SAMP[2], 2D 486: FSEQ TEMP[26].x, TEMP[4].zzzz, IMM[2].zzzz 487: AND TEMP[26].x, TEMP[26].xxxx, IMM[2].zzzz 488: MOV TEMP[18].xy, TEMP[18].xyyy 489: MOV TEMP[18].w, TEMP[9].xxxx 490: TXL TEMP[18], TEMP[18], SAMP[0], 2D 491: FSEQ TEMP[27].x, TEMP[4].zzzz, IMM[3].yyyy 492: AND TEMP[27].x, TEMP[27].xxxx, IMM[2].zzzz 493: MUL TEMP[18], TEMP[18], TEMP[27].xxxx 494: MAD TEMP[18], TEMP[25], TEMP[26].xxxx, TEMP[18] 495: MAD TEMP[18], TEMP[23], TEMP[24].xxxx, TEMP[18] 496: MAD TEMP[18], TEMP[21], TEMP[22].xxxx, TEMP[18] 497: MAD TEMP[18], TEMP[19], TEMP[20].xxxx, TEMP[18] 498: MOV TEMP[19].xy, IN[3].zxzz 499: MOV TEMP[20].x, IMM[2].xxxx 500: FSNE TEMP[21].x, CONST[15].xxxx, TEMP[7].xxxx 501: UIF TEMP[21].xxxx :0 502: MOV TEMP[20].x, IMM[2].yyyy 503: RCP TEMP[21].x, CONST[18].xxxx 504: MUL TEMP[19].xy, IN[3].zxxx, TEMP[21].xxxx 505: ELSE :0 506: RCP TEMP[21].x, CONST[17].xxxx 507: MUL TEMP[19].xy, TEMP[19].xyyy, TEMP[21].xxxx 508: ENDIF 509: FRC TEMP[19].xy, TEMP[19].xyyy 510: MUL TEMP[21].x, CONST[19].xxxx, IMM[2].wwww 511: MUL TEMP[21].x, TEMP[21].xxxx, TEMP[20].xxxx 512: ADD TEMP[21].x, IMM[2].zzzz, -TEMP[21].xxxx 513: MUL TEMP[20].x, TEMP[20].xxxx, CONST[19].xxxx 514: MAD TEMP[19].xy, TEMP[19].xyyy, TEMP[21].xxxx, TEMP[20].xxxx 515: MAD TEMP[19].xy, TEMP[19].xyyy, TEMP[7].xxxx, TEMP[4].xyyy 516: MOV TEMP[20].xy, TEMP[19].xyyy 517: MOV TEMP[20].w, TEMP[9].xxxx 518: TXL TEMP[20], TEMP[20], SAMP[8], 2D 519: FSEQ TEMP[21].x, TEMP[4].zzzz, IMM[1].zzzz 520: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 521: MOV TEMP[22].xy, TEMP[19].xyyy 522: MOV TEMP[22].w, TEMP[9].xxxx 523: TXL TEMP[22], TEMP[22], SAMP[6], 2D 524: FSEQ TEMP[23].x, TEMP[4].zzzz, IMM[3].xxxx 525: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz 526: MOV TEMP[24].xy, TEMP[19].xyyy 527: MOV TEMP[24].w, TEMP[9].xxxx 528: TXL TEMP[24], TEMP[24], SAMP[4], 2D 529: FSEQ TEMP[25].x, TEMP[4].zzzz, IMM[2].wwww 530: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz 531: MOV TEMP[26].xy, TEMP[19].xyyy 532: MOV TEMP[26].w, TEMP[9].xxxx 533: TXL TEMP[26], TEMP[26], SAMP[2], 2D 534: FSEQ TEMP[27].x, TEMP[4].zzzz, IMM[2].zzzz 535: AND TEMP[27].x, TEMP[27].xxxx, IMM[2].zzzz 536: MOV TEMP[19].xy, TEMP[19].xyyy 537: MOV TEMP[19].w, TEMP[9].xxxx 538: TXL TEMP[19], TEMP[19], SAMP[0], 2D 539: FSEQ TEMP[28].x, TEMP[4].zzzz, IMM[3].yyyy 540: AND TEMP[28].x, TEMP[28].xxxx, IMM[2].zzzz 541: MUL TEMP[19], TEMP[19], TEMP[28].xxxx 542: MAD TEMP[19], TEMP[26], TEMP[27].xxxx, TEMP[19] 543: MAD TEMP[19], TEMP[24], TEMP[25].xxxx, TEMP[19] 544: MAD TEMP[19], TEMP[22], TEMP[23].xxxx, TEMP[19] 545: MAD TEMP[19], TEMP[20], TEMP[21].xxxx, TEMP[19] 546: MUL TEMP[17], TEMP[17], TEMP[3].zzzz 547: MAD TEMP[17], TEMP[18], TEMP[3].xxxx, TEMP[17] 548: MAD TEMP[17], TEMP[19], TEMP[3].yyyy, TEMP[17] 549: MUL TEMP[14], TEMP[14], TEMP[3].zzzz 550: MAD TEMP[14], TEMP[15], TEMP[3].xxxx, TEMP[14] 551: MAD TEMP[14], TEMP[16], TEMP[3].yyyy, TEMP[14] 552: MUL TEMP[11], TEMP[11], TEMP[3].zzzz 553: MAD TEMP[11], TEMP[12], TEMP[3].xxxx, TEMP[11] 554: MAD TEMP[11], TEMP[13], TEMP[3].yyyy, TEMP[11] 555: MUL TEMP[11], IN[1].xxxx, TEMP[11] 556: MAD TEMP[11], IN[1].yyyy, TEMP[14], TEMP[11] 557: MAD TEMP[11].xyz, IN[1].zzzz, TEMP[17], TEMP[11] 558: MOV TEMP[12].xy, IN[3].zyzz 559: MOV TEMP[13].x, IMM[2].xxxx 560: FSNE TEMP[14].x, CONST[15].xxxx, TEMP[5].xxxx 561: UIF TEMP[14].xxxx :0 562: MOV TEMP[13].x, IMM[2].yyyy 563: RCP TEMP[14].x, CONST[18].xxxx 564: MUL TEMP[12].xy, IN[3].zyyy, TEMP[14].xxxx 565: ELSE :0 566: RCP TEMP[14].x, CONST[17].xxxx 567: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx 568: ENDIF 569: FRC TEMP[12].xy, TEMP[12].xyyy 570: MUL TEMP[14].x, CONST[19].xxxx, IMM[2].wwww 571: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx 572: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx 573: MUL TEMP[13].x, TEMP[13].xxxx, CONST[19].xxxx 574: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx 575: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[5].xxxx, TEMP[8].xyyy 576: MOV TEMP[13].xy, TEMP[12].xyyy 577: MOV TEMP[13].w, TEMP[9].xxxx 578: TXL TEMP[13], TEMP[13], SAMP[9], 2D 579: FSEQ TEMP[14].x, TEMP[8].zzzz, IMM[1].zzzz 580: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 581: MOV TEMP[15].xy, TEMP[12].xyyy 582: MOV TEMP[15].w, TEMP[9].xxxx 583: TXL TEMP[15], TEMP[15], SAMP[7], 2D 584: FSEQ TEMP[16].x, TEMP[8].zzzz, IMM[3].xxxx 585: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 586: MOV TEMP[17].xy, TEMP[12].xyyy 587: MOV TEMP[17].w, TEMP[9].xxxx 588: TXL TEMP[17], TEMP[17], SAMP[5], 2D 589: FSEQ TEMP[18].x, TEMP[8].zzzz, IMM[2].wwww 590: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 591: MOV TEMP[19].xy, TEMP[12].xyyy 592: MOV TEMP[19].w, TEMP[9].xxxx 593: TXL TEMP[19], TEMP[19], SAMP[3], 2D 594: FSEQ TEMP[20].x, TEMP[8].zzzz, IMM[2].zzzz 595: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 596: MOV TEMP[12].xy, TEMP[12].xyyy 597: MOV TEMP[12].w, TEMP[9].xxxx 598: TXL TEMP[12], TEMP[12], SAMP[1], 2D 599: FSEQ TEMP[21].x, TEMP[8].zzzz, IMM[3].yyyy 600: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 601: MUL TEMP[12], TEMP[12], TEMP[21].xxxx 602: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12] 603: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12] 604: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12] 605: MAD TEMP[12].yw, TEMP[13], TEMP[14].xxxx, TEMP[12] 606: MAD TEMP[12].xy, TEMP[12].wyyy, IMM[2].wwww, IMM[3].zzzz 607: DP2 TEMP[13].x, TEMP[12].xyyy, TEMP[12].xyyy 608: MOV_SAT TEMP[29].x, TEMP[13].xxxx 609: MOV TEMP[13].xy, IN[3].zxzz 610: MOV TEMP[14].x, IMM[2].xxxx 611: FSNE TEMP[15].x, CONST[15].xxxx, TEMP[5].xxxx 612: UIF TEMP[15].xxxx :0 613: MOV TEMP[14].x, IMM[2].yyyy 614: RCP TEMP[15].x, CONST[18].xxxx 615: MUL TEMP[13].xy, IN[3].zxxx, TEMP[15].xxxx 616: ELSE :0 617: RCP TEMP[15].x, CONST[17].xxxx 618: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx 619: ENDIF 620: FRC TEMP[13].xy, TEMP[13].xyyy 621: MUL TEMP[15].x, CONST[19].xxxx, IMM[2].wwww 622: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx 623: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx 624: MUL TEMP[14].x, TEMP[14].xxxx, CONST[19].xxxx 625: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx 626: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[5].xxxx, TEMP[8].xyyy 627: MOV TEMP[14].xy, TEMP[13].xyyy 628: MOV TEMP[14].w, TEMP[9].xxxx 629: TXL TEMP[14], TEMP[14], SAMP[9], 2D 630: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[1].zzzz 631: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 632: MOV TEMP[16].xy, TEMP[13].xyyy 633: MOV TEMP[16].w, TEMP[9].xxxx 634: TXL TEMP[16], TEMP[16], SAMP[7], 2D 635: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[3].xxxx 636: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 637: MOV TEMP[18].xy, TEMP[13].xyyy 638: MOV TEMP[18].w, TEMP[9].xxxx 639: TXL TEMP[18], TEMP[18], SAMP[5], 2D 640: FSEQ TEMP[19].x, TEMP[8].zzzz, IMM[2].wwww 641: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 642: MOV TEMP[20].xy, TEMP[13].xyyy 643: MOV TEMP[20].w, TEMP[9].xxxx 644: TXL TEMP[20], TEMP[20], SAMP[3], 2D 645: FSEQ TEMP[21].x, TEMP[8].zzzz, IMM[2].zzzz 646: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 647: MOV TEMP[13].xy, TEMP[13].xyyy 648: MOV TEMP[13].w, TEMP[9].xxxx 649: TXL TEMP[13], TEMP[13], SAMP[1], 2D 650: FSEQ TEMP[22].x, TEMP[8].zzzz, IMM[3].yyyy 651: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 652: MUL TEMP[13], TEMP[13], TEMP[22].xxxx 653: MAD TEMP[13], TEMP[20], TEMP[21].xxxx, TEMP[13] 654: MAD TEMP[13], TEMP[18], TEMP[19].xxxx, TEMP[13] 655: MAD TEMP[13], TEMP[16], TEMP[17].xxxx, TEMP[13] 656: MAD TEMP[13].yw, TEMP[14], TEMP[15].xxxx, TEMP[13] 657: MAD TEMP[13].xy, TEMP[13].wyyy, IMM[2].wwww, IMM[3].zzzz 658: DP2 TEMP[14].x, TEMP[13].xyyy, TEMP[13].xyyy 659: MOV_SAT TEMP[30].x, TEMP[14].xxxx 660: MOV TEMP[14].xy, IN[3].xyxx 661: MOV TEMP[15].x, IMM[2].xxxx 662: FSNE TEMP[16].x, CONST[15].xxxx, TEMP[5].xxxx 663: UIF TEMP[16].xxxx :0 664: MOV TEMP[15].x, IMM[2].yyyy 665: RCP TEMP[16].x, CONST[18].xxxx 666: MUL TEMP[14].xy, IN[3].xyyy, TEMP[16].xxxx 667: ELSE :0 668: RCP TEMP[16].x, CONST[17].xxxx 669: MUL TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx 670: ENDIF 671: FRC TEMP[14].xy, TEMP[14].xyyy 672: MUL TEMP[16].x, CONST[19].xxxx, IMM[2].wwww 673: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[15].xxxx 674: ADD TEMP[16].x, IMM[2].zzzz, -TEMP[16].xxxx 675: MUL TEMP[15].x, TEMP[15].xxxx, CONST[19].xxxx 676: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx, TEMP[15].xxxx 677: MAD TEMP[5].xy, TEMP[14].xyyy, TEMP[5].xxxx, TEMP[8].xyyy 678: MOV TEMP[14].xy, TEMP[5].xyyy 679: MOV TEMP[14].w, TEMP[9].xxxx 680: TXL TEMP[14], TEMP[14], SAMP[9], 2D 681: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[1].zzzz 682: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 683: MOV TEMP[16].xy, TEMP[5].xyyy 684: MOV TEMP[16].w, TEMP[9].xxxx 685: TXL TEMP[16], TEMP[16], SAMP[7], 2D 686: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[3].xxxx 687: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 688: MOV TEMP[18].xy, TEMP[5].xyyy 689: MOV TEMP[18].w, TEMP[9].xxxx 690: TXL TEMP[18], TEMP[18], SAMP[5], 2D 691: FSEQ TEMP[19].x, TEMP[8].zzzz, IMM[2].wwww 692: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 693: MOV TEMP[20].xy, TEMP[5].xyyy 694: MOV TEMP[20].w, TEMP[9].xxxx 695: TXL TEMP[20], TEMP[20], SAMP[3], 2D 696: FSEQ TEMP[21].x, TEMP[8].zzzz, IMM[2].zzzz 697: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 698: MOV TEMP[5].xy, TEMP[5].xyyy 699: MOV TEMP[5].w, TEMP[9].xxxx 700: TXL TEMP[5], TEMP[5], SAMP[1], 2D 701: FSEQ TEMP[8].x, TEMP[8].zzzz, IMM[3].yyyy 702: AND TEMP[8].x, TEMP[8].xxxx, IMM[2].zzzz 703: MUL TEMP[5], TEMP[5], TEMP[8].xxxx 704: MAD TEMP[5], TEMP[20], TEMP[21].xxxx, TEMP[5] 705: MAD TEMP[5], TEMP[18], TEMP[19].xxxx, TEMP[5] 706: MAD TEMP[5], TEMP[16], TEMP[17].xxxx, TEMP[5] 707: MAD TEMP[5].yw, TEMP[14], TEMP[15].xxxx, TEMP[5] 708: MAD TEMP[5].xy, TEMP[5].wyyy, IMM[2].wwww, IMM[3].zzzz 709: DP2 TEMP[8].x, TEMP[5].xyyy, TEMP[5].xyyy 710: MOV_SAT TEMP[31].x, TEMP[8].xxxx 711: MOV TEMP[8].x, IMM[3].yyyy 712: MOV TEMP[8].y, TEMP[12].xxxx 713: MOV TEMP[8].z, TEMP[12].yyyy 714: MOV TEMP[12].y, IMM[3].yyyy 715: MOV TEMP[12].x, TEMP[13].yyyy 716: MOV TEMP[12].z, TEMP[13].xxxx 717: MOV TEMP[13].z, IMM[3].yyyy 718: MOV TEMP[13].xy, TEMP[5].xyxx 719: MUL TEMP[5].xyz, TEMP[8].xyzz, TEMP[3].xxxx 720: MAD TEMP[5].xyz, TEMP[12].xyzz, TEMP[3].yyyy, TEMP[5].xyzz 721: MAD TEMP[5].xyz, TEMP[13].xyzz, TEMP[3].zzzz, TEMP[5].xyzz 722: MOV TEMP[8].xy, IN[3].zyzz 723: MOV TEMP[12].x, IMM[2].xxxx 724: FSNE TEMP[13].x, CONST[15].xxxx, TEMP[6].xxxx 725: UIF TEMP[13].xxxx :0 726: MOV TEMP[12].x, IMM[2].yyyy 727: RCP TEMP[13].x, CONST[18].xxxx 728: MUL TEMP[8].xy, IN[3].zyyy, TEMP[13].xxxx 729: ELSE :0 730: RCP TEMP[13].x, CONST[17].xxxx 731: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[13].xxxx 732: ENDIF 733: FRC TEMP[8].xy, TEMP[8].xyyy 734: MUL TEMP[13].x, CONST[19].xxxx, IMM[2].wwww 735: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx 736: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx 737: MUL TEMP[12].x, TEMP[12].xxxx, CONST[19].xxxx 738: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[13].xxxx, TEMP[12].xxxx 739: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[6].xxxx, TEMP[10].xyyy 740: MOV TEMP[12].xy, TEMP[8].xyyy 741: MOV TEMP[12].w, TEMP[9].xxxx 742: TXL TEMP[12], TEMP[12], SAMP[9], 2D 743: FSEQ TEMP[13].x, TEMP[10].zzzz, IMM[1].zzzz 744: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 745: MOV TEMP[14].xy, TEMP[8].xyyy 746: MOV TEMP[14].w, TEMP[9].xxxx 747: TXL TEMP[14], TEMP[14], SAMP[7], 2D 748: FSEQ TEMP[15].x, TEMP[10].zzzz, IMM[3].xxxx 749: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 750: MOV TEMP[16].xy, TEMP[8].xyyy 751: MOV TEMP[16].w, TEMP[9].xxxx 752: TXL TEMP[16], TEMP[16], SAMP[5], 2D 753: FSEQ TEMP[17].x, TEMP[10].zzzz, IMM[2].wwww 754: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 755: MOV TEMP[18].xy, TEMP[8].xyyy 756: MOV TEMP[18].w, TEMP[9].xxxx 757: TXL TEMP[18], TEMP[18], SAMP[3], 2D 758: FSEQ TEMP[19].x, TEMP[10].zzzz, IMM[2].zzzz 759: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 760: MOV TEMP[8].xy, TEMP[8].xyyy 761: MOV TEMP[8].w, TEMP[9].xxxx 762: TXL TEMP[8], TEMP[8], SAMP[1], 2D 763: FSEQ TEMP[20].x, TEMP[10].zzzz, IMM[3].yyyy 764: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 765: MUL TEMP[8], TEMP[8], TEMP[20].xxxx 766: MAD TEMP[8], TEMP[18], TEMP[19].xxxx, TEMP[8] 767: MAD TEMP[8], TEMP[16], TEMP[17].xxxx, TEMP[8] 768: MAD TEMP[8], TEMP[14], TEMP[15].xxxx, TEMP[8] 769: MAD TEMP[8].yw, TEMP[12], TEMP[13].xxxx, TEMP[8] 770: MAD TEMP[8].xy, TEMP[8].wyyy, IMM[2].wwww, IMM[3].zzzz 771: DP2 TEMP[12].x, TEMP[8].xyyy, TEMP[8].xyyy 772: MOV_SAT TEMP[32].x, TEMP[12].xxxx 773: MOV TEMP[12].xy, IN[3].zxzz 774: MOV TEMP[13].x, IMM[2].xxxx 775: FSNE TEMP[14].x, CONST[15].xxxx, TEMP[6].xxxx 776: UIF TEMP[14].xxxx :0 777: MOV TEMP[13].x, IMM[2].yyyy 778: RCP TEMP[14].x, CONST[18].xxxx 779: MUL TEMP[12].xy, IN[3].zxxx, TEMP[14].xxxx 780: ELSE :0 781: RCP TEMP[14].x, CONST[17].xxxx 782: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx 783: ENDIF 784: FRC TEMP[12].xy, TEMP[12].xyyy 785: MUL TEMP[14].x, CONST[19].xxxx, IMM[2].wwww 786: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx 787: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx 788: MUL TEMP[13].x, TEMP[13].xxxx, CONST[19].xxxx 789: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx 790: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[6].xxxx, TEMP[10].xyyy 791: MOV TEMP[13].xy, TEMP[12].xyyy 792: MOV TEMP[13].w, TEMP[9].xxxx 793: TXL TEMP[13], TEMP[13], SAMP[9], 2D 794: FSEQ TEMP[14].x, TEMP[10].zzzz, IMM[1].zzzz 795: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 796: MOV TEMP[15].xy, TEMP[12].xyyy 797: MOV TEMP[15].w, TEMP[9].xxxx 798: TXL TEMP[15], TEMP[15], SAMP[7], 2D 799: FSEQ TEMP[16].x, TEMP[10].zzzz, IMM[3].xxxx 800: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 801: MOV TEMP[17].xy, TEMP[12].xyyy 802: MOV TEMP[17].w, TEMP[9].xxxx 803: TXL TEMP[17], TEMP[17], SAMP[5], 2D 804: FSEQ TEMP[18].x, TEMP[10].zzzz, IMM[2].wwww 805: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 806: MOV TEMP[19].xy, TEMP[12].xyyy 807: MOV TEMP[19].w, TEMP[9].xxxx 808: TXL TEMP[19], TEMP[19], SAMP[3], 2D 809: FSEQ TEMP[20].x, TEMP[10].zzzz, IMM[2].zzzz 810: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 811: MOV TEMP[12].xy, TEMP[12].xyyy 812: MOV TEMP[12].w, TEMP[9].xxxx 813: TXL TEMP[12], TEMP[12], SAMP[1], 2D 814: FSEQ TEMP[21].x, TEMP[10].zzzz, IMM[3].yyyy 815: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 816: MUL TEMP[12], TEMP[12], TEMP[21].xxxx 817: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12] 818: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12] 819: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12] 820: MAD TEMP[12].yw, TEMP[13], TEMP[14].xxxx, TEMP[12] 821: MAD TEMP[12].xy, TEMP[12].wyyy, IMM[2].wwww, IMM[3].zzzz 822: DP2 TEMP[13].x, TEMP[12].xyyy, TEMP[12].xyyy 823: MOV_SAT TEMP[33].x, TEMP[13].xxxx 824: MOV TEMP[13].xy, IN[3].xyxx 825: MOV TEMP[14].x, IMM[2].xxxx 826: FSNE TEMP[15].x, CONST[15].xxxx, TEMP[6].xxxx 827: UIF TEMP[15].xxxx :0 828: MOV TEMP[14].x, IMM[2].yyyy 829: RCP TEMP[15].x, CONST[18].xxxx 830: MUL TEMP[13].xy, IN[3].xyyy, TEMP[15].xxxx 831: ELSE :0 832: RCP TEMP[15].x, CONST[17].xxxx 833: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx 834: ENDIF 835: FRC TEMP[13].xy, TEMP[13].xyyy 836: MUL TEMP[15].x, CONST[19].xxxx, IMM[2].wwww 837: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx 838: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx 839: MUL TEMP[14].x, TEMP[14].xxxx, CONST[19].xxxx 840: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx 841: MAD TEMP[6].xy, TEMP[13].xyyy, TEMP[6].xxxx, TEMP[10].xyyy 842: MOV TEMP[13].xy, TEMP[6].xyyy 843: MOV TEMP[13].w, TEMP[9].xxxx 844: TXL TEMP[13], TEMP[13], SAMP[9], 2D 845: FSEQ TEMP[14].x, TEMP[10].zzzz, IMM[1].zzzz 846: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 847: MOV TEMP[15].xy, TEMP[6].xyyy 848: MOV TEMP[15].w, TEMP[9].xxxx 849: TXL TEMP[15], TEMP[15], SAMP[7], 2D 850: FSEQ TEMP[16].x, TEMP[10].zzzz, IMM[3].xxxx 851: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 852: MOV TEMP[17].xy, TEMP[6].xyyy 853: MOV TEMP[17].w, TEMP[9].xxxx 854: TXL TEMP[17], TEMP[17], SAMP[5], 2D 855: FSEQ TEMP[18].x, TEMP[10].zzzz, IMM[2].wwww 856: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 857: MOV TEMP[19].xy, TEMP[6].xyyy 858: MOV TEMP[19].w, TEMP[9].xxxx 859: TXL TEMP[19], TEMP[19], SAMP[3], 2D 860: FSEQ TEMP[20].x, TEMP[10].zzzz, IMM[2].zzzz 861: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 862: MOV TEMP[6].xy, TEMP[6].xyyy 863: MOV TEMP[6].w, TEMP[9].xxxx 864: TXL TEMP[6], TEMP[6], SAMP[1], 2D 865: FSEQ TEMP[10].x, TEMP[10].zzzz, IMM[3].yyyy 866: AND TEMP[10].x, TEMP[10].xxxx, IMM[2].zzzz 867: MUL TEMP[6], TEMP[6], TEMP[10].xxxx 868: MAD TEMP[6], TEMP[19], TEMP[20].xxxx, TEMP[6] 869: MAD TEMP[6], TEMP[17], TEMP[18].xxxx, TEMP[6] 870: MAD TEMP[6], TEMP[15], TEMP[16].xxxx, TEMP[6] 871: MAD TEMP[6].yw, TEMP[13], TEMP[14].xxxx, TEMP[6] 872: MAD TEMP[6].xy, TEMP[6].wyyy, IMM[2].wwww, IMM[3].zzzz 873: DP2 TEMP[10].x, TEMP[6].xyyy, TEMP[6].xyyy 874: MOV_SAT TEMP[34].x, TEMP[10].xxxx 875: MOV TEMP[10].x, IMM[3].yyyy 876: MOV TEMP[10].y, TEMP[8].xxxx 877: MOV TEMP[10].z, TEMP[8].yyyy 878: MOV TEMP[8].y, IMM[3].yyyy 879: MOV TEMP[8].x, TEMP[12].yyyy 880: MOV TEMP[8].z, TEMP[12].xxxx 881: MOV TEMP[12].z, IMM[3].yyyy 882: MOV TEMP[12].xy, TEMP[6].xyxx 883: MUL TEMP[6].xyz, TEMP[10].xyzz, TEMP[3].xxxx 884: MAD TEMP[6].xyz, TEMP[8].xyzz, TEMP[3].yyyy, TEMP[6].xyzz 885: MAD TEMP[6].xyz, TEMP[12].xyzz, TEMP[3].zzzz, TEMP[6].xyzz 886: MOV TEMP[8].xy, IN[3].zyzz 887: MOV TEMP[10].x, IMM[2].xxxx 888: FSNE TEMP[12].x, CONST[15].xxxx, TEMP[7].xxxx 889: UIF TEMP[12].xxxx :0 890: MOV TEMP[10].x, IMM[2].yyyy 891: RCP TEMP[12].x, CONST[18].xxxx 892: MUL TEMP[8].xy, IN[3].zyyy, TEMP[12].xxxx 893: ELSE :0 894: RCP TEMP[12].x, CONST[17].xxxx 895: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[12].xxxx 896: ENDIF 897: FRC TEMP[8].xy, TEMP[8].xyyy 898: MUL TEMP[12].x, CONST[19].xxxx, IMM[2].wwww 899: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[10].xxxx 900: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx 901: MUL TEMP[10].x, TEMP[10].xxxx, CONST[19].xxxx 902: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[12].xxxx, TEMP[10].xxxx 903: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[7].xxxx, TEMP[4].xyyy 904: MOV TEMP[10].xy, TEMP[8].xyyy 905: MOV TEMP[10].w, TEMP[9].xxxx 906: TXL TEMP[10], TEMP[10], SAMP[9], 2D 907: FSEQ TEMP[12].x, TEMP[4].zzzz, IMM[1].zzzz 908: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz 909: MOV TEMP[13].xy, TEMP[8].xyyy 910: MOV TEMP[13].w, TEMP[9].xxxx 911: TXL TEMP[13], TEMP[13], SAMP[7], 2D 912: FSEQ TEMP[14].x, TEMP[4].zzzz, IMM[3].xxxx 913: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 914: MOV TEMP[15].xy, TEMP[8].xyyy 915: MOV TEMP[15].w, TEMP[9].xxxx 916: TXL TEMP[15], TEMP[15], SAMP[5], 2D 917: FSEQ TEMP[16].x, TEMP[4].zzzz, IMM[2].wwww 918: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 919: MOV TEMP[17].xy, TEMP[8].xyyy 920: MOV TEMP[17].w, TEMP[9].xxxx 921: TXL TEMP[17], TEMP[17], SAMP[3], 2D 922: FSEQ TEMP[18].x, TEMP[4].zzzz, IMM[2].zzzz 923: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 924: MOV TEMP[8].xy, TEMP[8].xyyy 925: MOV TEMP[8].w, TEMP[9].xxxx 926: TXL TEMP[8], TEMP[8], SAMP[1], 2D 927: FSEQ TEMP[19].x, TEMP[4].zzzz, IMM[3].yyyy 928: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 929: MUL TEMP[8], TEMP[8], TEMP[19].xxxx 930: MAD TEMP[8], TEMP[17], TEMP[18].xxxx, TEMP[8] 931: MAD TEMP[8], TEMP[15], TEMP[16].xxxx, TEMP[8] 932: MAD TEMP[8], TEMP[13], TEMP[14].xxxx, TEMP[8] 933: MAD TEMP[8].yw, TEMP[10], TEMP[12].xxxx, TEMP[8] 934: MAD TEMP[8].xy, TEMP[8].wyyy, IMM[2].wwww, IMM[3].zzzz 935: DP2 TEMP[10].x, TEMP[8].xyyy, TEMP[8].xyyy 936: MOV_SAT TEMP[35].x, TEMP[10].xxxx 937: MOV TEMP[10].xy, IN[3].zxzz 938: MOV TEMP[12].x, IMM[2].xxxx 939: FSNE TEMP[13].x, CONST[15].xxxx, TEMP[7].xxxx 940: UIF TEMP[13].xxxx :0 941: MOV TEMP[12].x, IMM[2].yyyy 942: RCP TEMP[13].x, CONST[18].xxxx 943: MUL TEMP[10].xy, IN[3].zxxx, TEMP[13].xxxx 944: ELSE :0 945: RCP TEMP[13].x, CONST[17].xxxx 946: MUL TEMP[10].xy, TEMP[10].xyyy, TEMP[13].xxxx 947: ENDIF 948: FRC TEMP[10].xy, TEMP[10].xyyy 949: MUL TEMP[13].x, CONST[19].xxxx, IMM[2].wwww 950: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx 951: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx 952: MUL TEMP[12].x, TEMP[12].xxxx, CONST[19].xxxx 953: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[13].xxxx, TEMP[12].xxxx 954: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[7].xxxx, TEMP[4].xyyy 955: MOV TEMP[12].xy, TEMP[10].xyyy 956: MOV TEMP[12].w, TEMP[9].xxxx 957: TXL TEMP[12], TEMP[12], SAMP[9], 2D 958: FSEQ TEMP[13].x, TEMP[4].zzzz, IMM[1].zzzz 959: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 960: MOV TEMP[14].xy, TEMP[10].xyyy 961: MOV TEMP[14].w, TEMP[9].xxxx 962: TXL TEMP[14], TEMP[14], SAMP[7], 2D 963: FSEQ TEMP[15].x, TEMP[4].zzzz, IMM[3].xxxx 964: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 965: MOV TEMP[16].xy, TEMP[10].xyyy 966: MOV TEMP[16].w, TEMP[9].xxxx 967: TXL TEMP[16], TEMP[16], SAMP[5], 2D 968: FSEQ TEMP[17].x, TEMP[4].zzzz, IMM[2].wwww 969: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 970: MOV TEMP[18].xy, TEMP[10].xyyy 971: MOV TEMP[18].w, TEMP[9].xxxx 972: TXL TEMP[18], TEMP[18], SAMP[3], 2D 973: FSEQ TEMP[19].x, TEMP[4].zzzz, IMM[2].zzzz 974: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 975: MOV TEMP[10].xy, TEMP[10].xyyy 976: MOV TEMP[10].w, TEMP[9].xxxx 977: TXL TEMP[10], TEMP[10], SAMP[1], 2D 978: FSEQ TEMP[20].x, TEMP[4].zzzz, IMM[3].yyyy 979: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 980: MUL TEMP[10], TEMP[10], TEMP[20].xxxx 981: MAD TEMP[10], TEMP[18], TEMP[19].xxxx, TEMP[10] 982: MAD TEMP[10], TEMP[16], TEMP[17].xxxx, TEMP[10] 983: MAD TEMP[10], TEMP[14], TEMP[15].xxxx, TEMP[10] 984: MAD TEMP[10].yw, TEMP[12], TEMP[13].xxxx, TEMP[10] 985: MAD TEMP[10].xy, TEMP[10].wyyy, IMM[2].wwww, IMM[3].zzzz 986: DP2 TEMP[12].x, TEMP[10].xyyy, TEMP[10].xyyy 987: MOV_SAT TEMP[36].x, TEMP[12].xxxx 988: MOV TEMP[12].xy, IN[3].xyxx 989: MOV TEMP[13].x, IMM[2].xxxx 990: FSNE TEMP[14].x, CONST[15].xxxx, TEMP[7].xxxx 991: UIF TEMP[14].xxxx :0 992: MOV TEMP[13].x, IMM[2].yyyy 993: RCP TEMP[14].x, CONST[18].xxxx 994: MUL TEMP[12].xy, IN[3].xyyy, TEMP[14].xxxx 995: ELSE :0 996: RCP TEMP[14].x, CONST[17].xxxx 997: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx 998: ENDIF 999: FRC TEMP[12].xy, TEMP[12].xyyy 1000: MUL TEMP[14].x, CONST[19].xxxx, IMM[2].wwww 1001: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx 1002: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx 1003: MUL TEMP[13].x, TEMP[13].xxxx, CONST[19].xxxx 1004: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx 1005: MAD TEMP[7].xy, TEMP[12].xyyy, TEMP[7].xxxx, TEMP[4].xyyy 1006: MOV TEMP[12].xy, TEMP[7].xyyy 1007: MOV TEMP[12].w, TEMP[9].xxxx 1008: TXL TEMP[12], TEMP[12], SAMP[9], 2D 1009: FSEQ TEMP[13].x, TEMP[4].zzzz, IMM[1].zzzz 1010: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 1011: MOV TEMP[14].xy, TEMP[7].xyyy 1012: MOV TEMP[14].w, TEMP[9].xxxx 1013: TXL TEMP[14], TEMP[14], SAMP[7], 2D 1014: FSEQ TEMP[15].x, TEMP[4].zzzz, IMM[3].xxxx 1015: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 1016: MOV TEMP[16].xy, TEMP[7].xyyy 1017: MOV TEMP[16].w, TEMP[9].xxxx 1018: TXL TEMP[16], TEMP[16], SAMP[5], 2D 1019: FSEQ TEMP[17].x, TEMP[4].zzzz, IMM[2].wwww 1020: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 1021: MOV TEMP[18].xy, TEMP[7].xyyy 1022: MOV TEMP[18].w, TEMP[9].xxxx 1023: TXL TEMP[18], TEMP[18], SAMP[3], 2D 1024: FSEQ TEMP[19].x, TEMP[4].zzzz, IMM[2].zzzz 1025: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 1026: MOV TEMP[7].xy, TEMP[7].xyyy 1027: MOV TEMP[7].w, TEMP[9].xxxx 1028: TXL TEMP[7], TEMP[7], SAMP[1], 2D 1029: FSEQ TEMP[4].x, TEMP[4].zzzz, IMM[3].yyyy 1030: AND TEMP[4].x, TEMP[4].xxxx, IMM[2].zzzz 1031: MUL TEMP[4], TEMP[7], TEMP[4].xxxx 1032: MAD TEMP[4], TEMP[18], TEMP[19].xxxx, TEMP[4] 1033: MAD TEMP[4], TEMP[16], TEMP[17].xxxx, TEMP[4] 1034: MAD TEMP[4], TEMP[14], TEMP[15].xxxx, TEMP[4] 1035: MAD TEMP[4].yw, TEMP[12], TEMP[13].xxxx, TEMP[4] 1036: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[2].wwww, IMM[3].zzzz 1037: DP2 TEMP[7].x, TEMP[4].xyyy, TEMP[4].xyyy 1038: MOV_SAT TEMP[37].x, TEMP[7].xxxx 1039: MOV TEMP[7].x, IMM[3].yyyy 1040: MOV TEMP[7].y, TEMP[8].xxxx 1041: MOV TEMP[7].z, TEMP[8].yyyy 1042: MOV TEMP[8].y, IMM[3].yyyy 1043: MOV TEMP[8].x, TEMP[10].yyyy 1044: MOV TEMP[8].z, TEMP[10].xxxx 1045: MOV TEMP[9].z, IMM[3].yyyy 1046: MOV TEMP[9].xy, TEMP[4].xyxx 1047: MOV TEMP[4].w, IMM[2].zzzz 1048: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[3].xxxx 1049: MAD TEMP[7].xyz, TEMP[8].xyzz, TEMP[3].yyyy, TEMP[7].xyzz 1050: MAD TEMP[3].xyz, TEMP[9].xyzz, TEMP[3].zzzz, TEMP[7].xyzz 1051: MUL TEMP[5].xyz, IN[1].xxxx, TEMP[5].xyzz 1052: MAD TEMP[5].xyz, IN[1].yyyy, TEMP[6].xyzz, TEMP[5].xyzz 1053: MAD TEMP[4].xyz, IN[1].zzzz, TEMP[3].xyzz, TEMP[5].xyzz 1054: DP4 TEMP[3].x, TEMP[4], TEMP[4] 1055: RSQ TEMP[3].x, TEMP[3].xxxx 1056: MUL TEMP[3].xyz, TEMP[4], TEMP[3].xxxx 1057: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[0].wwww 1058: ADD TEMP[3].xyz, IN[2].yzww, -TEMP[3].xyzz 1059: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 1060: RSQ TEMP[4].x, TEMP[4].xxxx 1061: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 1062: MOV TEMP[4].w, IMM[3].yyyy 1063: MUL TEMP[4].xyz, TEMP[11].xyzz, TEMP[0].xyzz 1064: ADD TEMP[0].xyz, TEMP[1].xyzz, TEMP[2].xyzz 1065: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz 1066: RSQ TEMP[2].x, TEMP[2].xxxx 1067: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx 1068: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[0].xyzz 1069: MAX TEMP[0].x, IMM[3].wwww, TEMP[0].xxxx 1070: MUL TEMP[2].x, IMM[4].xxxx, IN[1].wwww 1071: POW TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx 1072: MOV_SAT TEMP[0].x, TEMP[0].xxxx 1073: MOV TEMP[2].w, IMM[3].yyyy 1074: MOV TEMP[2].xyz, CONST[22].xyzx 1075: MOV TEMP[5].w, IMM[2].zzzz 1076: MUL TEMP[6].x, IMM[2].wwww, TEMP[0].xxxx 1077: ADD TEMP[6].x, IMM[3].xxxx, -TEMP[6].xxxx 1078: MUL TEMP[6].x, TEMP[0].xxxx, TEMP[6].xxxx 1079: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[6].xxxx 1080: MUL TEMP[0].x, TEMP[0].xxxx, IN[1].wwww 1081: MUL TEMP[6].xyz, TEMP[11].xyzz, CONST[4].xyzz 1082: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[1].xyzz 1083: MOV_SAT TEMP[1].x, TEMP[1].xxxx 1084: MUL TEMP[3], CONST[23], IMM[2].wwww 1085: MAX TEMP[2], TEMP[3], TEMP[2] 1086: MIN TEMP[2].xyz, TEMP[2], IMM[4].yyyz 1087: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[11].xyzz 1088: MAD TEMP[1].xyz, TEMP[6].xyzz, TEMP[1].xxxx, TEMP[2].xyzz 1089: MAD TEMP[0].xyz, CONST[4].xyzz, TEMP[0].xxxx, TEMP[1].xyzz 1090: MUL TEMP[5].xyz, TEMP[0].xyzz, IMM[0].wwww 1091: ADD TEMP[0].xyz, TEMP[4], TEMP[5] 1092: MAD TEMP[1].x, IN[2].xxxx, CONST[3].zzzz, CONST[3].wwww 1093: MOV_SAT TEMP[1].x, TEMP[1].xxxx 1094: LRP TEMP[4].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[2].xyzz 1095: MOV TEMP[4].w, IMM[2].zzzz 1096: MOV OUT[0], TEMP[4] 1097: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 356) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 360) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 372) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 376) %51 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %52 = load <8 x i32>, <8 x i32> addrspace(2)* %51, align 32, !tbaa !0 %53 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %54 = load <4 x i32>, <4 x i32> addrspace(2)* %53, align 16, !tbaa !0 %55 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %56 = load <8 x i32>, <8 x i32> addrspace(2)* %55, align 32, !tbaa !0 %57 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %58 = load <4 x i32>, <4 x i32> addrspace(2)* %57, align 16, !tbaa !0 %59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %60 = load <8 x i32>, <8 x i32> addrspace(2)* %59, align 32, !tbaa !0 %61 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %62 = load <4 x i32>, <4 x i32> addrspace(2)* %61, align 16, !tbaa !0 %63 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %64 = load <8 x i32>, <8 x i32> addrspace(2)* %63, align 32, !tbaa !0 %65 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %66 = load <4 x i32>, <4 x i32> addrspace(2)* %65, align 16, !tbaa !0 %67 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %68 = load <8 x i32>, <8 x i32> addrspace(2)* %67, align 32, !tbaa !0 %69 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %70 = load <4 x i32>, <4 x i32> addrspace(2)* %69, align 16, !tbaa !0 %71 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %72 = load <8 x i32>, <8 x i32> addrspace(2)* %71, align 32, !tbaa !0 %73 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %74 = load <4 x i32>, <4 x i32> addrspace(2)* %73, align 16, !tbaa !0 %75 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %76 = load <8 x i32>, <8 x i32> addrspace(2)* %75, align 32, !tbaa !0 %77 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %78 = load <4 x i32>, <4 x i32> addrspace(2)* %77, align 16, !tbaa !0 %79 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 7 %80 = load <8 x i32>, <8 x i32> addrspace(2)* %79, align 32, !tbaa !0 %81 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 7 %82 = load <4 x i32>, <4 x i32> addrspace(2)* %81, align 16, !tbaa !0 %83 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 8 %84 = load <8 x i32>, <8 x i32> addrspace(2)* %83, align 32, !tbaa !0 %85 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 8 %86 = load <4 x i32>, <4 x i32> addrspace(2)* %85, align 16, !tbaa !0 %87 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 9 %88 = load <8 x i32>, <8 x i32> addrspace(2)* %87, align 32, !tbaa !0 %89 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 9 %90 = load <4 x i32>, <4 x i32> addrspace(2)* %89, align 16, !tbaa !0 %91 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %92 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %93 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %94 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %95 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %109 = fmul float %27, %27 %110 = fmul float %28, %28 %111 = fadd float %110, %109 %112 = fmul float %29, %29 %113 = fadd float %111, %112 %114 = call float @llvm.AMDGPU.rsq.clamped.f32(float %113) %115 = fmul float %27, %114 %116 = fmul float %28, %114 %117 = fmul float %29, %114 %118 = fsub float %24, %103 %119 = fsub float %25, %104 %120 = fsub float %26, %105 %121 = fmul float %118, %118 %122 = fmul float %119, %119 %123 = fadd float %122, %121 %124 = fmul float %120, %120 %125 = fadd float %123, %124 %126 = call float @llvm.AMDGPU.rsq.clamped.f32(float %125) %127 = fmul float %118, %126 %128 = fmul float %119, %126 %129 = fmul float %120, %126 %130 = call float @llvm.fabs.f32(float %100) %131 = call float @llvm.fabs.f32(float %101) %132 = call float @llvm.fabs.f32(float %102) %133 = fmul float %130, %130 %134 = fmul float %131, %131 %135 = fadd float %134, %133 %136 = fmul float %132, %132 %137 = fadd float %135, %136 %138 = call float @llvm.AMDGPU.rsq.clamped.f32(float %137) %139 = fmul float %130, %138 %140 = fadd float %139, 0xBFC99999A0000000 %141 = fmul float %131, %138 %142 = fadd float %141, 0xBFC99999A0000000 %143 = fmul float %132, %138 %144 = fadd float %143, 0xBFC99999A0000000 %145 = fmul float %140, 7.000000e+00 %146 = fmul float %142, 7.000000e+00 %147 = fmul float %144, 7.000000e+00 %148 = call float @llvm.maxnum.f32(float %145, float 0x3F847AE140000000) %149 = call float @llvm.maxnum.f32(float %146, float 0x3F847AE140000000) %150 = call float @llvm.maxnum.f32(float %147, float 0x3F847AE140000000) %151 = fadd float %148, %149 %152 = fadd float %151, %150 %153 = fdiv float 1.000000e+00, %152 %154 = fmul float %148, %153 %155 = fmul float %149, %153 %156 = fmul float %150, %153 %157 = fadd float %91, 5.000000e-01 %158 = fadd float %92, 5.000000e-01 %159 = fadd float %93, 5.000000e-01 %160 = call float @llvm.floor.f32(float %157) %161 = call float @llvm.floor.f32(float %158) %162 = call float @llvm.floor.f32(float %159) %163 = fmul float %160, %38 %164 = call float @llvm.floor.f32(float %163) %165 = fmul float %164, %38 %166 = fcmp ult float %160, 6.400000e+01 br i1 %166, label %ELSE, label %IF IF: ; preds = %main_body %167 = fadd float %160, -6.400000e+01 %168 = fmul float %167, %39 %169 = call float @llvm.floor.f32(float %168) %170 = fmul float %169, %39 %171 = call float @llvm.floor.f32(float %168) %172 = fsub float %168, %171 %173 = call float @llvm.floor.f32(float %170) %174 = fsub float %170, %173 %175 = call float @llvm.floor.f32(float %170) %176 = fadd float %175, 4.000000e+00 br label %ENDIF ELSE: ; preds = %main_body %177 = call float @llvm.floor.f32(float %163) %178 = fsub float %163, %177 %179 = call float @llvm.floor.f32(float %165) %180 = fsub float %165, %179 %181 = call float @llvm.floor.f32(float %165) br label %ENDIF ENDIF: ; preds = %ELSE, %IF %temp32.0 = phi float [ %172, %IF ], [ %178, %ELSE ] %temp33.0 = phi float [ %174, %IF ], [ %180, %ELSE ] %temp34.0 = phi float [ %176, %IF ], [ %181, %ELSE ] %temp20.0 = phi float [ %39, %IF ], [ %38, %ELSE ] %182 = fmul float %161, %38 %183 = call float @llvm.floor.f32(float %182) %184 = fmul float %183, %38 %185 = fcmp ult float %161, 6.400000e+01 br i1 %185, label %ELSE154, label %IF153 IF153: ; preds = %ENDIF %186 = fadd float %161, -6.400000e+01 %187 = fmul float %186, %39 %188 = call float @llvm.floor.f32(float %187) %189 = fmul float %188, %39 %190 = call float @llvm.floor.f32(float %187) %191 = fsub float %187, %190 %192 = call float @llvm.floor.f32(float %189) %193 = fsub float %189, %192 %194 = call float @llvm.floor.f32(float %189) %195 = fadd float %194, 4.000000e+00 br label %ENDIF152 ELSE154: ; preds = %ENDIF %196 = call float @llvm.floor.f32(float %182) %197 = fsub float %182, %196 %198 = call float @llvm.floor.f32(float %184) %199 = fsub float %184, %198 %200 = call float @llvm.floor.f32(float %184) br label %ENDIF152 ENDIF152: ; preds = %ELSE154, %IF153 %temp40.0 = phi float [ %191, %IF153 ], [ %197, %ELSE154 ] %temp41.0 = phi float [ %193, %IF153 ], [ %199, %ELSE154 ] %temp42.0 = phi float [ %195, %IF153 ], [ %200, %ELSE154 ] %temp24.0 = phi float [ %39, %IF153 ], [ %38, %ELSE154 ] %201 = fmul float %162, %38 %202 = call float @llvm.floor.f32(float %201) %203 = fmul float %202, %38 %204 = fcmp ult float %162, 6.400000e+01 br i1 %204, label %ELSE157, label %IF156 IF156: ; preds = %ENDIF152 %205 = fadd float %162, -6.400000e+01 %206 = fmul float %205, %39 %207 = call float @llvm.floor.f32(float %206) %208 = fmul float %207, %39 %209 = call float @llvm.floor.f32(float %206) %210 = fsub float %206, %209 %211 = call float @llvm.floor.f32(float %208) %212 = fsub float %208, %211 %213 = call float @llvm.floor.f32(float %208) %214 = fadd float %213, 4.000000e+00 br label %ENDIF155 ELSE157: ; preds = %ENDIF152 %215 = call float @llvm.floor.f32(float %201) %216 = fsub float %201, %215 %217 = call float @llvm.floor.f32(float %203) %218 = fsub float %203, %217 %219 = call float @llvm.floor.f32(float %203) br label %ENDIF155 ENDIF155: ; preds = %ELSE157, %IF156 %temp28.0 = phi float [ %39, %IF156 ], [ %38, %ELSE157 ] %temp18.0 = phi float [ %214, %IF156 ], [ %219, %ELSE157 ] %temp17.0 = phi float [ %212, %IF156 ], [ %218, %ELSE157 ] %temp16.0 = phi float [ %210, %IF156 ], [ %216, %ELSE157 ] %220 = fsub float %103, %24 %221 = fsub float %104, %25 %222 = fsub float %105, %26 %223 = fmul float %220, %220 %224 = fmul float %221, %221 %225 = fadd float %224, %223 %226 = fmul float %222, %222 %227 = fadd float %225, %226 %228 = fmul float %44, %227 %229 = call float @llvm.log2.f32(float %228) %230 = fmul float %229, 0x3FE62E4300000000 %231 = fmul float %230, %43 %232 = fcmp une float %38, %temp20.0 %.sink212 = select i1 %232, float %41, float %40 %temp48.0 = select i1 %232, float 1.953125e-03, float 3.906250e-03 %233 = fdiv float 1.000000e+00, %.sink212 %234 = fmul float %103, %233 %235 = fmul float %104, %233 %236 = call float @llvm.floor.f32(float %234) %237 = fsub float %234, %236 %238 = call float @llvm.floor.f32(float %235) %239 = fsub float %235, %238 %240 = fmul float %42, 2.000000e+00 %241 = fmul float %240, %temp48.0 %242 = fsub float 1.000000e+00, %241 %243 = fmul float %temp48.0, %42 %244 = fmul float %237, %242 %245 = fadd float %244, %243 %246 = fmul float %239, %242 %247 = fadd float %246, %243 %248 = fmul float %245, %temp20.0 %249 = fadd float %248, %temp32.0 %250 = fmul float %247, %temp20.0 %251 = fadd float %250, %temp33.0 %252 = bitcast float %249 to i32 %253 = bitcast float %251 to i32 %254 = bitcast float %231 to i32 %255 = insertelement <4 x i32> undef, i32 %252, i32 0 %256 = insertelement <4 x i32> %255, i32 %253, i32 1 %257 = insertelement <4 x i32> %256, i32 %254, i32 2 %258 = bitcast <8 x i32> %84 to <32 x i8> %259 = bitcast <4 x i32> %86 to <16 x i8> %260 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %257, <32 x i8> %258, <16 x i8> %259, i32 2) %261 = extractelement <4 x float> %260, i32 0 %262 = extractelement <4 x float> %260, i32 1 %263 = extractelement <4 x float> %260, i32 2 %264 = fcmp oeq float %temp34.0, 4.000000e+00 %265 = select i1 %264, float 1.000000e+00, float 0.000000e+00 %266 = bitcast float %249 to i32 %267 = bitcast float %251 to i32 %268 = bitcast float %231 to i32 %269 = insertelement <4 x i32> undef, i32 %266, i32 0 %270 = insertelement <4 x i32> %269, i32 %267, i32 1 %271 = insertelement <4 x i32> %270, i32 %268, i32 2 %272 = bitcast <8 x i32> %76 to <32 x i8> %273 = bitcast <4 x i32> %78 to <16 x i8> %274 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %271, <32 x i8> %272, <16 x i8> %273, i32 2) %275 = extractelement <4 x float> %274, i32 0 %276 = extractelement <4 x float> %274, i32 1 %277 = extractelement <4 x float> %274, i32 2 %278 = fcmp oeq float %temp34.0, 3.000000e+00 %279 = select i1 %278, float 1.000000e+00, float 0.000000e+00 %280 = bitcast float %249 to i32 %281 = bitcast float %251 to i32 %282 = bitcast float %231 to i32 %283 = insertelement <4 x i32> undef, i32 %280, i32 0 %284 = insertelement <4 x i32> %283, i32 %281, i32 1 %285 = insertelement <4 x i32> %284, i32 %282, i32 2 %286 = bitcast <8 x i32> %68 to <32 x i8> %287 = bitcast <4 x i32> %70 to <16 x i8> %288 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %285, <32 x i8> %286, <16 x i8> %287, i32 2) %289 = extractelement <4 x float> %288, i32 0 %290 = extractelement <4 x float> %288, i32 1 %291 = extractelement <4 x float> %288, i32 2 %292 = fcmp oeq float %temp34.0, 2.000000e+00 %293 = select i1 %292, float 1.000000e+00, float 0.000000e+00 %294 = bitcast float %249 to i32 %295 = bitcast float %251 to i32 %296 = bitcast float %231 to i32 %297 = insertelement <4 x i32> undef, i32 %294, i32 0 %298 = insertelement <4 x i32> %297, i32 %295, i32 1 %299 = insertelement <4 x i32> %298, i32 %296, i32 2 %300 = bitcast <8 x i32> %60 to <32 x i8> %301 = bitcast <4 x i32> %62 to <16 x i8> %302 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %299, <32 x i8> %300, <16 x i8> %301, i32 2) %303 = extractelement <4 x float> %302, i32 0 %304 = extractelement <4 x float> %302, i32 1 %305 = extractelement <4 x float> %302, i32 2 %306 = fcmp oeq float %temp34.0, 1.000000e+00 %307 = select i1 %306, float 1.000000e+00, float 0.000000e+00 %308 = bitcast float %249 to i32 %309 = bitcast float %251 to i32 %310 = bitcast float %231 to i32 %311 = insertelement <4 x i32> undef, i32 %308, i32 0 %312 = insertelement <4 x i32> %311, i32 %309, i32 1 %313 = insertelement <4 x i32> %312, i32 %310, i32 2 %314 = bitcast <8 x i32> %52 to <32 x i8> %315 = bitcast <4 x i32> %54 to <16 x i8> %316 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %313, <32 x i8> %314, <16 x i8> %315, i32 2) %317 = extractelement <4 x float> %316, i32 0 %318 = extractelement <4 x float> %316, i32 1 %319 = extractelement <4 x float> %316, i32 2 %320 = fcmp oeq float %temp34.0, 0.000000e+00 %321 = select i1 %320, float 1.000000e+00, float 0.000000e+00 %322 = fmul float %317, %321 %323 = fmul float %318, %321 %324 = fmul float %319, %321 %325 = fmul float %303, %307 %326 = fadd float %325, %322 %327 = fmul float %304, %307 %328 = fadd float %327, %323 %329 = fmul float %305, %307 %330 = fadd float %329, %324 %331 = fmul float %289, %293 %332 = fadd float %331, %326 %333 = fmul float %290, %293 %334 = fadd float %333, %328 %335 = fmul float %291, %293 %336 = fadd float %335, %330 %337 = fmul float %275, %279 %338 = fadd float %337, %332 %339 = fmul float %276, %279 %340 = fadd float %339, %334 %341 = fmul float %277, %279 %342 = fadd float %341, %336 %343 = fmul float %261, %265 %344 = fadd float %343, %338 %345 = fmul float %262, %265 %346 = fadd float %345, %340 %347 = fmul float %263, %265 %348 = fadd float %347, %342 %349 = fcmp une float %38, %temp20.0 %.sink213 = select i1 %349, float %41, float %40 %temp52.0 = select i1 %349, float 1.953125e-03, float 3.906250e-03 %350 = fdiv float 1.000000e+00, %.sink213 %351 = fmul float %105, %350 %352 = fmul float %104, %350 %353 = call float @llvm.floor.f32(float %351) %354 = fsub float %351, %353 %355 = call float @llvm.floor.f32(float %352) %356 = fsub float %352, %355 %357 = fmul float %42, 2.000000e+00 %358 = fmul float %357, %temp52.0 %359 = fsub float 1.000000e+00, %358 %360 = fmul float %temp52.0, %42 %361 = fmul float %354, %359 %362 = fadd float %361, %360 %363 = fmul float %356, %359 %364 = fadd float %363, %360 %365 = fmul float %362, %temp20.0 %366 = fadd float %365, %temp32.0 %367 = fmul float %364, %temp20.0 %368 = fadd float %367, %temp33.0 %369 = bitcast float %366 to i32 %370 = bitcast float %368 to i32 %371 = bitcast float %231 to i32 %372 = insertelement <4 x i32> undef, i32 %369, i32 0 %373 = insertelement <4 x i32> %372, i32 %370, i32 1 %374 = insertelement <4 x i32> %373, i32 %371, i32 2 %375 = bitcast <8 x i32> %84 to <32 x i8> %376 = bitcast <4 x i32> %86 to <16 x i8> %377 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %374, <32 x i8> %375, <16 x i8> %376, i32 2) %378 = extractelement <4 x float> %377, i32 0 %379 = extractelement <4 x float> %377, i32 1 %380 = extractelement <4 x float> %377, i32 2 %381 = fcmp oeq float %temp34.0, 4.000000e+00 %382 = select i1 %381, float 1.000000e+00, float 0.000000e+00 %383 = bitcast float %366 to i32 %384 = bitcast float %368 to i32 %385 = bitcast float %231 to i32 %386 = insertelement <4 x i32> undef, i32 %383, i32 0 %387 = insertelement <4 x i32> %386, i32 %384, i32 1 %388 = insertelement <4 x i32> %387, i32 %385, i32 2 %389 = bitcast <8 x i32> %76 to <32 x i8> %390 = bitcast <4 x i32> %78 to <16 x i8> %391 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %388, <32 x i8> %389, <16 x i8> %390, i32 2) %392 = extractelement <4 x float> %391, i32 0 %393 = extractelement <4 x float> %391, i32 1 %394 = extractelement <4 x float> %391, i32 2 %395 = fcmp oeq float %temp34.0, 3.000000e+00 %396 = select i1 %395, float 1.000000e+00, float 0.000000e+00 %397 = bitcast float %366 to i32 %398 = bitcast float %368 to i32 %399 = bitcast float %231 to i32 %400 = insertelement <4 x i32> undef, i32 %397, i32 0 %401 = insertelement <4 x i32> %400, i32 %398, i32 1 %402 = insertelement <4 x i32> %401, i32 %399, i32 2 %403 = bitcast <8 x i32> %68 to <32 x i8> %404 = bitcast <4 x i32> %70 to <16 x i8> %405 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %402, <32 x i8> %403, <16 x i8> %404, i32 2) %406 = extractelement <4 x float> %405, i32 0 %407 = extractelement <4 x float> %405, i32 1 %408 = extractelement <4 x float> %405, i32 2 %409 = fcmp oeq float %temp34.0, 2.000000e+00 %410 = select i1 %409, float 1.000000e+00, float 0.000000e+00 %411 = bitcast float %366 to i32 %412 = bitcast float %368 to i32 %413 = bitcast float %231 to i32 %414 = insertelement <4 x i32> undef, i32 %411, i32 0 %415 = insertelement <4 x i32> %414, i32 %412, i32 1 %416 = insertelement <4 x i32> %415, i32 %413, i32 2 %417 = bitcast <8 x i32> %60 to <32 x i8> %418 = bitcast <4 x i32> %62 to <16 x i8> %419 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %416, <32 x i8> %417, <16 x i8> %418, i32 2) %420 = extractelement <4 x float> %419, i32 0 %421 = extractelement <4 x float> %419, i32 1 %422 = extractelement <4 x float> %419, i32 2 %423 = fcmp oeq float %temp34.0, 1.000000e+00 %424 = select i1 %423, float 1.000000e+00, float 0.000000e+00 %425 = bitcast float %366 to i32 %426 = bitcast float %368 to i32 %427 = bitcast float %231 to i32 %428 = insertelement <4 x i32> undef, i32 %425, i32 0 %429 = insertelement <4 x i32> %428, i32 %426, i32 1 %430 = insertelement <4 x i32> %429, i32 %427, i32 2 %431 = bitcast <8 x i32> %52 to <32 x i8> %432 = bitcast <4 x i32> %54 to <16 x i8> %433 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %430, <32 x i8> %431, <16 x i8> %432, i32 2) %434 = extractelement <4 x float> %433, i32 0 %435 = extractelement <4 x float> %433, i32 1 %436 = extractelement <4 x float> %433, i32 2 %437 = fcmp oeq float %temp34.0, 0.000000e+00 %438 = select i1 %437, float 1.000000e+00, float 0.000000e+00 %439 = fmul float %434, %438 %440 = fmul float %435, %438 %441 = fmul float %436, %438 %442 = fmul float %420, %424 %443 = fadd float %442, %439 %444 = fmul float %421, %424 %445 = fadd float %444, %440 %446 = fmul float %422, %424 %447 = fadd float %446, %441 %448 = fmul float %406, %410 %449 = fadd float %448, %443 %450 = fmul float %407, %410 %451 = fadd float %450, %445 %452 = fmul float %408, %410 %453 = fadd float %452, %447 %454 = fmul float %392, %396 %455 = fadd float %454, %449 %456 = fmul float %393, %396 %457 = fadd float %456, %451 %458 = fmul float %394, %396 %459 = fadd float %458, %453 %460 = fmul float %378, %382 %461 = fadd float %460, %455 %462 = fmul float %379, %382 %463 = fadd float %462, %457 %464 = fmul float %380, %382 %465 = fadd float %464, %459 %466 = fcmp une float %38, %temp20.0 %.sink214 = select i1 %466, float %41, float %40 %temp56.0 = select i1 %466, float 1.953125e-03, float 3.906250e-03 %467 = fdiv float 1.000000e+00, %.sink214 %468 = fmul float %105, %467 %469 = fmul float %103, %467 %470 = call float @llvm.floor.f32(float %468) %471 = fsub float %468, %470 %472 = call float @llvm.floor.f32(float %469) %473 = fsub float %469, %472 %474 = fmul float %42, 2.000000e+00 %475 = fmul float %474, %temp56.0 %476 = fsub float 1.000000e+00, %475 %477 = fmul float %temp56.0, %42 %478 = fmul float %471, %476 %479 = fadd float %478, %477 %480 = fmul float %473, %476 %481 = fadd float %480, %477 %482 = fmul float %479, %temp20.0 %483 = fadd float %482, %temp32.0 %484 = fmul float %481, %temp20.0 %485 = fadd float %484, %temp33.0 %486 = bitcast float %483 to i32 %487 = bitcast float %485 to i32 %488 = bitcast float %231 to i32 %489 = insertelement <4 x i32> undef, i32 %486, i32 0 %490 = insertelement <4 x i32> %489, i32 %487, i32 1 %491 = insertelement <4 x i32> %490, i32 %488, i32 2 %492 = bitcast <8 x i32> %84 to <32 x i8> %493 = bitcast <4 x i32> %86 to <16 x i8> %494 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %491, <32 x i8> %492, <16 x i8> %493, i32 2) %495 = extractelement <4 x float> %494, i32 0 %496 = extractelement <4 x float> %494, i32 1 %497 = extractelement <4 x float> %494, i32 2 %498 = fcmp oeq float %temp34.0, 4.000000e+00 %499 = select i1 %498, float 1.000000e+00, float 0.000000e+00 %500 = bitcast float %483 to i32 %501 = bitcast float %485 to i32 %502 = bitcast float %231 to i32 %503 = insertelement <4 x i32> undef, i32 %500, i32 0 %504 = insertelement <4 x i32> %503, i32 %501, i32 1 %505 = insertelement <4 x i32> %504, i32 %502, i32 2 %506 = bitcast <8 x i32> %76 to <32 x i8> %507 = bitcast <4 x i32> %78 to <16 x i8> %508 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %505, <32 x i8> %506, <16 x i8> %507, i32 2) %509 = extractelement <4 x float> %508, i32 0 %510 = extractelement <4 x float> %508, i32 1 %511 = extractelement <4 x float> %508, i32 2 %512 = fcmp oeq float %temp34.0, 3.000000e+00 %513 = select i1 %512, float 1.000000e+00, float 0.000000e+00 %514 = bitcast float %483 to i32 %515 = bitcast float %485 to i32 %516 = bitcast float %231 to i32 %517 = insertelement <4 x i32> undef, i32 %514, i32 0 %518 = insertelement <4 x i32> %517, i32 %515, i32 1 %519 = insertelement <4 x i32> %518, i32 %516, i32 2 %520 = bitcast <8 x i32> %68 to <32 x i8> %521 = bitcast <4 x i32> %70 to <16 x i8> %522 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %519, <32 x i8> %520, <16 x i8> %521, i32 2) %523 = extractelement <4 x float> %522, i32 0 %524 = extractelement <4 x float> %522, i32 1 %525 = extractelement <4 x float> %522, i32 2 %526 = fcmp oeq float %temp34.0, 2.000000e+00 %527 = select i1 %526, float 1.000000e+00, float 0.000000e+00 %528 = bitcast float %483 to i32 %529 = bitcast float %485 to i32 %530 = bitcast float %231 to i32 %531 = insertelement <4 x i32> undef, i32 %528, i32 0 %532 = insertelement <4 x i32> %531, i32 %529, i32 1 %533 = insertelement <4 x i32> %532, i32 %530, i32 2 %534 = bitcast <8 x i32> %60 to <32 x i8> %535 = bitcast <4 x i32> %62 to <16 x i8> %536 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %533, <32 x i8> %534, <16 x i8> %535, i32 2) %537 = extractelement <4 x float> %536, i32 0 %538 = extractelement <4 x float> %536, i32 1 %539 = extractelement <4 x float> %536, i32 2 %540 = fcmp oeq float %temp34.0, 1.000000e+00 %541 = select i1 %540, float 1.000000e+00, float 0.000000e+00 %542 = bitcast float %483 to i32 %543 = bitcast float %485 to i32 %544 = bitcast float %231 to i32 %545 = insertelement <4 x i32> undef, i32 %542, i32 0 %546 = insertelement <4 x i32> %545, i32 %543, i32 1 %547 = insertelement <4 x i32> %546, i32 %544, i32 2 %548 = bitcast <8 x i32> %52 to <32 x i8> %549 = bitcast <4 x i32> %54 to <16 x i8> %550 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %547, <32 x i8> %548, <16 x i8> %549, i32 2) %551 = extractelement <4 x float> %550, i32 0 %552 = extractelement <4 x float> %550, i32 1 %553 = extractelement <4 x float> %550, i32 2 %554 = fcmp oeq float %temp34.0, 0.000000e+00 %555 = select i1 %554, float 1.000000e+00, float 0.000000e+00 %556 = fmul float %551, %555 %557 = fmul float %552, %555 %558 = fmul float %553, %555 %559 = fmul float %537, %541 %560 = fadd float %559, %556 %561 = fmul float %538, %541 %562 = fadd float %561, %557 %563 = fmul float %539, %541 %564 = fadd float %563, %558 %565 = fmul float %523, %527 %566 = fadd float %565, %560 %567 = fmul float %524, %527 %568 = fadd float %567, %562 %569 = fmul float %525, %527 %570 = fadd float %569, %564 %571 = fmul float %509, %513 %572 = fadd float %571, %566 %573 = fmul float %510, %513 %574 = fadd float %573, %568 %575 = fmul float %511, %513 %576 = fadd float %575, %570 %577 = fmul float %495, %499 %578 = fadd float %577, %572 %579 = fmul float %496, %499 %580 = fadd float %579, %574 %581 = fmul float %497, %499 %582 = fadd float %581, %576 %583 = fcmp une float %38, %temp24.0 %.sink215 = select i1 %583, float %41, float %40 %temp60.0 = select i1 %583, float 1.953125e-03, float 3.906250e-03 %584 = fdiv float 1.000000e+00, %.sink215 %585 = fmul float %103, %584 %586 = fmul float %104, %584 %587 = call float @llvm.floor.f32(float %585) %588 = fsub float %585, %587 %589 = call float @llvm.floor.f32(float %586) %590 = fsub float %586, %589 %591 = fmul float %42, 2.000000e+00 %592 = fmul float %591, %temp60.0 %593 = fsub float 1.000000e+00, %592 %594 = fmul float %temp60.0, %42 %595 = fmul float %588, %593 %596 = fadd float %595, %594 %597 = fmul float %590, %593 %598 = fadd float %597, %594 %599 = fmul float %596, %temp24.0 %600 = fadd float %599, %temp40.0 %601 = fmul float %598, %temp24.0 %602 = fadd float %601, %temp41.0 %603 = bitcast float %600 to i32 %604 = bitcast float %602 to i32 %605 = bitcast float %231 to i32 %606 = insertelement <4 x i32> undef, i32 %603, i32 0 %607 = insertelement <4 x i32> %606, i32 %604, i32 1 %608 = insertelement <4 x i32> %607, i32 %605, i32 2 %609 = bitcast <8 x i32> %84 to <32 x i8> %610 = bitcast <4 x i32> %86 to <16 x i8> %611 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %608, <32 x i8> %609, <16 x i8> %610, i32 2) %612 = extractelement <4 x float> %611, i32 0 %613 = extractelement <4 x float> %611, i32 1 %614 = extractelement <4 x float> %611, i32 2 %615 = fcmp oeq float %temp42.0, 4.000000e+00 %616 = select i1 %615, float 1.000000e+00, float 0.000000e+00 %617 = bitcast float %600 to i32 %618 = bitcast float %602 to i32 %619 = bitcast float %231 to i32 %620 = insertelement <4 x i32> undef, i32 %617, i32 0 %621 = insertelement <4 x i32> %620, i32 %618, i32 1 %622 = insertelement <4 x i32> %621, i32 %619, i32 2 %623 = bitcast <8 x i32> %76 to <32 x i8> %624 = bitcast <4 x i32> %78 to <16 x i8> %625 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %622, <32 x i8> %623, <16 x i8> %624, i32 2) %626 = extractelement <4 x float> %625, i32 0 %627 = extractelement <4 x float> %625, i32 1 %628 = extractelement <4 x float> %625, i32 2 %629 = fcmp oeq float %temp42.0, 3.000000e+00 %630 = select i1 %629, float 1.000000e+00, float 0.000000e+00 %631 = bitcast float %600 to i32 %632 = bitcast float %602 to i32 %633 = bitcast float %231 to i32 %634 = insertelement <4 x i32> undef, i32 %631, i32 0 %635 = insertelement <4 x i32> %634, i32 %632, i32 1 %636 = insertelement <4 x i32> %635, i32 %633, i32 2 %637 = bitcast <8 x i32> %68 to <32 x i8> %638 = bitcast <4 x i32> %70 to <16 x i8> %639 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %636, <32 x i8> %637, <16 x i8> %638, i32 2) %640 = extractelement <4 x float> %639, i32 0 %641 = extractelement <4 x float> %639, i32 1 %642 = extractelement <4 x float> %639, i32 2 %643 = fcmp oeq float %temp42.0, 2.000000e+00 %644 = select i1 %643, float 1.000000e+00, float 0.000000e+00 %645 = bitcast float %600 to i32 %646 = bitcast float %602 to i32 %647 = bitcast float %231 to i32 %648 = insertelement <4 x i32> undef, i32 %645, i32 0 %649 = insertelement <4 x i32> %648, i32 %646, i32 1 %650 = insertelement <4 x i32> %649, i32 %647, i32 2 %651 = bitcast <8 x i32> %60 to <32 x i8> %652 = bitcast <4 x i32> %62 to <16 x i8> %653 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %650, <32 x i8> %651, <16 x i8> %652, i32 2) %654 = extractelement <4 x float> %653, i32 0 %655 = extractelement <4 x float> %653, i32 1 %656 = extractelement <4 x float> %653, i32 2 %657 = fcmp oeq float %temp42.0, 1.000000e+00 %658 = select i1 %657, float 1.000000e+00, float 0.000000e+00 %659 = bitcast float %600 to i32 %660 = bitcast float %602 to i32 %661 = bitcast float %231 to i32 %662 = insertelement <4 x i32> undef, i32 %659, i32 0 %663 = insertelement <4 x i32> %662, i32 %660, i32 1 %664 = insertelement <4 x i32> %663, i32 %661, i32 2 %665 = bitcast <8 x i32> %52 to <32 x i8> %666 = bitcast <4 x i32> %54 to <16 x i8> %667 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %664, <32 x i8> %665, <16 x i8> %666, i32 2) %668 = extractelement <4 x float> %667, i32 0 %669 = extractelement <4 x float> %667, i32 1 %670 = extractelement <4 x float> %667, i32 2 %671 = fcmp oeq float %temp42.0, 0.000000e+00 %672 = select i1 %671, float 1.000000e+00, float 0.000000e+00 %673 = fmul float %668, %672 %674 = fmul float %669, %672 %675 = fmul float %670, %672 %676 = fmul float %654, %658 %677 = fadd float %676, %673 %678 = fmul float %655, %658 %679 = fadd float %678, %674 %680 = fmul float %656, %658 %681 = fadd float %680, %675 %682 = fmul float %640, %644 %683 = fadd float %682, %677 %684 = fmul float %641, %644 %685 = fadd float %684, %679 %686 = fmul float %642, %644 %687 = fadd float %686, %681 %688 = fmul float %626, %630 %689 = fadd float %688, %683 %690 = fmul float %627, %630 %691 = fadd float %690, %685 %692 = fmul float %628, %630 %693 = fadd float %692, %687 %694 = fmul float %612, %616 %695 = fadd float %694, %689 %696 = fmul float %613, %616 %697 = fadd float %696, %691 %698 = fmul float %614, %616 %699 = fadd float %698, %693 %700 = fcmp une float %38, %temp24.0 %.sink216 = select i1 %700, float %41, float %40 %temp64.0 = select i1 %700, float 1.953125e-03, float 3.906250e-03 %701 = fdiv float 1.000000e+00, %.sink216 %702 = fmul float %105, %701 %703 = fmul float %104, %701 %704 = call float @llvm.floor.f32(float %702) %705 = fsub float %702, %704 %706 = call float @llvm.floor.f32(float %703) %707 = fsub float %703, %706 %708 = fmul float %42, 2.000000e+00 %709 = fmul float %708, %temp64.0 %710 = fsub float 1.000000e+00, %709 %711 = fmul float %temp64.0, %42 %712 = fmul float %705, %710 %713 = fadd float %712, %711 %714 = fmul float %707, %710 %715 = fadd float %714, %711 %716 = fmul float %713, %temp24.0 %717 = fadd float %716, %temp40.0 %718 = fmul float %715, %temp24.0 %719 = fadd float %718, %temp41.0 %720 = bitcast float %717 to i32 %721 = bitcast float %719 to i32 %722 = bitcast float %231 to i32 %723 = insertelement <4 x i32> undef, i32 %720, i32 0 %724 = insertelement <4 x i32> %723, i32 %721, i32 1 %725 = insertelement <4 x i32> %724, i32 %722, i32 2 %726 = bitcast <8 x i32> %84 to <32 x i8> %727 = bitcast <4 x i32> %86 to <16 x i8> %728 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %725, <32 x i8> %726, <16 x i8> %727, i32 2) %729 = extractelement <4 x float> %728, i32 0 %730 = extractelement <4 x float> %728, i32 1 %731 = extractelement <4 x float> %728, i32 2 %732 = fcmp oeq float %temp42.0, 4.000000e+00 %733 = select i1 %732, float 1.000000e+00, float 0.000000e+00 %734 = bitcast float %717 to i32 %735 = bitcast float %719 to i32 %736 = bitcast float %231 to i32 %737 = insertelement <4 x i32> undef, i32 %734, i32 0 %738 = insertelement <4 x i32> %737, i32 %735, i32 1 %739 = insertelement <4 x i32> %738, i32 %736, i32 2 %740 = bitcast <8 x i32> %76 to <32 x i8> %741 = bitcast <4 x i32> %78 to <16 x i8> %742 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %739, <32 x i8> %740, <16 x i8> %741, i32 2) %743 = extractelement <4 x float> %742, i32 0 %744 = extractelement <4 x float> %742, i32 1 %745 = extractelement <4 x float> %742, i32 2 %746 = fcmp oeq float %temp42.0, 3.000000e+00 %747 = select i1 %746, float 1.000000e+00, float 0.000000e+00 %748 = bitcast float %717 to i32 %749 = bitcast float %719 to i32 %750 = bitcast float %231 to i32 %751 = insertelement <4 x i32> undef, i32 %748, i32 0 %752 = insertelement <4 x i32> %751, i32 %749, i32 1 %753 = insertelement <4 x i32> %752, i32 %750, i32 2 %754 = bitcast <8 x i32> %68 to <32 x i8> %755 = bitcast <4 x i32> %70 to <16 x i8> %756 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %753, <32 x i8> %754, <16 x i8> %755, i32 2) %757 = extractelement <4 x float> %756, i32 0 %758 = extractelement <4 x float> %756, i32 1 %759 = extractelement <4 x float> %756, i32 2 %760 = fcmp oeq float %temp42.0, 2.000000e+00 %761 = select i1 %760, float 1.000000e+00, float 0.000000e+00 %762 = bitcast float %717 to i32 %763 = bitcast float %719 to i32 %764 = bitcast float %231 to i32 %765 = insertelement <4 x i32> undef, i32 %762, i32 0 %766 = insertelement <4 x i32> %765, i32 %763, i32 1 %767 = insertelement <4 x i32> %766, i32 %764, i32 2 %768 = bitcast <8 x i32> %60 to <32 x i8> %769 = bitcast <4 x i32> %62 to <16 x i8> %770 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %767, <32 x i8> %768, <16 x i8> %769, i32 2) %771 = extractelement <4 x float> %770, i32 0 %772 = extractelement <4 x float> %770, i32 1 %773 = extractelement <4 x float> %770, i32 2 %774 = fcmp oeq float %temp42.0, 1.000000e+00 %775 = select i1 %774, float 1.000000e+00, float 0.000000e+00 %776 = bitcast float %717 to i32 %777 = bitcast float %719 to i32 %778 = bitcast float %231 to i32 %779 = insertelement <4 x i32> undef, i32 %776, i32 0 %780 = insertelement <4 x i32> %779, i32 %777, i32 1 %781 = insertelement <4 x i32> %780, i32 %778, i32 2 %782 = bitcast <8 x i32> %52 to <32 x i8> %783 = bitcast <4 x i32> %54 to <16 x i8> %784 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %781, <32 x i8> %782, <16 x i8> %783, i32 2) %785 = extractelement <4 x float> %784, i32 0 %786 = extractelement <4 x float> %784, i32 1 %787 = extractelement <4 x float> %784, i32 2 %788 = fcmp oeq float %temp42.0, 0.000000e+00 %789 = select i1 %788, float 1.000000e+00, float 0.000000e+00 %790 = fmul float %785, %789 %791 = fmul float %786, %789 %792 = fmul float %787, %789 %793 = fmul float %771, %775 %794 = fadd float %793, %790 %795 = fmul float %772, %775 %796 = fadd float %795, %791 %797 = fmul float %773, %775 %798 = fadd float %797, %792 %799 = fmul float %757, %761 %800 = fadd float %799, %794 %801 = fmul float %758, %761 %802 = fadd float %801, %796 %803 = fmul float %759, %761 %804 = fadd float %803, %798 %805 = fmul float %743, %747 %806 = fadd float %805, %800 %807 = fmul float %744, %747 %808 = fadd float %807, %802 %809 = fmul float %745, %747 %810 = fadd float %809, %804 %811 = fmul float %729, %733 %812 = fadd float %811, %806 %813 = fmul float %730, %733 %814 = fadd float %813, %808 %815 = fmul float %731, %733 %816 = fadd float %815, %810 %817 = fcmp une float %38, %temp24.0 %.sink217 = select i1 %817, float %41, float %40 %temp68.0 = select i1 %817, float 1.953125e-03, float 3.906250e-03 %818 = fdiv float 1.000000e+00, %.sink217 %819 = fmul float %105, %818 %820 = fmul float %103, %818 %821 = call float @llvm.floor.f32(float %819) %822 = fsub float %819, %821 %823 = call float @llvm.floor.f32(float %820) %824 = fsub float %820, %823 %825 = fmul float %42, 2.000000e+00 %826 = fmul float %825, %temp68.0 %827 = fsub float 1.000000e+00, %826 %828 = fmul float %temp68.0, %42 %829 = fmul float %822, %827 %830 = fadd float %829, %828 %831 = fmul float %824, %827 %832 = fadd float %831, %828 %833 = fmul float %830, %temp24.0 %834 = fadd float %833, %temp40.0 %835 = fmul float %832, %temp24.0 %836 = fadd float %835, %temp41.0 %837 = bitcast float %834 to i32 %838 = bitcast float %836 to i32 %839 = bitcast float %231 to i32 %840 = insertelement <4 x i32> undef, i32 %837, i32 0 %841 = insertelement <4 x i32> %840, i32 %838, i32 1 %842 = insertelement <4 x i32> %841, i32 %839, i32 2 %843 = bitcast <8 x i32> %84 to <32 x i8> %844 = bitcast <4 x i32> %86 to <16 x i8> %845 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %842, <32 x i8> %843, <16 x i8> %844, i32 2) %846 = extractelement <4 x float> %845, i32 0 %847 = extractelement <4 x float> %845, i32 1 %848 = extractelement <4 x float> %845, i32 2 %849 = fcmp oeq float %temp42.0, 4.000000e+00 %850 = select i1 %849, float 1.000000e+00, float 0.000000e+00 %851 = bitcast float %834 to i32 %852 = bitcast float %836 to i32 %853 = bitcast float %231 to i32 %854 = insertelement <4 x i32> undef, i32 %851, i32 0 %855 = insertelement <4 x i32> %854, i32 %852, i32 1 %856 = insertelement <4 x i32> %855, i32 %853, i32 2 %857 = bitcast <8 x i32> %76 to <32 x i8> %858 = bitcast <4 x i32> %78 to <16 x i8> %859 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %856, <32 x i8> %857, <16 x i8> %858, i32 2) %860 = extractelement <4 x float> %859, i32 0 %861 = extractelement <4 x float> %859, i32 1 %862 = extractelement <4 x float> %859, i32 2 %863 = fcmp oeq float %temp42.0, 3.000000e+00 %864 = select i1 %863, float 1.000000e+00, float 0.000000e+00 %865 = bitcast float %834 to i32 %866 = bitcast float %836 to i32 %867 = bitcast float %231 to i32 %868 = insertelement <4 x i32> undef, i32 %865, i32 0 %869 = insertelement <4 x i32> %868, i32 %866, i32 1 %870 = insertelement <4 x i32> %869, i32 %867, i32 2 %871 = bitcast <8 x i32> %68 to <32 x i8> %872 = bitcast <4 x i32> %70 to <16 x i8> %873 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %870, <32 x i8> %871, <16 x i8> %872, i32 2) %874 = extractelement <4 x float> %873, i32 0 %875 = extractelement <4 x float> %873, i32 1 %876 = extractelement <4 x float> %873, i32 2 %877 = fcmp oeq float %temp42.0, 2.000000e+00 %878 = select i1 %877, float 1.000000e+00, float 0.000000e+00 %879 = bitcast float %834 to i32 %880 = bitcast float %836 to i32 %881 = bitcast float %231 to i32 %882 = insertelement <4 x i32> undef, i32 %879, i32 0 %883 = insertelement <4 x i32> %882, i32 %880, i32 1 %884 = insertelement <4 x i32> %883, i32 %881, i32 2 %885 = bitcast <8 x i32> %60 to <32 x i8> %886 = bitcast <4 x i32> %62 to <16 x i8> %887 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %884, <32 x i8> %885, <16 x i8> %886, i32 2) %888 = extractelement <4 x float> %887, i32 0 %889 = extractelement <4 x float> %887, i32 1 %890 = extractelement <4 x float> %887, i32 2 %891 = fcmp oeq float %temp42.0, 1.000000e+00 %892 = select i1 %891, float 1.000000e+00, float 0.000000e+00 %893 = bitcast float %834 to i32 %894 = bitcast float %836 to i32 %895 = bitcast float %231 to i32 %896 = insertelement <4 x i32> undef, i32 %893, i32 0 %897 = insertelement <4 x i32> %896, i32 %894, i32 1 %898 = insertelement <4 x i32> %897, i32 %895, i32 2 %899 = bitcast <8 x i32> %52 to <32 x i8> %900 = bitcast <4 x i32> %54 to <16 x i8> %901 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %898, <32 x i8> %899, <16 x i8> %900, i32 2) %902 = extractelement <4 x float> %901, i32 0 %903 = extractelement <4 x float> %901, i32 1 %904 = extractelement <4 x float> %901, i32 2 %905 = fcmp oeq float %temp42.0, 0.000000e+00 %906 = select i1 %905, float 1.000000e+00, float 0.000000e+00 %907 = fmul float %902, %906 %908 = fmul float %903, %906 %909 = fmul float %904, %906 %910 = fmul float %888, %892 %911 = fadd float %910, %907 %912 = fmul float %889, %892 %913 = fadd float %912, %908 %914 = fmul float %890, %892 %915 = fadd float %914, %909 %916 = fmul float %874, %878 %917 = fadd float %916, %911 %918 = fmul float %875, %878 %919 = fadd float %918, %913 %920 = fmul float %876, %878 %921 = fadd float %920, %915 %922 = fmul float %860, %864 %923 = fadd float %922, %917 %924 = fmul float %861, %864 %925 = fadd float %924, %919 %926 = fmul float %862, %864 %927 = fadd float %926, %921 %928 = fmul float %846, %850 %929 = fadd float %928, %923 %930 = fmul float %847, %850 %931 = fadd float %930, %925 %932 = fmul float %848, %850 %933 = fadd float %932, %927 %934 = fcmp une float %38, %temp28.0 %.sink218 = select i1 %934, float %41, float %40 %temp72.0 = select i1 %934, float 1.953125e-03, float 3.906250e-03 %935 = fdiv float 1.000000e+00, %.sink218 %936 = fmul float %103, %935 %937 = fmul float %104, %935 %938 = call float @llvm.floor.f32(float %936) %939 = fsub float %936, %938 %940 = call float @llvm.floor.f32(float %937) %941 = fsub float %937, %940 %942 = fmul float %42, 2.000000e+00 %943 = fmul float %942, %temp72.0 %944 = fsub float 1.000000e+00, %943 %945 = fmul float %temp72.0, %42 %946 = fmul float %939, %944 %947 = fadd float %946, %945 %948 = fmul float %941, %944 %949 = fadd float %948, %945 %950 = fmul float %947, %temp28.0 %951 = fadd float %950, %temp16.0 %952 = fmul float %949, %temp28.0 %953 = fadd float %952, %temp17.0 %954 = bitcast float %951 to i32 %955 = bitcast float %953 to i32 %956 = bitcast float %231 to i32 %957 = insertelement <4 x i32> undef, i32 %954, i32 0 %958 = insertelement <4 x i32> %957, i32 %955, i32 1 %959 = insertelement <4 x i32> %958, i32 %956, i32 2 %960 = bitcast <8 x i32> %84 to <32 x i8> %961 = bitcast <4 x i32> %86 to <16 x i8> %962 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %959, <32 x i8> %960, <16 x i8> %961, i32 2) %963 = extractelement <4 x float> %962, i32 0 %964 = extractelement <4 x float> %962, i32 1 %965 = extractelement <4 x float> %962, i32 2 %966 = fcmp oeq float %temp18.0, 4.000000e+00 %967 = select i1 %966, float 1.000000e+00, float 0.000000e+00 %968 = bitcast float %951 to i32 %969 = bitcast float %953 to i32 %970 = bitcast float %231 to i32 %971 = insertelement <4 x i32> undef, i32 %968, i32 0 %972 = insertelement <4 x i32> %971, i32 %969, i32 1 %973 = insertelement <4 x i32> %972, i32 %970, i32 2 %974 = bitcast <8 x i32> %76 to <32 x i8> %975 = bitcast <4 x i32> %78 to <16 x i8> %976 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %973, <32 x i8> %974, <16 x i8> %975, i32 2) %977 = extractelement <4 x float> %976, i32 0 %978 = extractelement <4 x float> %976, i32 1 %979 = extractelement <4 x float> %976, i32 2 %980 = fcmp oeq float %temp18.0, 3.000000e+00 %981 = select i1 %980, float 1.000000e+00, float 0.000000e+00 %982 = bitcast float %951 to i32 %983 = bitcast float %953 to i32 %984 = bitcast float %231 to i32 %985 = insertelement <4 x i32> undef, i32 %982, i32 0 %986 = insertelement <4 x i32> %985, i32 %983, i32 1 %987 = insertelement <4 x i32> %986, i32 %984, i32 2 %988 = bitcast <8 x i32> %68 to <32 x i8> %989 = bitcast <4 x i32> %70 to <16 x i8> %990 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %987, <32 x i8> %988, <16 x i8> %989, i32 2) %991 = extractelement <4 x float> %990, i32 0 %992 = extractelement <4 x float> %990, i32 1 %993 = extractelement <4 x float> %990, i32 2 %994 = fcmp oeq float %temp18.0, 2.000000e+00 %995 = select i1 %994, float 1.000000e+00, float 0.000000e+00 %996 = bitcast float %951 to i32 %997 = bitcast float %953 to i32 %998 = bitcast float %231 to i32 %999 = insertelement <4 x i32> undef, i32 %996, i32 0 %1000 = insertelement <4 x i32> %999, i32 %997, i32 1 %1001 = insertelement <4 x i32> %1000, i32 %998, i32 2 %1002 = bitcast <8 x i32> %60 to <32 x i8> %1003 = bitcast <4 x i32> %62 to <16 x i8> %1004 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1001, <32 x i8> %1002, <16 x i8> %1003, i32 2) %1005 = extractelement <4 x float> %1004, i32 0 %1006 = extractelement <4 x float> %1004, i32 1 %1007 = extractelement <4 x float> %1004, i32 2 %1008 = fcmp oeq float %temp18.0, 1.000000e+00 %1009 = select i1 %1008, float 1.000000e+00, float 0.000000e+00 %1010 = bitcast float %951 to i32 %1011 = bitcast float %953 to i32 %1012 = bitcast float %231 to i32 %1013 = insertelement <4 x i32> undef, i32 %1010, i32 0 %1014 = insertelement <4 x i32> %1013, i32 %1011, i32 1 %1015 = insertelement <4 x i32> %1014, i32 %1012, i32 2 %1016 = bitcast <8 x i32> %52 to <32 x i8> %1017 = bitcast <4 x i32> %54 to <16 x i8> %1018 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1015, <32 x i8> %1016, <16 x i8> %1017, i32 2) %1019 = extractelement <4 x float> %1018, i32 0 %1020 = extractelement <4 x float> %1018, i32 1 %1021 = extractelement <4 x float> %1018, i32 2 %1022 = fcmp oeq float %temp18.0, 0.000000e+00 %1023 = select i1 %1022, float 1.000000e+00, float 0.000000e+00 %1024 = fmul float %1019, %1023 %1025 = fmul float %1020, %1023 %1026 = fmul float %1021, %1023 %1027 = fmul float %1005, %1009 %1028 = fadd float %1027, %1024 %1029 = fmul float %1006, %1009 %1030 = fadd float %1029, %1025 %1031 = fmul float %1007, %1009 %1032 = fadd float %1031, %1026 %1033 = fmul float %991, %995 %1034 = fadd float %1033, %1028 %1035 = fmul float %992, %995 %1036 = fadd float %1035, %1030 %1037 = fmul float %993, %995 %1038 = fadd float %1037, %1032 %1039 = fmul float %977, %981 %1040 = fadd float %1039, %1034 %1041 = fmul float %978, %981 %1042 = fadd float %1041, %1036 %1043 = fmul float %979, %981 %1044 = fadd float %1043, %1038 %1045 = fmul float %963, %967 %1046 = fadd float %1045, %1040 %1047 = fmul float %964, %967 %1048 = fadd float %1047, %1042 %1049 = fmul float %965, %967 %1050 = fadd float %1049, %1044 %1051 = fcmp une float %38, %temp28.0 %.sink219 = select i1 %1051, float %41, float %40 %temp76.0 = select i1 %1051, float 1.953125e-03, float 3.906250e-03 %1052 = fdiv float 1.000000e+00, %.sink219 %1053 = fmul float %105, %1052 %1054 = fmul float %104, %1052 %1055 = call float @llvm.floor.f32(float %1053) %1056 = fsub float %1053, %1055 %1057 = call float @llvm.floor.f32(float %1054) %1058 = fsub float %1054, %1057 %1059 = fmul float %42, 2.000000e+00 %1060 = fmul float %1059, %temp76.0 %1061 = fsub float 1.000000e+00, %1060 %1062 = fmul float %temp76.0, %42 %1063 = fmul float %1056, %1061 %1064 = fadd float %1063, %1062 %1065 = fmul float %1058, %1061 %1066 = fadd float %1065, %1062 %1067 = fmul float %1064, %temp28.0 %1068 = fadd float %1067, %temp16.0 %1069 = fmul float %1066, %temp28.0 %1070 = fadd float %1069, %temp17.0 %1071 = bitcast float %1068 to i32 %1072 = bitcast float %1070 to i32 %1073 = bitcast float %231 to i32 %1074 = insertelement <4 x i32> undef, i32 %1071, i32 0 %1075 = insertelement <4 x i32> %1074, i32 %1072, i32 1 %1076 = insertelement <4 x i32> %1075, i32 %1073, i32 2 %1077 = bitcast <8 x i32> %84 to <32 x i8> %1078 = bitcast <4 x i32> %86 to <16 x i8> %1079 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1076, <32 x i8> %1077, <16 x i8> %1078, i32 2) %1080 = extractelement <4 x float> %1079, i32 0 %1081 = extractelement <4 x float> %1079, i32 1 %1082 = extractelement <4 x float> %1079, i32 2 %1083 = fcmp oeq float %temp18.0, 4.000000e+00 %1084 = select i1 %1083, float 1.000000e+00, float 0.000000e+00 %1085 = bitcast float %1068 to i32 %1086 = bitcast float %1070 to i32 %1087 = bitcast float %231 to i32 %1088 = insertelement <4 x i32> undef, i32 %1085, i32 0 %1089 = insertelement <4 x i32> %1088, i32 %1086, i32 1 %1090 = insertelement <4 x i32> %1089, i32 %1087, i32 2 %1091 = bitcast <8 x i32> %76 to <32 x i8> %1092 = bitcast <4 x i32> %78 to <16 x i8> %1093 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1090, <32 x i8> %1091, <16 x i8> %1092, i32 2) %1094 = extractelement <4 x float> %1093, i32 0 %1095 = extractelement <4 x float> %1093, i32 1 %1096 = extractelement <4 x float> %1093, i32 2 %1097 = fcmp oeq float %temp18.0, 3.000000e+00 %1098 = select i1 %1097, float 1.000000e+00, float 0.000000e+00 %1099 = bitcast float %1068 to i32 %1100 = bitcast float %1070 to i32 %1101 = bitcast float %231 to i32 %1102 = insertelement <4 x i32> undef, i32 %1099, i32 0 %1103 = insertelement <4 x i32> %1102, i32 %1100, i32 1 %1104 = insertelement <4 x i32> %1103, i32 %1101, i32 2 %1105 = bitcast <8 x i32> %68 to <32 x i8> %1106 = bitcast <4 x i32> %70 to <16 x i8> %1107 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1104, <32 x i8> %1105, <16 x i8> %1106, i32 2) %1108 = extractelement <4 x float> %1107, i32 0 %1109 = extractelement <4 x float> %1107, i32 1 %1110 = extractelement <4 x float> %1107, i32 2 %1111 = fcmp oeq float %temp18.0, 2.000000e+00 %1112 = select i1 %1111, float 1.000000e+00, float 0.000000e+00 %1113 = bitcast float %1068 to i32 %1114 = bitcast float %1070 to i32 %1115 = bitcast float %231 to i32 %1116 = insertelement <4 x i32> undef, i32 %1113, i32 0 %1117 = insertelement <4 x i32> %1116, i32 %1114, i32 1 %1118 = insertelement <4 x i32> %1117, i32 %1115, i32 2 %1119 = bitcast <8 x i32> %60 to <32 x i8> %1120 = bitcast <4 x i32> %62 to <16 x i8> %1121 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1118, <32 x i8> %1119, <16 x i8> %1120, i32 2) %1122 = extractelement <4 x float> %1121, i32 0 %1123 = extractelement <4 x float> %1121, i32 1 %1124 = extractelement <4 x float> %1121, i32 2 %1125 = fcmp oeq float %temp18.0, 1.000000e+00 %1126 = select i1 %1125, float 1.000000e+00, float 0.000000e+00 %1127 = bitcast float %1068 to i32 %1128 = bitcast float %1070 to i32 %1129 = bitcast float %231 to i32 %1130 = insertelement <4 x i32> undef, i32 %1127, i32 0 %1131 = insertelement <4 x i32> %1130, i32 %1128, i32 1 %1132 = insertelement <4 x i32> %1131, i32 %1129, i32 2 %1133 = bitcast <8 x i32> %52 to <32 x i8> %1134 = bitcast <4 x i32> %54 to <16 x i8> %1135 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1132, <32 x i8> %1133, <16 x i8> %1134, i32 2) %1136 = extractelement <4 x float> %1135, i32 0 %1137 = extractelement <4 x float> %1135, i32 1 %1138 = extractelement <4 x float> %1135, i32 2 %1139 = fcmp oeq float %temp18.0, 0.000000e+00 %1140 = select i1 %1139, float 1.000000e+00, float 0.000000e+00 %1141 = fmul float %1136, %1140 %1142 = fmul float %1137, %1140 %1143 = fmul float %1138, %1140 %1144 = fmul float %1122, %1126 %1145 = fadd float %1144, %1141 %1146 = fmul float %1123, %1126 %1147 = fadd float %1146, %1142 %1148 = fmul float %1124, %1126 %1149 = fadd float %1148, %1143 %1150 = fmul float %1108, %1112 %1151 = fadd float %1150, %1145 %1152 = fmul float %1109, %1112 %1153 = fadd float %1152, %1147 %1154 = fmul float %1110, %1112 %1155 = fadd float %1154, %1149 %1156 = fmul float %1094, %1098 %1157 = fadd float %1156, %1151 %1158 = fmul float %1095, %1098 %1159 = fadd float %1158, %1153 %1160 = fmul float %1096, %1098 %1161 = fadd float %1160, %1155 %1162 = fmul float %1080, %1084 %1163 = fadd float %1162, %1157 %1164 = fmul float %1081, %1084 %1165 = fadd float %1164, %1159 %1166 = fmul float %1082, %1084 %1167 = fadd float %1166, %1161 %1168 = fcmp une float %38, %temp28.0 %.sink220 = select i1 %1168, float %41, float %40 %temp80.0 = select i1 %1168, float 1.953125e-03, float 3.906250e-03 %1169 = fdiv float 1.000000e+00, %.sink220 %1170 = fmul float %105, %1169 %1171 = fmul float %103, %1169 %1172 = call float @llvm.floor.f32(float %1170) %1173 = fsub float %1170, %1172 %1174 = call float @llvm.floor.f32(float %1171) %1175 = fsub float %1171, %1174 %1176 = fmul float %42, 2.000000e+00 %1177 = fmul float %1176, %temp80.0 %1178 = fsub float 1.000000e+00, %1177 %1179 = fmul float %temp80.0, %42 %1180 = fmul float %1173, %1178 %1181 = fadd float %1180, %1179 %1182 = fmul float %1175, %1178 %1183 = fadd float %1182, %1179 %1184 = fmul float %1181, %temp28.0 %1185 = fadd float %1184, %temp16.0 %1186 = fmul float %1183, %temp28.0 %1187 = fadd float %1186, %temp17.0 %1188 = bitcast float %1185 to i32 %1189 = bitcast float %1187 to i32 %1190 = bitcast float %231 to i32 %1191 = insertelement <4 x i32> undef, i32 %1188, i32 0 %1192 = insertelement <4 x i32> %1191, i32 %1189, i32 1 %1193 = insertelement <4 x i32> %1192, i32 %1190, i32 2 %1194 = bitcast <8 x i32> %84 to <32 x i8> %1195 = bitcast <4 x i32> %86 to <16 x i8> %1196 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1193, <32 x i8> %1194, <16 x i8> %1195, i32 2) %1197 = extractelement <4 x float> %1196, i32 0 %1198 = extractelement <4 x float> %1196, i32 1 %1199 = extractelement <4 x float> %1196, i32 2 %1200 = fcmp oeq float %temp18.0, 4.000000e+00 %1201 = select i1 %1200, float 1.000000e+00, float 0.000000e+00 %1202 = bitcast float %1185 to i32 %1203 = bitcast float %1187 to i32 %1204 = bitcast float %231 to i32 %1205 = insertelement <4 x i32> undef, i32 %1202, i32 0 %1206 = insertelement <4 x i32> %1205, i32 %1203, i32 1 %1207 = insertelement <4 x i32> %1206, i32 %1204, i32 2 %1208 = bitcast <8 x i32> %76 to <32 x i8> %1209 = bitcast <4 x i32> %78 to <16 x i8> %1210 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1207, <32 x i8> %1208, <16 x i8> %1209, i32 2) %1211 = extractelement <4 x float> %1210, i32 0 %1212 = extractelement <4 x float> %1210, i32 1 %1213 = extractelement <4 x float> %1210, i32 2 %1214 = fcmp oeq float %temp18.0, 3.000000e+00 %1215 = select i1 %1214, float 1.000000e+00, float 0.000000e+00 %1216 = bitcast float %1185 to i32 %1217 = bitcast float %1187 to i32 %1218 = bitcast float %231 to i32 %1219 = insertelement <4 x i32> undef, i32 %1216, i32 0 %1220 = insertelement <4 x i32> %1219, i32 %1217, i32 1 %1221 = insertelement <4 x i32> %1220, i32 %1218, i32 2 %1222 = bitcast <8 x i32> %68 to <32 x i8> %1223 = bitcast <4 x i32> %70 to <16 x i8> %1224 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1221, <32 x i8> %1222, <16 x i8> %1223, i32 2) %1225 = extractelement <4 x float> %1224, i32 0 %1226 = extractelement <4 x float> %1224, i32 1 %1227 = extractelement <4 x float> %1224, i32 2 %1228 = fcmp oeq float %temp18.0, 2.000000e+00 %1229 = select i1 %1228, float 1.000000e+00, float 0.000000e+00 %1230 = bitcast float %1185 to i32 %1231 = bitcast float %1187 to i32 %1232 = bitcast float %231 to i32 %1233 = insertelement <4 x i32> undef, i32 %1230, i32 0 %1234 = insertelement <4 x i32> %1233, i32 %1231, i32 1 %1235 = insertelement <4 x i32> %1234, i32 %1232, i32 2 %1236 = bitcast <8 x i32> %60 to <32 x i8> %1237 = bitcast <4 x i32> %62 to <16 x i8> %1238 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1235, <32 x i8> %1236, <16 x i8> %1237, i32 2) %1239 = extractelement <4 x float> %1238, i32 0 %1240 = extractelement <4 x float> %1238, i32 1 %1241 = extractelement <4 x float> %1238, i32 2 %1242 = fcmp oeq float %temp18.0, 1.000000e+00 %1243 = select i1 %1242, float 1.000000e+00, float 0.000000e+00 %1244 = bitcast float %1185 to i32 %1245 = bitcast float %1187 to i32 %1246 = bitcast float %231 to i32 %1247 = insertelement <4 x i32> undef, i32 %1244, i32 0 %1248 = insertelement <4 x i32> %1247, i32 %1245, i32 1 %1249 = insertelement <4 x i32> %1248, i32 %1246, i32 2 %1250 = bitcast <8 x i32> %52 to <32 x i8> %1251 = bitcast <4 x i32> %54 to <16 x i8> %1252 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1249, <32 x i8> %1250, <16 x i8> %1251, i32 2) %1253 = extractelement <4 x float> %1252, i32 0 %1254 = extractelement <4 x float> %1252, i32 1 %1255 = extractelement <4 x float> %1252, i32 2 %1256 = fcmp oeq float %temp18.0, 0.000000e+00 %1257 = select i1 %1256, float 1.000000e+00, float 0.000000e+00 %1258 = fmul float %1253, %1257 %1259 = fmul float %1254, %1257 %1260 = fmul float %1255, %1257 %1261 = fmul float %1239, %1243 %1262 = fadd float %1261, %1258 %1263 = fmul float %1240, %1243 %1264 = fadd float %1263, %1259 %1265 = fmul float %1241, %1243 %1266 = fadd float %1265, %1260 %1267 = fmul float %1225, %1229 %1268 = fadd float %1267, %1262 %1269 = fmul float %1226, %1229 %1270 = fadd float %1269, %1264 %1271 = fmul float %1227, %1229 %1272 = fadd float %1271, %1266 %1273 = fmul float %1211, %1215 %1274 = fadd float %1273, %1268 %1275 = fmul float %1212, %1215 %1276 = fadd float %1275, %1270 %1277 = fmul float %1213, %1215 %1278 = fadd float %1277, %1272 %1279 = fmul float %1197, %1201 %1280 = fadd float %1279, %1274 %1281 = fmul float %1198, %1201 %1282 = fadd float %1281, %1276 %1283 = fmul float %1199, %1201 %1284 = fadd float %1283, %1278 %1285 = fmul float %1046, %156 %1286 = fmul float %1048, %156 %1287 = fmul float %1050, %156 %1288 = fmul float %1163, %154 %1289 = fadd float %1288, %1285 %1290 = fmul float %1165, %154 %1291 = fadd float %1290, %1286 %1292 = fmul float %1167, %154 %1293 = fadd float %1292, %1287 %1294 = fmul float %1280, %155 %1295 = fadd float %1294, %1289 %1296 = fmul float %1282, %155 %1297 = fadd float %1296, %1291 %1298 = fmul float %1284, %155 %1299 = fadd float %1298, %1293 %1300 = fmul float %695, %156 %1301 = fmul float %697, %156 %1302 = fmul float %699, %156 %1303 = fmul float %812, %154 %1304 = fadd float %1303, %1300 %1305 = fmul float %814, %154 %1306 = fadd float %1305, %1301 %1307 = fmul float %816, %154 %1308 = fadd float %1307, %1302 %1309 = fmul float %929, %155 %1310 = fadd float %1309, %1304 %1311 = fmul float %931, %155 %1312 = fadd float %1311, %1306 %1313 = fmul float %933, %155 %1314 = fadd float %1313, %1308 %1315 = fmul float %344, %156 %1316 = fmul float %346, %156 %1317 = fmul float %348, %156 %1318 = fmul float %461, %154 %1319 = fadd float %1318, %1315 %1320 = fmul float %463, %154 %1321 = fadd float %1320, %1316 %1322 = fmul float %465, %154 %1323 = fadd float %1322, %1317 %1324 = fmul float %578, %155 %1325 = fadd float %1324, %1319 %1326 = fmul float %580, %155 %1327 = fadd float %1326, %1321 %1328 = fmul float %582, %155 %1329 = fadd float %1328, %1323 %1330 = fmul float %95, %1325 %1331 = fmul float %95, %1327 %1332 = fmul float %95, %1329 %1333 = fmul float %96, %1310 %1334 = fadd float %1333, %1330 %1335 = fmul float %96, %1312 %1336 = fadd float %1335, %1331 %1337 = fmul float %96, %1314 %1338 = fadd float %1337, %1332 %1339 = fmul float %97, %1295 %1340 = fadd float %1339, %1334 %1341 = fmul float %97, %1297 %1342 = fadd float %1341, %1336 %1343 = fmul float %97, %1299 %1344 = fadd float %1343, %1338 %1345 = fcmp une float %38, %temp20.0 %.sink221 = select i1 %1345, float %41, float %40 %temp52.2 = select i1 %1345, float 1.953125e-03, float 3.906250e-03 %1346 = fdiv float 1.000000e+00, %.sink221 %1347 = fmul float %105, %1346 %1348 = fmul float %104, %1346 %1349 = call float @llvm.floor.f32(float %1347) %1350 = fsub float %1347, %1349 %1351 = call float @llvm.floor.f32(float %1348) %1352 = fsub float %1348, %1351 %1353 = fmul float %42, 2.000000e+00 %1354 = fmul float %1353, %temp52.2 %1355 = fsub float 1.000000e+00, %1354 %1356 = fmul float %temp52.2, %42 %1357 = fmul float %1350, %1355 %1358 = fadd float %1357, %1356 %1359 = fmul float %1352, %1355 %1360 = fadd float %1359, %1356 %1361 = fmul float %1358, %temp20.0 %1362 = fadd float %1361, %temp32.0 %1363 = fmul float %1360, %temp20.0 %1364 = fadd float %1363, %temp33.0 %1365 = bitcast float %1362 to i32 %1366 = bitcast float %1364 to i32 %1367 = bitcast float %231 to i32 %1368 = insertelement <4 x i32> undef, i32 %1365, i32 0 %1369 = insertelement <4 x i32> %1368, i32 %1366, i32 1 %1370 = insertelement <4 x i32> %1369, i32 %1367, i32 2 %1371 = bitcast <8 x i32> %88 to <32 x i8> %1372 = bitcast <4 x i32> %90 to <16 x i8> %1373 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1370, <32 x i8> %1371, <16 x i8> %1372, i32 2) %1374 = extractelement <4 x float> %1373, i32 1 %1375 = extractelement <4 x float> %1373, i32 3 %1376 = fcmp oeq float %temp34.0, 4.000000e+00 %1377 = select i1 %1376, float 1.000000e+00, float 0.000000e+00 %1378 = bitcast float %1362 to i32 %1379 = bitcast float %1364 to i32 %1380 = bitcast float %231 to i32 %1381 = insertelement <4 x i32> undef, i32 %1378, i32 0 %1382 = insertelement <4 x i32> %1381, i32 %1379, i32 1 %1383 = insertelement <4 x i32> %1382, i32 %1380, i32 2 %1384 = bitcast <8 x i32> %80 to <32 x i8> %1385 = bitcast <4 x i32> %82 to <16 x i8> %1386 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1383, <32 x i8> %1384, <16 x i8> %1385, i32 2) %1387 = extractelement <4 x float> %1386, i32 1 %1388 = extractelement <4 x float> %1386, i32 3 %1389 = fcmp oeq float %temp34.0, 3.000000e+00 %1390 = select i1 %1389, float 1.000000e+00, float 0.000000e+00 %1391 = bitcast float %1362 to i32 %1392 = bitcast float %1364 to i32 %1393 = bitcast float %231 to i32 %1394 = insertelement <4 x i32> undef, i32 %1391, i32 0 %1395 = insertelement <4 x i32> %1394, i32 %1392, i32 1 %1396 = insertelement <4 x i32> %1395, i32 %1393, i32 2 %1397 = bitcast <8 x i32> %72 to <32 x i8> %1398 = bitcast <4 x i32> %74 to <16 x i8> %1399 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1396, <32 x i8> %1397, <16 x i8> %1398, i32 2) %1400 = extractelement <4 x float> %1399, i32 1 %1401 = extractelement <4 x float> %1399, i32 3 %1402 = fcmp oeq float %temp34.0, 2.000000e+00 %1403 = select i1 %1402, float 1.000000e+00, float 0.000000e+00 %1404 = bitcast float %1362 to i32 %1405 = bitcast float %1364 to i32 %1406 = bitcast float %231 to i32 %1407 = insertelement <4 x i32> undef, i32 %1404, i32 0 %1408 = insertelement <4 x i32> %1407, i32 %1405, i32 1 %1409 = insertelement <4 x i32> %1408, i32 %1406, i32 2 %1410 = bitcast <8 x i32> %64 to <32 x i8> %1411 = bitcast <4 x i32> %66 to <16 x i8> %1412 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1409, <32 x i8> %1410, <16 x i8> %1411, i32 2) %1413 = extractelement <4 x float> %1412, i32 1 %1414 = extractelement <4 x float> %1412, i32 3 %1415 = fcmp oeq float %temp34.0, 1.000000e+00 %1416 = select i1 %1415, float 1.000000e+00, float 0.000000e+00 %1417 = bitcast float %1362 to i32 %1418 = bitcast float %1364 to i32 %1419 = bitcast float %231 to i32 %1420 = insertelement <4 x i32> undef, i32 %1417, i32 0 %1421 = insertelement <4 x i32> %1420, i32 %1418, i32 1 %1422 = insertelement <4 x i32> %1421, i32 %1419, i32 2 %1423 = bitcast <8 x i32> %56 to <32 x i8> %1424 = bitcast <4 x i32> %58 to <16 x i8> %1425 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1422, <32 x i8> %1423, <16 x i8> %1424, i32 2) %1426 = extractelement <4 x float> %1425, i32 1 %1427 = extractelement <4 x float> %1425, i32 3 %1428 = fcmp oeq float %temp34.0, 0.000000e+00 %1429 = select i1 %1428, float 1.000000e+00, float 0.000000e+00 %1430 = fmul float %1426, %1429 %1431 = fmul float %1427, %1429 %1432 = fmul float %1413, %1416 %1433 = fadd float %1432, %1430 %1434 = fmul float %1414, %1416 %1435 = fadd float %1434, %1431 %1436 = fmul float %1400, %1403 %1437 = fadd float %1436, %1433 %1438 = fmul float %1401, %1403 %1439 = fadd float %1438, %1435 %1440 = fmul float %1387, %1390 %1441 = fadd float %1440, %1437 %1442 = fmul float %1388, %1390 %1443 = fadd float %1442, %1439 %1444 = fmul float %1374, %1377 %1445 = fadd float %1444, %1441 %1446 = fmul float %1375, %1377 %1447 = fadd float %1446, %1443 %1448 = fmul float %1447, 2.000000e+00 %1449 = fadd float %1448, -1.000000e+00 %1450 = fmul float %1445, 2.000000e+00 %1451 = fadd float %1450, -1.000000e+00 %1452 = fmul float %1449, %1449 %1453 = fmul float %1451, %1451 %1454 = fadd float %1452, %1453 %1455 = call float @llvm.AMDIL.clamp.(float %1454, float 0.000000e+00, float 1.000000e+00) %1456 = fcmp une float %38, %temp20.0 %.sink222 = select i1 %1456, float %41, float %40 %temp56.2 = select i1 %1456, float 1.953125e-03, float 3.906250e-03 %1457 = fdiv float 1.000000e+00, %.sink222 %1458 = fmul float %105, %1457 %1459 = fmul float %103, %1457 %1460 = call float @llvm.floor.f32(float %1458) %1461 = fsub float %1458, %1460 %1462 = call float @llvm.floor.f32(float %1459) %1463 = fsub float %1459, %1462 %1464 = fmul float %42, 2.000000e+00 %1465 = fmul float %1464, %temp56.2 %1466 = fsub float 1.000000e+00, %1465 %1467 = fmul float %temp56.2, %42 %1468 = fmul float %1461, %1466 %1469 = fadd float %1468, %1467 %1470 = fmul float %1463, %1466 %1471 = fadd float %1470, %1467 %1472 = fmul float %1469, %temp20.0 %1473 = fadd float %1472, %temp32.0 %1474 = fmul float %1471, %temp20.0 %1475 = fadd float %1474, %temp33.0 %1476 = bitcast float %1473 to i32 %1477 = bitcast float %1475 to i32 %1478 = bitcast float %231 to i32 %1479 = insertelement <4 x i32> undef, i32 %1476, i32 0 %1480 = insertelement <4 x i32> %1479, i32 %1477, i32 1 %1481 = insertelement <4 x i32> %1480, i32 %1478, i32 2 %1482 = bitcast <8 x i32> %88 to <32 x i8> %1483 = bitcast <4 x i32> %90 to <16 x i8> %1484 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1481, <32 x i8> %1482, <16 x i8> %1483, i32 2) %1485 = extractelement <4 x float> %1484, i32 1 %1486 = extractelement <4 x float> %1484, i32 3 %1487 = fcmp oeq float %temp34.0, 4.000000e+00 %1488 = select i1 %1487, float 1.000000e+00, float 0.000000e+00 %1489 = bitcast float %1473 to i32 %1490 = bitcast float %1475 to i32 %1491 = bitcast float %231 to i32 %1492 = insertelement <4 x i32> undef, i32 %1489, i32 0 %1493 = insertelement <4 x i32> %1492, i32 %1490, i32 1 %1494 = insertelement <4 x i32> %1493, i32 %1491, i32 2 %1495 = bitcast <8 x i32> %80 to <32 x i8> %1496 = bitcast <4 x i32> %82 to <16 x i8> %1497 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1494, <32 x i8> %1495, <16 x i8> %1496, i32 2) %1498 = extractelement <4 x float> %1497, i32 1 %1499 = extractelement <4 x float> %1497, i32 3 %1500 = fcmp oeq float %temp34.0, 3.000000e+00 %1501 = select i1 %1500, float 1.000000e+00, float 0.000000e+00 %1502 = bitcast float %1473 to i32 %1503 = bitcast float %1475 to i32 %1504 = bitcast float %231 to i32 %1505 = insertelement <4 x i32> undef, i32 %1502, i32 0 %1506 = insertelement <4 x i32> %1505, i32 %1503, i32 1 %1507 = insertelement <4 x i32> %1506, i32 %1504, i32 2 %1508 = bitcast <8 x i32> %72 to <32 x i8> %1509 = bitcast <4 x i32> %74 to <16 x i8> %1510 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1507, <32 x i8> %1508, <16 x i8> %1509, i32 2) %1511 = extractelement <4 x float> %1510, i32 1 %1512 = extractelement <4 x float> %1510, i32 3 %1513 = fcmp oeq float %temp34.0, 2.000000e+00 %1514 = select i1 %1513, float 1.000000e+00, float 0.000000e+00 %1515 = bitcast float %1473 to i32 %1516 = bitcast float %1475 to i32 %1517 = bitcast float %231 to i32 %1518 = insertelement <4 x i32> undef, i32 %1515, i32 0 %1519 = insertelement <4 x i32> %1518, i32 %1516, i32 1 %1520 = insertelement <4 x i32> %1519, i32 %1517, i32 2 %1521 = bitcast <8 x i32> %64 to <32 x i8> %1522 = bitcast <4 x i32> %66 to <16 x i8> %1523 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1520, <32 x i8> %1521, <16 x i8> %1522, i32 2) %1524 = extractelement <4 x float> %1523, i32 1 %1525 = extractelement <4 x float> %1523, i32 3 %1526 = fcmp oeq float %temp34.0, 1.000000e+00 %1527 = select i1 %1526, float 1.000000e+00, float 0.000000e+00 %1528 = bitcast float %1473 to i32 %1529 = bitcast float %1475 to i32 %1530 = bitcast float %231 to i32 %1531 = insertelement <4 x i32> undef, i32 %1528, i32 0 %1532 = insertelement <4 x i32> %1531, i32 %1529, i32 1 %1533 = insertelement <4 x i32> %1532, i32 %1530, i32 2 %1534 = bitcast <8 x i32> %56 to <32 x i8> %1535 = bitcast <4 x i32> %58 to <16 x i8> %1536 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1533, <32 x i8> %1534, <16 x i8> %1535, i32 2) %1537 = extractelement <4 x float> %1536, i32 1 %1538 = extractelement <4 x float> %1536, i32 3 %1539 = fcmp oeq float %temp34.0, 0.000000e+00 %1540 = select i1 %1539, float 1.000000e+00, float 0.000000e+00 %1541 = fmul float %1537, %1540 %1542 = fmul float %1538, %1540 %1543 = fmul float %1524, %1527 %1544 = fadd float %1543, %1541 %1545 = fmul float %1525, %1527 %1546 = fadd float %1545, %1542 %1547 = fmul float %1511, %1514 %1548 = fadd float %1547, %1544 %1549 = fmul float %1512, %1514 %1550 = fadd float %1549, %1546 %1551 = fmul float %1498, %1501 %1552 = fadd float %1551, %1548 %1553 = fmul float %1499, %1501 %1554 = fadd float %1553, %1550 %1555 = fmul float %1485, %1488 %1556 = fadd float %1555, %1552 %1557 = fmul float %1486, %1488 %1558 = fadd float %1557, %1554 %1559 = fmul float %1558, 2.000000e+00 %1560 = fadd float %1559, -1.000000e+00 %1561 = fmul float %1556, 2.000000e+00 %1562 = fadd float %1561, -1.000000e+00 %1563 = fmul float %1560, %1560 %1564 = fmul float %1562, %1562 %1565 = fadd float %1563, %1564 %1566 = call float @llvm.AMDIL.clamp.(float %1565, float 0.000000e+00, float 1.000000e+00) %1567 = fcmp une float %38, %temp20.0 %.sink223 = select i1 %1567, float %41, float %40 %temp60.2 = select i1 %1567, float 1.953125e-03, float 3.906250e-03 %1568 = fdiv float 1.000000e+00, %.sink223 %1569 = fmul float %103, %1568 %1570 = fmul float %104, %1568 %1571 = call float @llvm.floor.f32(float %1569) %1572 = fsub float %1569, %1571 %1573 = call float @llvm.floor.f32(float %1570) %1574 = fsub float %1570, %1573 %1575 = fmul float %42, 2.000000e+00 %1576 = fmul float %1575, %temp60.2 %1577 = fsub float 1.000000e+00, %1576 %1578 = fmul float %temp60.2, %42 %1579 = fmul float %1572, %1577 %1580 = fadd float %1579, %1578 %1581 = fmul float %1574, %1577 %1582 = fadd float %1581, %1578 %1583 = fmul float %1580, %temp20.0 %1584 = fadd float %1583, %temp32.0 %1585 = fmul float %1582, %temp20.0 %1586 = fadd float %1585, %temp33.0 %1587 = bitcast float %1584 to i32 %1588 = bitcast float %1586 to i32 %1589 = bitcast float %231 to i32 %1590 = insertelement <4 x i32> undef, i32 %1587, i32 0 %1591 = insertelement <4 x i32> %1590, i32 %1588, i32 1 %1592 = insertelement <4 x i32> %1591, i32 %1589, i32 2 %1593 = bitcast <8 x i32> %88 to <32 x i8> %1594 = bitcast <4 x i32> %90 to <16 x i8> %1595 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1592, <32 x i8> %1593, <16 x i8> %1594, i32 2) %1596 = extractelement <4 x float> %1595, i32 1 %1597 = extractelement <4 x float> %1595, i32 3 %1598 = fcmp oeq float %temp34.0, 4.000000e+00 %1599 = select i1 %1598, float 1.000000e+00, float 0.000000e+00 %1600 = bitcast float %1584 to i32 %1601 = bitcast float %1586 to i32 %1602 = bitcast float %231 to i32 %1603 = insertelement <4 x i32> undef, i32 %1600, i32 0 %1604 = insertelement <4 x i32> %1603, i32 %1601, i32 1 %1605 = insertelement <4 x i32> %1604, i32 %1602, i32 2 %1606 = bitcast <8 x i32> %80 to <32 x i8> %1607 = bitcast <4 x i32> %82 to <16 x i8> %1608 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1605, <32 x i8> %1606, <16 x i8> %1607, i32 2) %1609 = extractelement <4 x float> %1608, i32 1 %1610 = extractelement <4 x float> %1608, i32 3 %1611 = fcmp oeq float %temp34.0, 3.000000e+00 %1612 = select i1 %1611, float 1.000000e+00, float 0.000000e+00 %1613 = bitcast float %1584 to i32 %1614 = bitcast float %1586 to i32 %1615 = bitcast float %231 to i32 %1616 = insertelement <4 x i32> undef, i32 %1613, i32 0 %1617 = insertelement <4 x i32> %1616, i32 %1614, i32 1 %1618 = insertelement <4 x i32> %1617, i32 %1615, i32 2 %1619 = bitcast <8 x i32> %72 to <32 x i8> %1620 = bitcast <4 x i32> %74 to <16 x i8> %1621 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1618, <32 x i8> %1619, <16 x i8> %1620, i32 2) %1622 = extractelement <4 x float> %1621, i32 1 %1623 = extractelement <4 x float> %1621, i32 3 %1624 = fcmp oeq float %temp34.0, 2.000000e+00 %1625 = select i1 %1624, float 1.000000e+00, float 0.000000e+00 %1626 = bitcast float %1584 to i32 %1627 = bitcast float %1586 to i32 %1628 = bitcast float %231 to i32 %1629 = insertelement <4 x i32> undef, i32 %1626, i32 0 %1630 = insertelement <4 x i32> %1629, i32 %1627, i32 1 %1631 = insertelement <4 x i32> %1630, i32 %1628, i32 2 %1632 = bitcast <8 x i32> %64 to <32 x i8> %1633 = bitcast <4 x i32> %66 to <16 x i8> %1634 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1631, <32 x i8> %1632, <16 x i8> %1633, i32 2) %1635 = extractelement <4 x float> %1634, i32 1 %1636 = extractelement <4 x float> %1634, i32 3 %1637 = fcmp oeq float %temp34.0, 1.000000e+00 %1638 = select i1 %1637, float 1.000000e+00, float 0.000000e+00 %1639 = bitcast float %1584 to i32 %1640 = bitcast float %1586 to i32 %1641 = bitcast float %231 to i32 %1642 = insertelement <4 x i32> undef, i32 %1639, i32 0 %1643 = insertelement <4 x i32> %1642, i32 %1640, i32 1 %1644 = insertelement <4 x i32> %1643, i32 %1641, i32 2 %1645 = bitcast <8 x i32> %56 to <32 x i8> %1646 = bitcast <4 x i32> %58 to <16 x i8> %1647 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1644, <32 x i8> %1645, <16 x i8> %1646, i32 2) %1648 = extractelement <4 x float> %1647, i32 1 %1649 = extractelement <4 x float> %1647, i32 3 %1650 = fcmp oeq float %temp34.0, 0.000000e+00 %1651 = select i1 %1650, float 1.000000e+00, float 0.000000e+00 %1652 = fmul float %1648, %1651 %1653 = fmul float %1649, %1651 %1654 = fmul float %1635, %1638 %1655 = fadd float %1654, %1652 %1656 = fmul float %1636, %1638 %1657 = fadd float %1656, %1653 %1658 = fmul float %1622, %1625 %1659 = fadd float %1658, %1655 %1660 = fmul float %1623, %1625 %1661 = fadd float %1660, %1657 %1662 = fmul float %1609, %1612 %1663 = fadd float %1662, %1659 %1664 = fmul float %1610, %1612 %1665 = fadd float %1664, %1661 %1666 = fmul float %1596, %1599 %1667 = fadd float %1666, %1663 %1668 = fmul float %1597, %1599 %1669 = fadd float %1668, %1665 %1670 = fmul float %1669, 2.000000e+00 %1671 = fadd float %1670, -1.000000e+00 %1672 = fmul float %1667, 2.000000e+00 %1673 = fadd float %1672, -1.000000e+00 %1674 = fmul float %1671, %1671 %1675 = fmul float %1673, %1673 %1676 = fadd float %1674, %1675 %1677 = call float @llvm.AMDIL.clamp.(float %1676, float 0.000000e+00, float 1.000000e+00) %1678 = fmul float %154, 0.000000e+00 %1679 = fmul float %1449, %154 %1680 = fmul float %1451, %154 %1681 = fmul float %1562, %155 %1682 = fadd float %1681, %1678 %1683 = fmul float %155, 0.000000e+00 %1684 = fadd float %1683, %1679 %1685 = fmul float %1560, %155 %1686 = fadd float %1685, %1680 %1687 = fmul float %1671, %156 %1688 = fadd float %1687, %1682 %1689 = fmul float %1673, %156 %1690 = fadd float %1689, %1684 %1691 = fmul float %156, 0.000000e+00 %1692 = fadd float %1691, %1686 %1693 = fcmp une float %38, %temp24.0 %.sink224 = select i1 %1693, float %41, float %40 %temp48.3 = select i1 %1693, float 1.953125e-03, float 3.906250e-03 %1694 = fdiv float 1.000000e+00, %.sink224 %1695 = fmul float %105, %1694 %1696 = fmul float %104, %1694 %1697 = call float @llvm.floor.f32(float %1695) %1698 = fsub float %1695, %1697 %1699 = call float @llvm.floor.f32(float %1696) %1700 = fsub float %1696, %1699 %1701 = fmul float %42, 2.000000e+00 %1702 = fmul float %1701, %temp48.3 %1703 = fsub float 1.000000e+00, %1702 %1704 = fmul float %temp48.3, %42 %1705 = fmul float %1698, %1703 %1706 = fadd float %1705, %1704 %1707 = fmul float %1700, %1703 %1708 = fadd float %1707, %1704 %1709 = fmul float %1706, %temp24.0 %1710 = fadd float %1709, %temp40.0 %1711 = fmul float %1708, %temp24.0 %1712 = fadd float %1711, %temp41.0 %1713 = bitcast float %1710 to i32 %1714 = bitcast float %1712 to i32 %1715 = bitcast float %231 to i32 %1716 = insertelement <4 x i32> undef, i32 %1713, i32 0 %1717 = insertelement <4 x i32> %1716, i32 %1714, i32 1 %1718 = insertelement <4 x i32> %1717, i32 %1715, i32 2 %1719 = bitcast <8 x i32> %88 to <32 x i8> %1720 = bitcast <4 x i32> %90 to <16 x i8> %1721 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1718, <32 x i8> %1719, <16 x i8> %1720, i32 2) %1722 = extractelement <4 x float> %1721, i32 1 %1723 = extractelement <4 x float> %1721, i32 3 %1724 = fcmp oeq float %temp42.0, 4.000000e+00 %1725 = select i1 %1724, float 1.000000e+00, float 0.000000e+00 %1726 = bitcast float %1710 to i32 %1727 = bitcast float %1712 to i32 %1728 = bitcast float %231 to i32 %1729 = insertelement <4 x i32> undef, i32 %1726, i32 0 %1730 = insertelement <4 x i32> %1729, i32 %1727, i32 1 %1731 = insertelement <4 x i32> %1730, i32 %1728, i32 2 %1732 = bitcast <8 x i32> %80 to <32 x i8> %1733 = bitcast <4 x i32> %82 to <16 x i8> %1734 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1731, <32 x i8> %1732, <16 x i8> %1733, i32 2) %1735 = extractelement <4 x float> %1734, i32 1 %1736 = extractelement <4 x float> %1734, i32 3 %1737 = fcmp oeq float %temp42.0, 3.000000e+00 %1738 = select i1 %1737, float 1.000000e+00, float 0.000000e+00 %1739 = bitcast float %1710 to i32 %1740 = bitcast float %1712 to i32 %1741 = bitcast float %231 to i32 %1742 = insertelement <4 x i32> undef, i32 %1739, i32 0 %1743 = insertelement <4 x i32> %1742, i32 %1740, i32 1 %1744 = insertelement <4 x i32> %1743, i32 %1741, i32 2 %1745 = bitcast <8 x i32> %72 to <32 x i8> %1746 = bitcast <4 x i32> %74 to <16 x i8> %1747 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1744, <32 x i8> %1745, <16 x i8> %1746, i32 2) %1748 = extractelement <4 x float> %1747, i32 1 %1749 = extractelement <4 x float> %1747, i32 3 %1750 = fcmp oeq float %temp42.0, 2.000000e+00 %1751 = select i1 %1750, float 1.000000e+00, float 0.000000e+00 %1752 = bitcast float %1710 to i32 %1753 = bitcast float %1712 to i32 %1754 = bitcast float %231 to i32 %1755 = insertelement <4 x i32> undef, i32 %1752, i32 0 %1756 = insertelement <4 x i32> %1755, i32 %1753, i32 1 %1757 = insertelement <4 x i32> %1756, i32 %1754, i32 2 %1758 = bitcast <8 x i32> %64 to <32 x i8> %1759 = bitcast <4 x i32> %66 to <16 x i8> %1760 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1757, <32 x i8> %1758, <16 x i8> %1759, i32 2) %1761 = extractelement <4 x float> %1760, i32 1 %1762 = extractelement <4 x float> %1760, i32 3 %1763 = fcmp oeq float %temp42.0, 1.000000e+00 %1764 = select i1 %1763, float 1.000000e+00, float 0.000000e+00 %1765 = bitcast float %1710 to i32 %1766 = bitcast float %1712 to i32 %1767 = bitcast float %231 to i32 %1768 = insertelement <4 x i32> undef, i32 %1765, i32 0 %1769 = insertelement <4 x i32> %1768, i32 %1766, i32 1 %1770 = insertelement <4 x i32> %1769, i32 %1767, i32 2 %1771 = bitcast <8 x i32> %56 to <32 x i8> %1772 = bitcast <4 x i32> %58 to <16 x i8> %1773 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1770, <32 x i8> %1771, <16 x i8> %1772, i32 2) %1774 = extractelement <4 x float> %1773, i32 1 %1775 = extractelement <4 x float> %1773, i32 3 %1776 = fcmp oeq float %temp42.0, 0.000000e+00 %1777 = select i1 %1776, float 1.000000e+00, float 0.000000e+00 %1778 = fmul float %1774, %1777 %1779 = fmul float %1775, %1777 %1780 = fmul float %1761, %1764 %1781 = fadd float %1780, %1778 %1782 = fmul float %1762, %1764 %1783 = fadd float %1782, %1779 %1784 = fmul float %1748, %1751 %1785 = fadd float %1784, %1781 %1786 = fmul float %1749, %1751 %1787 = fadd float %1786, %1783 %1788 = fmul float %1735, %1738 %1789 = fadd float %1788, %1785 %1790 = fmul float %1736, %1738 %1791 = fadd float %1790, %1787 %1792 = fmul float %1722, %1725 %1793 = fadd float %1792, %1789 %1794 = fmul float %1723, %1725 %1795 = fadd float %1794, %1791 %1796 = fmul float %1795, 2.000000e+00 %1797 = fadd float %1796, -1.000000e+00 %1798 = fmul float %1793, 2.000000e+00 %1799 = fadd float %1798, -1.000000e+00 %1800 = fmul float %1797, %1797 %1801 = fmul float %1799, %1799 %1802 = fadd float %1800, %1801 %1803 = call float @llvm.AMDIL.clamp.(float %1802, float 0.000000e+00, float 1.000000e+00) %1804 = fcmp une float %38, %temp24.0 %.sink225 = select i1 %1804, float %41, float %40 %temp52.4 = select i1 %1804, float 1.953125e-03, float 3.906250e-03 %1805 = fdiv float 1.000000e+00, %.sink225 %1806 = fmul float %105, %1805 %1807 = fmul float %103, %1805 %1808 = call float @llvm.floor.f32(float %1806) %1809 = fsub float %1806, %1808 %1810 = call float @llvm.floor.f32(float %1807) %1811 = fsub float %1807, %1810 %1812 = fmul float %42, 2.000000e+00 %1813 = fmul float %1812, %temp52.4 %1814 = fsub float 1.000000e+00, %1813 %1815 = fmul float %temp52.4, %42 %1816 = fmul float %1809, %1814 %1817 = fadd float %1816, %1815 %1818 = fmul float %1811, %1814 %1819 = fadd float %1818, %1815 %1820 = fmul float %1817, %temp24.0 %1821 = fadd float %1820, %temp40.0 %1822 = fmul float %1819, %temp24.0 %1823 = fadd float %1822, %temp41.0 %1824 = bitcast float %1821 to i32 %1825 = bitcast float %1823 to i32 %1826 = bitcast float %231 to i32 %1827 = insertelement <4 x i32> undef, i32 %1824, i32 0 %1828 = insertelement <4 x i32> %1827, i32 %1825, i32 1 %1829 = insertelement <4 x i32> %1828, i32 %1826, i32 2 %1830 = bitcast <8 x i32> %88 to <32 x i8> %1831 = bitcast <4 x i32> %90 to <16 x i8> %1832 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1829, <32 x i8> %1830, <16 x i8> %1831, i32 2) %1833 = extractelement <4 x float> %1832, i32 1 %1834 = extractelement <4 x float> %1832, i32 3 %1835 = fcmp oeq float %temp42.0, 4.000000e+00 %1836 = select i1 %1835, float 1.000000e+00, float 0.000000e+00 %1837 = bitcast float %1821 to i32 %1838 = bitcast float %1823 to i32 %1839 = bitcast float %231 to i32 %1840 = insertelement <4 x i32> undef, i32 %1837, i32 0 %1841 = insertelement <4 x i32> %1840, i32 %1838, i32 1 %1842 = insertelement <4 x i32> %1841, i32 %1839, i32 2 %1843 = bitcast <8 x i32> %80 to <32 x i8> %1844 = bitcast <4 x i32> %82 to <16 x i8> %1845 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1842, <32 x i8> %1843, <16 x i8> %1844, i32 2) %1846 = extractelement <4 x float> %1845, i32 1 %1847 = extractelement <4 x float> %1845, i32 3 %1848 = fcmp oeq float %temp42.0, 3.000000e+00 %1849 = select i1 %1848, float 1.000000e+00, float 0.000000e+00 %1850 = bitcast float %1821 to i32 %1851 = bitcast float %1823 to i32 %1852 = bitcast float %231 to i32 %1853 = insertelement <4 x i32> undef, i32 %1850, i32 0 %1854 = insertelement <4 x i32> %1853, i32 %1851, i32 1 %1855 = insertelement <4 x i32> %1854, i32 %1852, i32 2 %1856 = bitcast <8 x i32> %72 to <32 x i8> %1857 = bitcast <4 x i32> %74 to <16 x i8> %1858 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1855, <32 x i8> %1856, <16 x i8> %1857, i32 2) %1859 = extractelement <4 x float> %1858, i32 1 %1860 = extractelement <4 x float> %1858, i32 3 %1861 = fcmp oeq float %temp42.0, 2.000000e+00 %1862 = select i1 %1861, float 1.000000e+00, float 0.000000e+00 %1863 = bitcast float %1821 to i32 %1864 = bitcast float %1823 to i32 %1865 = bitcast float %231 to i32 %1866 = insertelement <4 x i32> undef, i32 %1863, i32 0 %1867 = insertelement <4 x i32> %1866, i32 %1864, i32 1 %1868 = insertelement <4 x i32> %1867, i32 %1865, i32 2 %1869 = bitcast <8 x i32> %64 to <32 x i8> %1870 = bitcast <4 x i32> %66 to <16 x i8> %1871 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1868, <32 x i8> %1869, <16 x i8> %1870, i32 2) %1872 = extractelement <4 x float> %1871, i32 1 %1873 = extractelement <4 x float> %1871, i32 3 %1874 = fcmp oeq float %temp42.0, 1.000000e+00 %1875 = select i1 %1874, float 1.000000e+00, float 0.000000e+00 %1876 = bitcast float %1821 to i32 %1877 = bitcast float %1823 to i32 %1878 = bitcast float %231 to i32 %1879 = insertelement <4 x i32> undef, i32 %1876, i32 0 %1880 = insertelement <4 x i32> %1879, i32 %1877, i32 1 %1881 = insertelement <4 x i32> %1880, i32 %1878, i32 2 %1882 = bitcast <8 x i32> %56 to <32 x i8> %1883 = bitcast <4 x i32> %58 to <16 x i8> %1884 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1881, <32 x i8> %1882, <16 x i8> %1883, i32 2) %1885 = extractelement <4 x float> %1884, i32 1 %1886 = extractelement <4 x float> %1884, i32 3 %1887 = fcmp oeq float %temp42.0, 0.000000e+00 %1888 = select i1 %1887, float 1.000000e+00, float 0.000000e+00 %1889 = fmul float %1885, %1888 %1890 = fmul float %1886, %1888 %1891 = fmul float %1872, %1875 %1892 = fadd float %1891, %1889 %1893 = fmul float %1873, %1875 %1894 = fadd float %1893, %1890 %1895 = fmul float %1859, %1862 %1896 = fadd float %1895, %1892 %1897 = fmul float %1860, %1862 %1898 = fadd float %1897, %1894 %1899 = fmul float %1846, %1849 %1900 = fadd float %1899, %1896 %1901 = fmul float %1847, %1849 %1902 = fadd float %1901, %1898 %1903 = fmul float %1833, %1836 %1904 = fadd float %1903, %1900 %1905 = fmul float %1834, %1836 %1906 = fadd float %1905, %1902 %1907 = fmul float %1906, 2.000000e+00 %1908 = fadd float %1907, -1.000000e+00 %1909 = fmul float %1904, 2.000000e+00 %1910 = fadd float %1909, -1.000000e+00 %1911 = fmul float %1908, %1908 %1912 = fmul float %1910, %1910 %1913 = fadd float %1911, %1912 %1914 = call float @llvm.AMDIL.clamp.(float %1913, float 0.000000e+00, float 1.000000e+00) %1915 = fcmp une float %38, %temp24.0 %.sink226 = select i1 %1915, float %41, float %40 %temp56.4 = select i1 %1915, float 1.953125e-03, float 3.906250e-03 %1916 = fdiv float 1.000000e+00, %.sink226 %1917 = fmul float %103, %1916 %1918 = fmul float %104, %1916 %1919 = call float @llvm.floor.f32(float %1917) %1920 = fsub float %1917, %1919 %1921 = call float @llvm.floor.f32(float %1918) %1922 = fsub float %1918, %1921 %1923 = fmul float %42, 2.000000e+00 %1924 = fmul float %1923, %temp56.4 %1925 = fsub float 1.000000e+00, %1924 %1926 = fmul float %temp56.4, %42 %1927 = fmul float %1920, %1925 %1928 = fadd float %1927, %1926 %1929 = fmul float %1922, %1925 %1930 = fadd float %1929, %1926 %1931 = fmul float %1928, %temp24.0 %1932 = fadd float %1931, %temp40.0 %1933 = fmul float %1930, %temp24.0 %1934 = fadd float %1933, %temp41.0 %1935 = bitcast float %1932 to i32 %1936 = bitcast float %1934 to i32 %1937 = bitcast float %231 to i32 %1938 = insertelement <4 x i32> undef, i32 %1935, i32 0 %1939 = insertelement <4 x i32> %1938, i32 %1936, i32 1 %1940 = insertelement <4 x i32> %1939, i32 %1937, i32 2 %1941 = bitcast <8 x i32> %88 to <32 x i8> %1942 = bitcast <4 x i32> %90 to <16 x i8> %1943 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1940, <32 x i8> %1941, <16 x i8> %1942, i32 2) %1944 = extractelement <4 x float> %1943, i32 1 %1945 = extractelement <4 x float> %1943, i32 3 %1946 = fcmp oeq float %temp42.0, 4.000000e+00 %1947 = select i1 %1946, float 1.000000e+00, float 0.000000e+00 %1948 = bitcast float %1932 to i32 %1949 = bitcast float %1934 to i32 %1950 = bitcast float %231 to i32 %1951 = insertelement <4 x i32> undef, i32 %1948, i32 0 %1952 = insertelement <4 x i32> %1951, i32 %1949, i32 1 %1953 = insertelement <4 x i32> %1952, i32 %1950, i32 2 %1954 = bitcast <8 x i32> %80 to <32 x i8> %1955 = bitcast <4 x i32> %82 to <16 x i8> %1956 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1953, <32 x i8> %1954, <16 x i8> %1955, i32 2) %1957 = extractelement <4 x float> %1956, i32 1 %1958 = extractelement <4 x float> %1956, i32 3 %1959 = fcmp oeq float %temp42.0, 3.000000e+00 %1960 = select i1 %1959, float 1.000000e+00, float 0.000000e+00 %1961 = bitcast float %1932 to i32 %1962 = bitcast float %1934 to i32 %1963 = bitcast float %231 to i32 %1964 = insertelement <4 x i32> undef, i32 %1961, i32 0 %1965 = insertelement <4 x i32> %1964, i32 %1962, i32 1 %1966 = insertelement <4 x i32> %1965, i32 %1963, i32 2 %1967 = bitcast <8 x i32> %72 to <32 x i8> %1968 = bitcast <4 x i32> %74 to <16 x i8> %1969 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1966, <32 x i8> %1967, <16 x i8> %1968, i32 2) %1970 = extractelement <4 x float> %1969, i32 1 %1971 = extractelement <4 x float> %1969, i32 3 %1972 = fcmp oeq float %temp42.0, 2.000000e+00 %1973 = select i1 %1972, float 1.000000e+00, float 0.000000e+00 %1974 = bitcast float %1932 to i32 %1975 = bitcast float %1934 to i32 %1976 = bitcast float %231 to i32 %1977 = insertelement <4 x i32> undef, i32 %1974, i32 0 %1978 = insertelement <4 x i32> %1977, i32 %1975, i32 1 %1979 = insertelement <4 x i32> %1978, i32 %1976, i32 2 %1980 = bitcast <8 x i32> %64 to <32 x i8> %1981 = bitcast <4 x i32> %66 to <16 x i8> %1982 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1979, <32 x i8> %1980, <16 x i8> %1981, i32 2) %1983 = extractelement <4 x float> %1982, i32 1 %1984 = extractelement <4 x float> %1982, i32 3 %1985 = fcmp oeq float %temp42.0, 1.000000e+00 %1986 = select i1 %1985, float 1.000000e+00, float 0.000000e+00 %1987 = bitcast float %1932 to i32 %1988 = bitcast float %1934 to i32 %1989 = bitcast float %231 to i32 %1990 = insertelement <4 x i32> undef, i32 %1987, i32 0 %1991 = insertelement <4 x i32> %1990, i32 %1988, i32 1 %1992 = insertelement <4 x i32> %1991, i32 %1989, i32 2 %1993 = bitcast <8 x i32> %56 to <32 x i8> %1994 = bitcast <4 x i32> %58 to <16 x i8> %1995 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1992, <32 x i8> %1993, <16 x i8> %1994, i32 2) %1996 = extractelement <4 x float> %1995, i32 1 %1997 = extractelement <4 x float> %1995, i32 3 %1998 = fcmp oeq float %temp42.0, 0.000000e+00 %1999 = select i1 %1998, float 1.000000e+00, float 0.000000e+00 %2000 = fmul float %1996, %1999 %2001 = fmul float %1997, %1999 %2002 = fmul float %1983, %1986 %2003 = fadd float %2002, %2000 %2004 = fmul float %1984, %1986 %2005 = fadd float %2004, %2001 %2006 = fmul float %1970, %1973 %2007 = fadd float %2006, %2003 %2008 = fmul float %1971, %1973 %2009 = fadd float %2008, %2005 %2010 = fmul float %1957, %1960 %2011 = fadd float %2010, %2007 %2012 = fmul float %1958, %1960 %2013 = fadd float %2012, %2009 %2014 = fmul float %1944, %1947 %2015 = fadd float %2014, %2011 %2016 = fmul float %1945, %1947 %2017 = fadd float %2016, %2013 %2018 = fmul float %2017, 2.000000e+00 %2019 = fadd float %2018, -1.000000e+00 %2020 = fmul float %2015, 2.000000e+00 %2021 = fadd float %2020, -1.000000e+00 %2022 = fmul float %2019, %2019 %2023 = fmul float %2021, %2021 %2024 = fadd float %2022, %2023 %2025 = call float @llvm.AMDIL.clamp.(float %2024, float 0.000000e+00, float 1.000000e+00) %2026 = fmul float %154, 0.000000e+00 %2027 = fmul float %1797, %154 %2028 = fmul float %1799, %154 %2029 = fmul float %1910, %155 %2030 = fadd float %2029, %2026 %2031 = fmul float %155, 0.000000e+00 %2032 = fadd float %2031, %2027 %2033 = fmul float %1908, %155 %2034 = fadd float %2033, %2028 %2035 = fmul float %2019, %156 %2036 = fadd float %2035, %2030 %2037 = fmul float %2021, %156 %2038 = fadd float %2037, %2032 %2039 = fmul float %156, 0.000000e+00 %2040 = fadd float %2039, %2034 %2041 = fcmp une float %38, %temp28.0 %.sink227 = select i1 %2041, float %41, float %40 %temp40.1 = select i1 %2041, float 1.953125e-03, float 3.906250e-03 %2042 = fdiv float 1.000000e+00, %.sink227 %2043 = fmul float %105, %2042 %2044 = fmul float %104, %2042 %2045 = call float @llvm.floor.f32(float %2043) %2046 = fsub float %2043, %2045 %2047 = call float @llvm.floor.f32(float %2044) %2048 = fsub float %2044, %2047 %2049 = fmul float %42, 2.000000e+00 %2050 = fmul float %2049, %temp40.1 %2051 = fsub float 1.000000e+00, %2050 %2052 = fmul float %temp40.1, %42 %2053 = fmul float %2046, %2051 %2054 = fadd float %2053, %2052 %2055 = fmul float %2048, %2051 %2056 = fadd float %2055, %2052 %2057 = fmul float %2054, %temp28.0 %2058 = fadd float %2057, %temp16.0 %2059 = fmul float %2056, %temp28.0 %2060 = fadd float %2059, %temp17.0 %2061 = bitcast float %2058 to i32 %2062 = bitcast float %2060 to i32 %2063 = bitcast float %231 to i32 %2064 = insertelement <4 x i32> undef, i32 %2061, i32 0 %2065 = insertelement <4 x i32> %2064, i32 %2062, i32 1 %2066 = insertelement <4 x i32> %2065, i32 %2063, i32 2 %2067 = bitcast <8 x i32> %88 to <32 x i8> %2068 = bitcast <4 x i32> %90 to <16 x i8> %2069 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2066, <32 x i8> %2067, <16 x i8> %2068, i32 2) %2070 = extractelement <4 x float> %2069, i32 1 %2071 = extractelement <4 x float> %2069, i32 3 %2072 = fcmp oeq float %temp18.0, 4.000000e+00 %2073 = select i1 %2072, float 1.000000e+00, float 0.000000e+00 %2074 = bitcast float %2058 to i32 %2075 = bitcast float %2060 to i32 %2076 = bitcast float %231 to i32 %2077 = insertelement <4 x i32> undef, i32 %2074, i32 0 %2078 = insertelement <4 x i32> %2077, i32 %2075, i32 1 %2079 = insertelement <4 x i32> %2078, i32 %2076, i32 2 %2080 = bitcast <8 x i32> %80 to <32 x i8> %2081 = bitcast <4 x i32> %82 to <16 x i8> %2082 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2079, <32 x i8> %2080, <16 x i8> %2081, i32 2) %2083 = extractelement <4 x float> %2082, i32 1 %2084 = extractelement <4 x float> %2082, i32 3 %2085 = fcmp oeq float %temp18.0, 3.000000e+00 %2086 = select i1 %2085, float 1.000000e+00, float 0.000000e+00 %2087 = bitcast float %2058 to i32 %2088 = bitcast float %2060 to i32 %2089 = bitcast float %231 to i32 %2090 = insertelement <4 x i32> undef, i32 %2087, i32 0 %2091 = insertelement <4 x i32> %2090, i32 %2088, i32 1 %2092 = insertelement <4 x i32> %2091, i32 %2089, i32 2 %2093 = bitcast <8 x i32> %72 to <32 x i8> %2094 = bitcast <4 x i32> %74 to <16 x i8> %2095 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2092, <32 x i8> %2093, <16 x i8> %2094, i32 2) %2096 = extractelement <4 x float> %2095, i32 1 %2097 = extractelement <4 x float> %2095, i32 3 %2098 = fcmp oeq float %temp18.0, 2.000000e+00 %2099 = select i1 %2098, float 1.000000e+00, float 0.000000e+00 %2100 = bitcast float %2058 to i32 %2101 = bitcast float %2060 to i32 %2102 = bitcast float %231 to i32 %2103 = insertelement <4 x i32> undef, i32 %2100, i32 0 %2104 = insertelement <4 x i32> %2103, i32 %2101, i32 1 %2105 = insertelement <4 x i32> %2104, i32 %2102, i32 2 %2106 = bitcast <8 x i32> %64 to <32 x i8> %2107 = bitcast <4 x i32> %66 to <16 x i8> %2108 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2105, <32 x i8> %2106, <16 x i8> %2107, i32 2) %2109 = extractelement <4 x float> %2108, i32 1 %2110 = extractelement <4 x float> %2108, i32 3 %2111 = fcmp oeq float %temp18.0, 1.000000e+00 %2112 = select i1 %2111, float 1.000000e+00, float 0.000000e+00 %2113 = bitcast float %2058 to i32 %2114 = bitcast float %2060 to i32 %2115 = bitcast float %231 to i32 %2116 = insertelement <4 x i32> undef, i32 %2113, i32 0 %2117 = insertelement <4 x i32> %2116, i32 %2114, i32 1 %2118 = insertelement <4 x i32> %2117, i32 %2115, i32 2 %2119 = bitcast <8 x i32> %56 to <32 x i8> %2120 = bitcast <4 x i32> %58 to <16 x i8> %2121 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2118, <32 x i8> %2119, <16 x i8> %2120, i32 2) %2122 = extractelement <4 x float> %2121, i32 1 %2123 = extractelement <4 x float> %2121, i32 3 %2124 = fcmp oeq float %temp18.0, 0.000000e+00 %2125 = select i1 %2124, float 1.000000e+00, float 0.000000e+00 %2126 = fmul float %2122, %2125 %2127 = fmul float %2123, %2125 %2128 = fmul float %2109, %2112 %2129 = fadd float %2128, %2126 %2130 = fmul float %2110, %2112 %2131 = fadd float %2130, %2127 %2132 = fmul float %2096, %2099 %2133 = fadd float %2132, %2129 %2134 = fmul float %2097, %2099 %2135 = fadd float %2134, %2131 %2136 = fmul float %2083, %2086 %2137 = fadd float %2136, %2133 %2138 = fmul float %2084, %2086 %2139 = fadd float %2138, %2135 %2140 = fmul float %2070, %2073 %2141 = fadd float %2140, %2137 %2142 = fmul float %2071, %2073 %2143 = fadd float %2142, %2139 %2144 = fmul float %2143, 2.000000e+00 %2145 = fadd float %2144, -1.000000e+00 %2146 = fmul float %2141, 2.000000e+00 %2147 = fadd float %2146, -1.000000e+00 %2148 = fmul float %2145, %2145 %2149 = fmul float %2147, %2147 %2150 = fadd float %2148, %2149 %2151 = call float @llvm.AMDIL.clamp.(float %2150, float 0.000000e+00, float 1.000000e+00) %2152 = fcmp une float %38, %temp28.0 %.sink228 = select i1 %2152, float %41, float %40 %temp48.5 = select i1 %2152, float 1.953125e-03, float 3.906250e-03 %2153 = fdiv float 1.000000e+00, %.sink228 %2154 = fmul float %105, %2153 %2155 = fmul float %103, %2153 %2156 = call float @llvm.floor.f32(float %2154) %2157 = fsub float %2154, %2156 %2158 = call float @llvm.floor.f32(float %2155) %2159 = fsub float %2155, %2158 %2160 = fmul float %42, 2.000000e+00 %2161 = fmul float %2160, %temp48.5 %2162 = fsub float 1.000000e+00, %2161 %2163 = fmul float %temp48.5, %42 %2164 = fmul float %2157, %2162 %2165 = fadd float %2164, %2163 %2166 = fmul float %2159, %2162 %2167 = fadd float %2166, %2163 %2168 = fmul float %2165, %temp28.0 %2169 = fadd float %2168, %temp16.0 %2170 = fmul float %2167, %temp28.0 %2171 = fadd float %2170, %temp17.0 %2172 = bitcast float %2169 to i32 %2173 = bitcast float %2171 to i32 %2174 = bitcast float %231 to i32 %2175 = insertelement <4 x i32> undef, i32 %2172, i32 0 %2176 = insertelement <4 x i32> %2175, i32 %2173, i32 1 %2177 = insertelement <4 x i32> %2176, i32 %2174, i32 2 %2178 = bitcast <8 x i32> %88 to <32 x i8> %2179 = bitcast <4 x i32> %90 to <16 x i8> %2180 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2177, <32 x i8> %2178, <16 x i8> %2179, i32 2) %2181 = extractelement <4 x float> %2180, i32 1 %2182 = extractelement <4 x float> %2180, i32 3 %2183 = fcmp oeq float %temp18.0, 4.000000e+00 %2184 = select i1 %2183, float 1.000000e+00, float 0.000000e+00 %2185 = bitcast float %2169 to i32 %2186 = bitcast float %2171 to i32 %2187 = bitcast float %231 to i32 %2188 = insertelement <4 x i32> undef, i32 %2185, i32 0 %2189 = insertelement <4 x i32> %2188, i32 %2186, i32 1 %2190 = insertelement <4 x i32> %2189, i32 %2187, i32 2 %2191 = bitcast <8 x i32> %80 to <32 x i8> %2192 = bitcast <4 x i32> %82 to <16 x i8> %2193 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2190, <32 x i8> %2191, <16 x i8> %2192, i32 2) %2194 = extractelement <4 x float> %2193, i32 1 %2195 = extractelement <4 x float> %2193, i32 3 %2196 = fcmp oeq float %temp18.0, 3.000000e+00 %2197 = select i1 %2196, float 1.000000e+00, float 0.000000e+00 %2198 = bitcast float %2169 to i32 %2199 = bitcast float %2171 to i32 %2200 = bitcast float %231 to i32 %2201 = insertelement <4 x i32> undef, i32 %2198, i32 0 %2202 = insertelement <4 x i32> %2201, i32 %2199, i32 1 %2203 = insertelement <4 x i32> %2202, i32 %2200, i32 2 %2204 = bitcast <8 x i32> %72 to <32 x i8> %2205 = bitcast <4 x i32> %74 to <16 x i8> %2206 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2203, <32 x i8> %2204, <16 x i8> %2205, i32 2) %2207 = extractelement <4 x float> %2206, i32 1 %2208 = extractelement <4 x float> %2206, i32 3 %2209 = fcmp oeq float %temp18.0, 2.000000e+00 %2210 = select i1 %2209, float 1.000000e+00, float 0.000000e+00 %2211 = bitcast float %2169 to i32 %2212 = bitcast float %2171 to i32 %2213 = bitcast float %231 to i32 %2214 = insertelement <4 x i32> undef, i32 %2211, i32 0 %2215 = insertelement <4 x i32> %2214, i32 %2212, i32 1 %2216 = insertelement <4 x i32> %2215, i32 %2213, i32 2 %2217 = bitcast <8 x i32> %64 to <32 x i8> %2218 = bitcast <4 x i32> %66 to <16 x i8> %2219 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2216, <32 x i8> %2217, <16 x i8> %2218, i32 2) %2220 = extractelement <4 x float> %2219, i32 1 %2221 = extractelement <4 x float> %2219, i32 3 %2222 = fcmp oeq float %temp18.0, 1.000000e+00 %2223 = select i1 %2222, float 1.000000e+00, float 0.000000e+00 %2224 = bitcast float %2169 to i32 %2225 = bitcast float %2171 to i32 %2226 = bitcast float %231 to i32 %2227 = insertelement <4 x i32> undef, i32 %2224, i32 0 %2228 = insertelement <4 x i32> %2227, i32 %2225, i32 1 %2229 = insertelement <4 x i32> %2228, i32 %2226, i32 2 %2230 = bitcast <8 x i32> %56 to <32 x i8> %2231 = bitcast <4 x i32> %58 to <16 x i8> %2232 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2229, <32 x i8> %2230, <16 x i8> %2231, i32 2) %2233 = extractelement <4 x float> %2232, i32 1 %2234 = extractelement <4 x float> %2232, i32 3 %2235 = fcmp oeq float %temp18.0, 0.000000e+00 %2236 = select i1 %2235, float 1.000000e+00, float 0.000000e+00 %2237 = fmul float %2233, %2236 %2238 = fmul float %2234, %2236 %2239 = fmul float %2220, %2223 %2240 = fadd float %2239, %2237 %2241 = fmul float %2221, %2223 %2242 = fadd float %2241, %2238 %2243 = fmul float %2207, %2210 %2244 = fadd float %2243, %2240 %2245 = fmul float %2208, %2210 %2246 = fadd float %2245, %2242 %2247 = fmul float %2194, %2197 %2248 = fadd float %2247, %2244 %2249 = fmul float %2195, %2197 %2250 = fadd float %2249, %2246 %2251 = fmul float %2181, %2184 %2252 = fadd float %2251, %2248 %2253 = fmul float %2182, %2184 %2254 = fadd float %2253, %2250 %2255 = fmul float %2254, 2.000000e+00 %2256 = fadd float %2255, -1.000000e+00 %2257 = fmul float %2252, 2.000000e+00 %2258 = fadd float %2257, -1.000000e+00 %2259 = fmul float %2256, %2256 %2260 = fmul float %2258, %2258 %2261 = fadd float %2259, %2260 %2262 = call float @llvm.AMDIL.clamp.(float %2261, float 0.000000e+00, float 1.000000e+00) %2263 = fcmp une float %38, %temp28.0 %.sink229 = select i1 %2263, float %41, float %40 %temp52.6 = select i1 %2263, float 1.953125e-03, float 3.906250e-03 %2264 = fdiv float 1.000000e+00, %.sink229 %2265 = fmul float %103, %2264 %2266 = fmul float %104, %2264 %2267 = call float @llvm.floor.f32(float %2265) %2268 = fsub float %2265, %2267 %2269 = call float @llvm.floor.f32(float %2266) %2270 = fsub float %2266, %2269 %2271 = fmul float %42, 2.000000e+00 %2272 = fmul float %2271, %temp52.6 %2273 = fsub float 1.000000e+00, %2272 %2274 = fmul float %temp52.6, %42 %2275 = fmul float %2268, %2273 %2276 = fadd float %2275, %2274 %2277 = fmul float %2270, %2273 %2278 = fadd float %2277, %2274 %2279 = fmul float %2276, %temp28.0 %2280 = fadd float %2279, %temp16.0 %2281 = fmul float %2278, %temp28.0 %2282 = fadd float %2281, %temp17.0 %2283 = bitcast float %2280 to i32 %2284 = bitcast float %2282 to i32 %2285 = bitcast float %231 to i32 %2286 = insertelement <4 x i32> undef, i32 %2283, i32 0 %2287 = insertelement <4 x i32> %2286, i32 %2284, i32 1 %2288 = insertelement <4 x i32> %2287, i32 %2285, i32 2 %2289 = bitcast <8 x i32> %88 to <32 x i8> %2290 = bitcast <4 x i32> %90 to <16 x i8> %2291 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2288, <32 x i8> %2289, <16 x i8> %2290, i32 2) %2292 = extractelement <4 x float> %2291, i32 1 %2293 = extractelement <4 x float> %2291, i32 3 %2294 = fcmp oeq float %temp18.0, 4.000000e+00 %2295 = select i1 %2294, float 1.000000e+00, float 0.000000e+00 %2296 = bitcast float %2280 to i32 %2297 = bitcast float %2282 to i32 %2298 = bitcast float %231 to i32 %2299 = insertelement <4 x i32> undef, i32 %2296, i32 0 %2300 = insertelement <4 x i32> %2299, i32 %2297, i32 1 %2301 = insertelement <4 x i32> %2300, i32 %2298, i32 2 %2302 = bitcast <8 x i32> %80 to <32 x i8> %2303 = bitcast <4 x i32> %82 to <16 x i8> %2304 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2301, <32 x i8> %2302, <16 x i8> %2303, i32 2) %2305 = extractelement <4 x float> %2304, i32 1 %2306 = extractelement <4 x float> %2304, i32 3 %2307 = fcmp oeq float %temp18.0, 3.000000e+00 %2308 = select i1 %2307, float 1.000000e+00, float 0.000000e+00 %2309 = bitcast float %2280 to i32 %2310 = bitcast float %2282 to i32 %2311 = bitcast float %231 to i32 %2312 = insertelement <4 x i32> undef, i32 %2309, i32 0 %2313 = insertelement <4 x i32> %2312, i32 %2310, i32 1 %2314 = insertelement <4 x i32> %2313, i32 %2311, i32 2 %2315 = bitcast <8 x i32> %72 to <32 x i8> %2316 = bitcast <4 x i32> %74 to <16 x i8> %2317 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2314, <32 x i8> %2315, <16 x i8> %2316, i32 2) %2318 = extractelement <4 x float> %2317, i32 1 %2319 = extractelement <4 x float> %2317, i32 3 %2320 = fcmp oeq float %temp18.0, 2.000000e+00 %2321 = select i1 %2320, float 1.000000e+00, float 0.000000e+00 %2322 = bitcast float %2280 to i32 %2323 = bitcast float %2282 to i32 %2324 = bitcast float %231 to i32 %2325 = insertelement <4 x i32> undef, i32 %2322, i32 0 %2326 = insertelement <4 x i32> %2325, i32 %2323, i32 1 %2327 = insertelement <4 x i32> %2326, i32 %2324, i32 2 %2328 = bitcast <8 x i32> %64 to <32 x i8> %2329 = bitcast <4 x i32> %66 to <16 x i8> %2330 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2327, <32 x i8> %2328, <16 x i8> %2329, i32 2) %2331 = extractelement <4 x float> %2330, i32 1 %2332 = extractelement <4 x float> %2330, i32 3 %2333 = fcmp oeq float %temp18.0, 1.000000e+00 %2334 = select i1 %2333, float 1.000000e+00, float 0.000000e+00 %2335 = bitcast float %2280 to i32 %2336 = bitcast float %2282 to i32 %2337 = bitcast float %231 to i32 %2338 = insertelement <4 x i32> undef, i32 %2335, i32 0 %2339 = insertelement <4 x i32> %2338, i32 %2336, i32 1 %2340 = insertelement <4 x i32> %2339, i32 %2337, i32 2 %2341 = bitcast <8 x i32> %56 to <32 x i8> %2342 = bitcast <4 x i32> %58 to <16 x i8> %2343 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2340, <32 x i8> %2341, <16 x i8> %2342, i32 2) %2344 = extractelement <4 x float> %2343, i32 1 %2345 = extractelement <4 x float> %2343, i32 3 %2346 = fcmp oeq float %temp18.0, 0.000000e+00 %2347 = select i1 %2346, float 1.000000e+00, float 0.000000e+00 %2348 = fmul float %2344, %2347 %2349 = fmul float %2345, %2347 %2350 = fmul float %2331, %2334 %2351 = fadd float %2350, %2348 %2352 = fmul float %2332, %2334 %2353 = fadd float %2352, %2349 %2354 = fmul float %2318, %2321 %2355 = fadd float %2354, %2351 %2356 = fmul float %2319, %2321 %2357 = fadd float %2356, %2353 %2358 = fmul float %2305, %2308 %2359 = fadd float %2358, %2355 %2360 = fmul float %2306, %2308 %2361 = fadd float %2360, %2357 %2362 = fmul float %2292, %2295 %2363 = fadd float %2362, %2359 %2364 = fmul float %2293, %2295 %2365 = fadd float %2364, %2361 %2366 = fmul float %2365, 2.000000e+00 %2367 = fadd float %2366, -1.000000e+00 %2368 = fmul float %2363, 2.000000e+00 %2369 = fadd float %2368, -1.000000e+00 %2370 = fmul float %2367, %2367 %2371 = fmul float %2369, %2369 %2372 = fadd float %2370, %2371 %2373 = call float @llvm.AMDIL.clamp.(float %2372, float 0.000000e+00, float 1.000000e+00) %2374 = fmul float %154, 0.000000e+00 %2375 = fmul float %2145, %154 %2376 = fmul float %2147, %154 %2377 = fmul float %2258, %155 %2378 = fadd float %2377, %2374 %2379 = fmul float %155, 0.000000e+00 %2380 = fadd float %2379, %2375 %2381 = fmul float %2256, %155 %2382 = fadd float %2381, %2376 %2383 = fmul float %2367, %156 %2384 = fadd float %2383, %2378 %2385 = fmul float %2369, %156 %2386 = fadd float %2385, %2380 %2387 = fmul float %156, 0.000000e+00 %2388 = fadd float %2387, %2382 %2389 = fmul float %95, %1688 %2390 = fmul float %95, %1690 %2391 = fmul float %95, %1692 %2392 = fmul float %96, %2036 %2393 = fadd float %2392, %2389 %2394 = fmul float %96, %2038 %2395 = fadd float %2394, %2390 %2396 = fmul float %96, %2040 %2397 = fadd float %2396, %2391 %2398 = fmul float %97, %2384 %2399 = fadd float %2398, %2393 %2400 = fmul float %97, %2386 %2401 = fadd float %2400, %2395 %2402 = fmul float %97, %2388 %2403 = fadd float %2402, %2397 %2404 = fmul float %2399, %2399 %2405 = fmul float %2401, %2401 %2406 = fadd float %2404, %2405 %2407 = fmul float %2403, %2403 %2408 = fadd float %2406, %2407 %2409 = fadd float %2408, 1.000000e+00 %2410 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2409) %2411 = fmul float %2399, %2410 %2412 = fmul float %2401, %2410 %2413 = fmul float %2403, %2410 %2414 = fmul float %2411, %94 %2415 = fmul float %2412, %94 %2416 = fmul float %2413, %94 %2417 = fsub float %100, %2414 %2418 = fsub float %101, %2415 %2419 = fsub float %102, %2416 %2420 = fmul float %2417, %2417 %2421 = fmul float %2418, %2418 %2422 = fadd float %2421, %2420 %2423 = fmul float %2419, %2419 %2424 = fadd float %2422, %2423 %2425 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2424) %2426 = fmul float %2417, %2425 %2427 = fmul float %2418, %2425 %2428 = fmul float %2419, %2425 %2429 = fmul float %1340, %106 %2430 = fmul float %1342, %107 %2431 = fmul float %1344, %108 %2432 = fadd float %115, %127 %2433 = fadd float %116, %128 %2434 = fadd float %117, %129 %2435 = fmul float %2432, %2432 %2436 = fmul float %2433, %2433 %2437 = fadd float %2436, %2435 %2438 = fmul float %2434, %2434 %2439 = fadd float %2437, %2438 %2440 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2439) %2441 = fmul float %2432, %2440 %2442 = fmul float %2433, %2440 %2443 = fmul float %2434, %2440 %2444 = fmul float %2426, %2441 %2445 = fmul float %2427, %2442 %2446 = fadd float %2445, %2444 %2447 = fmul float %2428, %2443 %2448 = fadd float %2446, %2447 %2449 = call float @llvm.maxnum.f32(float %2448, float 0x3F1A36E2E0000000) %2450 = fmul float %98, 3.200000e+01 %2451 = call float @llvm.pow.f32(float %2449, float %2450) %2452 = call float @llvm.AMDIL.clamp.(float %2451, float 0.000000e+00, float 1.000000e+00) %2453 = fmul float %2452, 2.000000e+00 %2454 = fsub float 3.000000e+00, %2453 %2455 = fmul float %2452, %2454 %2456 = fmul float %2452, %2455 %2457 = fmul float %2456, %98 %2458 = fmul float %1340, %35 %2459 = fmul float %1342, %36 %2460 = fmul float %1344, %37 %2461 = fmul float %2426, %115 %2462 = fmul float %2427, %116 %2463 = fadd float %2462, %2461 %2464 = fmul float %2428, %117 %2465 = fadd float %2463, %2464 %2466 = call float @llvm.AMDIL.clamp.(float %2465, float 0.000000e+00, float 1.000000e+00) %2467 = fmul float %48, 2.000000e+00 %2468 = fmul float %49, 2.000000e+00 %2469 = fmul float %50, 2.000000e+00 %2470 = call float @llvm.maxnum.f32(float %2467, float %45) %2471 = call float @llvm.maxnum.f32(float %2468, float %46) %2472 = call float @llvm.maxnum.f32(float %2469, float %47) %2473 = call float @llvm.minnum.f32(float %2470, float 1.000000e+00) %2474 = call float @llvm.minnum.f32(float %2471, float 1.000000e+00) %2475 = call float @llvm.minnum.f32(float %2472, float 1.000000e+00) %2476 = fmul float %2473, %1340 %2477 = fmul float %2474, %1342 %2478 = fmul float %2475, %1344 %2479 = fmul float %2458, %2466 %2480 = fadd float %2479, %2476 %2481 = fmul float %2459, %2466 %2482 = fadd float %2481, %2477 %2483 = fmul float %2460, %2466 %2484 = fadd float %2483, %2478 %2485 = fmul float %35, %2457 %2486 = fadd float %2485, %2480 %2487 = fmul float %36, %2457 %2488 = fadd float %2487, %2482 %2489 = fmul float %37, %2457 %2490 = fadd float %2489, %2484 %2491 = fmul float %2486, 5.000000e-01 %2492 = fmul float %2488, 5.000000e-01 %2493 = fmul float %2490, 5.000000e-01 %2494 = fadd float %2429, %2491 %2495 = fadd float %2430, %2492 %2496 = fadd float %2431, %2493 %2497 = fmul float %99, %33 %2498 = fadd float %2497, %34 %2499 = call float @llvm.AMDIL.clamp.(float %2498, float 0.000000e+00, float 1.000000e+00) %2500 = call float @llvm.AMDGPU.lrp(float %2499, float %2494, float %30) %2501 = call float @llvm.AMDGPU.lrp(float %2499, float %2495, float %31) %2502 = call float @llvm.AMDGPU.lrp(float %2499, float %2496, float %32) %2503 = call i32 @llvm.SI.packf16(float %2500, float %2501) %2504 = bitcast i32 %2503 to float %2505 = call i32 @llvm.SI.packf16(float %2502, float 1.000000e+00) %2506 = bitcast i32 %2505 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %2504, float %2506, float %2504, float %2506) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_writelane_b32 v254, s10, 60 ; 05FD780A s_mov_b64 s[100:101], s[4:5] ; BEE40404 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v14, v0, 0, 0, [m0] ; C8380000 v_interp_p2_f32 v14, [v14], v1, 0, 0, [m0] ; C8390001 v_interp_p1_f32 v17, v0, 1, 0, [m0] ; C8440100 v_interp_p2_f32 v17, [v17], v1, 1, 0, [m0] ; C8450101 v_interp_p1_f32 v18, v0, 2, 0, [m0] ; C8480200 v_interp_p2_f32 v18, [v18], v1, 2, 0, [m0] ; C8490201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v8, v0, 0, 1, [m0] ; C8200400 v_interp_p2_f32 v8, [v8], v1, 0, 1, [m0] ; C8210401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v2, v0, 3, 1, [m0] ; C8080700 v_interp_p2_f32 v2, [v2], v1, 3, 1, [m0] ; C8090701 s_mov_b32 s12, SCRATCH_RSRC_DWORD0 ; BE8C03FF 00000000 s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; BE8D03FF 00000000 s_mov_b32 s14, -1 ; BE8E03C1 s_mov_b32 s15, 0x80f000 ; BE8F03FF 0080F000 v_readlane_b32 s12, v254, 60 ; 021979FE s_nop 2 ; BF800002 buffer_store_dword v2, s[12:15], s12 ; E0700000 0C030200 v_interp_p1_f32 v12, v0, 0, 2, [m0] ; C8300800 v_interp_p2_f32 v12, [v12], v1, 0, 2, [m0] ; C8310801 v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900 v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901 v_interp_p1_f32 v10, v0, 2, 2, [m0] ; C8280A00 v_interp_p2_f32 v10, [v10], v1, 2, 2, [m0] ; C8290A01 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p1_f32 v11, v0, 3, 2, [m0] ; C82C0B00 v_interp_p2_f32 v11, [v11], v1, 3, 2, [m0] ; C82D0B01 v_interp_p1_f32 v34, v0, 0, 3, [m0] ; C8880C00 v_interp_p2_f32 v34, [v34], v1, 0, 3, [m0] ; C8890C01 v_interp_p1_f32 v38, v0, 1, 3, [m0] ; C8980D00 v_interp_p2_f32 v38, [v38], v1, 1, 3, [m0] ; C8990D01 v_interp_p1_f32 v28, v0, 2, 3, [m0] ; C8700E00 v_interp_p2_f32 v28, [v28], v1, 2, 3, [m0] ; C8710E01 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_interp_p1_f32 v2, v0, 3, 3, [m0] ; C8080F00 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s1, s[8:11], 0x4 ; C2008904 s_buffer_load_dword s0, s[8:11], 0x5 ; C2000905 v_interp_p2_f32 v2, [v2], v1, 3, 3, [m0] ; C8090F01 s_mov_b32 s12, SCRATCH_RSRC_DWORD0 ; BE8C03FF 00000000 s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; BE8D03FF 00000000 s_mov_b32 s14, -1 ; BE8E03C1 s_mov_b32 s15, 0x80f000 ; BE8F03FF 0080F000 buffer_store_dword v2, s[12:15], s12 offset:4 ; E0700004 0C030200 s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; BF8C0000 v_interp_p1_f32 v2, v0, 0, 4, [m0] ; C8081000 s_buffer_load_dword s16, s[8:11], 0x6 ; C2080906 v_interp_p2_f32 v2, [v2], v1, 0, 4, [m0] ; C8091001 s_mov_b32 s12, SCRATCH_RSRC_DWORD0 ; BE8C03FF 00000000 s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; BE8D03FF 00000000 s_mov_b32 s14, -1 ; BE8E03C1 s_mov_b32 s15, 0x80f000 ; BE8F03FF 0080F000 buffer_store_dword v2, s[12:15], s12 offset:8 ; E0700008 0C030200 v_interp_p1_f32 v0, v0, 1, 4, [m0] ; C8001100 v_interp_p2_f32 v0, [v0], v1, 1, 4, [m0] ; C8011101 s_mov_b32 s12, SCRATCH_RSRC_DWORD0 ; BE8C03FF 00000000 s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; BE8D03FF 00000000 s_mov_b32 s14, -1 ; BE8E03C1 s_mov_b32 s15, 0x80f000 ; BE8F03FF 0080F000 buffer_store_dword v0, s[12:15], s12 offset:12 ; E070000C 0C030000 s_buffer_load_dword s42, s[8:11], 0x0 ; C2150900 s_buffer_load_dword s41, s[8:11], 0x1 ; C2148901 v_mul_f32_e64 v1, s1, s1 ; D2100001 00000201 v_mac_f32_e64 v1, s0, s0 ; D23E0001 00000000 s_buffer_load_dword s17, s[8:11], 0x3c ; C208893C s_buffer_load_dword s2, s[8:11], 0x40 ; C2010940 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e64 v1, s16, s16 ; D23E0001 00002010 s_buffer_load_dword s43, s[8:11], 0x2 ; C2158902 v_rsq_clamp_f32_e32 v13, v1 ; 7E1A5901 v_add_f32_e32 v1, 0.5, v14 ; 06021CF0 v_floor_f32_e32 v16, v1 ; 7E204901 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_sub_f32_e32 v14, s42, v34 ; 081C442A v_sub_f32_e32 v1, s41, v38 ; 08024C29 v_mov_b32_e32 v15, 0x42800000 ; 7E1E02FF 42800000 v_cmp_le_f32_e32 vcc, v15, v16 ; 7C06210F v_mul_f32_e32 v19, v14, v14 ; 10261D0E v_mac_f32_e32 v19, v1, v1 ; 3E260301 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v15, s43, v28 ; 081E382B v_mac_f32_e32 v19, v15, v15 ; 3E261F0F v_rsq_clamp_f32_e32 v19, v19 ; 7E265913 v_mov_b32_e32 v21, 0x7fffffff ; 7E2A02FF 7FFFFFFF v_and_b32_e32 v20, v9, v21 ; 36282B09 v_and_b32_e32 v22, v10, v21 ; 362C2B0A v_and_b32_e32 v35, v11, v21 ; 36462B0B v_mul_f32_e64 v21, |v9|, |v9| ; D2100315 00021309 v_mad_f32 v21, |v10|, |v10|, v21 ; D2820315 0456150A v_mad_f32 v21, |v11|, |v11|, v21 ; D2820315 0456170B v_rsq_clamp_f32_e32 v36, v21 ; 7E485915 v_add_f32_e32 v17, 0.5, v17 ; 062222F0 v_add_f32_e32 v18, 0.5, v18 ; 062424F0 v_floor_f32_e32 v21, v17 ; 7E2A4911 v_floor_f32_e32 v17, v18 ; 7E224912 v_mov_b32_e32 v31, s2 ; 7E3E0202 v_mul_f32_e32 v24, s17, v16 ; 10302011 v_floor_f32_e32 v23, v24 ; 7E2E4918 s_and_saveexec_b64 s[12:13], vcc ; BE8C246A s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E v_mov_b32_e32 v18, 0xc2800000 ; 7E2402FF C2800000 v_add_f32_e32 v16, v16, v18 ; 06202510 v_mul_f32_e32 v18, s2, v16 ; 10242002 v_floor_f32_e32 v18, v18 ; 7E244912 v_mul_f32_e32 v25, s2, v18 ; 10322402 v_mad_f32 v32, v16, s2, -v18 ; D2820020 84480510 v_floor_f32_e32 v16, v25 ; 7E204919 v_mad_f32 v33, v18, s2, -v16 ; D2820021 84400512 v_add_f32_e32 v18, 4.0, v16 ; 062420F6 s_or_saveexec_b64 s[12:13], s[12:13] ; BE8C250C v_mov_b32_e32 v16, s17 ; 7E200211 v_mov_b32_e32 v40, v31 ; 7E50031F s_xor_b64 exec, exec, s[12:13] ; 89FE0C7E v_mul_f32_e32 v18, s17, v23 ; 10242E11 v_floor_f32_e32 v25, v24 ; 7E324918 v_subrev_f32_e32 v32, v25, v24 ; 0A403119 v_floor_f32_e32 v18, v18 ; 7E244912 v_mad_f32 v33, v23, s17, -v18 ; D2820021 84482317 v_mov_b32_e32 v40, v16 ; 7E500310 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E v_mul_f32_e32 v25, s17, v21 ; 10322A11 v_floor_f32_e32 v24, v25 ; 7E304919 v_mov_b32_e32 v23, 0x42800000 ; 7E2E02FF 42800000 v_cmp_le_f32_e32 vcc, v23, v21 ; 7C062B17 s_and_saveexec_b64 s[12:13], vcc ; BE8C246A s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E v_mov_b32_e32 v23, 0xc2800000 ; 7E2E02FF C2800000 v_add_f32_e32 v21, v21, v23 ; 062A2F15 v_mul_f32_e32 v23, s2, v21 ; 102E2A02 v_floor_f32_e32 v23, v23 ; 7E2E4917 v_mul_f32_e32 v26, s2, v23 ; 10342E02 v_mad_f32 v29, v21, s2, -v23 ; D282001D 845C0515 v_floor_f32_e32 v21, v26 ; 7E2A491A v_mad_f32 v30, v23, s2, -v21 ; D282001E 84540517 v_add_f32_e32 v23, 4.0, v21 ; 062E2AF6 s_or_saveexec_b64 s[12:13], s[12:13] ; BE8C250C v_mov_b32_e32 v39, v31 ; 7E4E031F s_xor_b64 exec, exec, s[12:13] ; 89FE0C7E v_mul_f32_e32 v21, s17, v24 ; 102A3011 v_floor_f32_e32 v23, v25 ; 7E2E4919 v_subrev_f32_e32 v29, v23, v25 ; 0A3A3317 v_floor_f32_e32 v23, v21 ; 7E2E4915 v_mad_f32 v30, v24, s17, -v23 ; D282001E 845C2318 v_mov_b32_e32 v39, v16 ; 7E4E0310 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E s_buffer_load_dword s14, s[8:11], 0xf ; C207090F s_buffer_load_dword s15, s[8:11], 0x44 ; C2078944 s_buffer_load_dword s3, s[8:11], 0x48 ; C2018948 v_mul_f32_e32 v25, s17, v17 ; 10322211 v_floor_f32_e32 v21, v25 ; 7E2A4919 v_mov_b32_e32 v24, 0x42800000 ; 7E3002FF 42800000 v_cmp_le_f32_e32 vcc, v24, v17 ; 7C062318 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[12:13], vcc ; BE8C246A s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E v_mov_b32_e32 v24, 0xc2800000 ; 7E3002FF C2800000 v_add_f32_e32 v17, v17, v24 ; 06223111 v_mul_f32_e32 v24, s2, v17 ; 10302202 v_floor_f32_e32 v24, v24 ; 7E304918 v_mul_f32_e32 v27, s2, v24 ; 10363002 v_mad_f32 v26, v17, s2, -v24 ; D282001A 84600511 v_floor_f32_e32 v17, v27 ; 7E22491B v_mad_f32 v27, v24, s2, -v17 ; D282001B 84440518 v_add_f32_e32 v24, 4.0, v17 ; 063022F6 s_or_saveexec_b64 s[12:13], s[12:13] ; BE8C250C v_mov_b32_e32 v41, s14 ; 7E52020E v_mov_b32_e32 v37, s15 ; 7E4A020F s_buffer_load_dword s4, s[8:11], 0x8 ; C2020908 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s4, 6 ; 05FD0C04 s_buffer_load_dword s4, s[8:11], 0x9 ; C2020909 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s4, 5 ; 05FD0A04 s_buffer_load_dword s4, s[8:11], 0xa ; C202090A s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s4, 4 ; 05FD0804 s_buffer_load_dword s14, s[8:11], 0xe ; C207090E s_buffer_load_dword s4, s[8:11], 0x10 ; C2020910 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s4, 46 ; 05FD5C04 s_buffer_load_dword s4, s[8:11], 0x11 ; C2020911 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s4, 47 ; 05FD5E04 s_buffer_load_dword s4, s[8:11], 0x12 ; C2020912 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s4, 7 ; 05FD0E04 s_buffer_load_dword s40, s[8:11], 0x4c ; C214094C s_buffer_load_dword s44, s[8:11], 0x50 ; C2160950 s_buffer_load_dword s28, s[8:11], 0x54 ; C20E0954 s_buffer_load_dword s4, s[8:11], 0x58 ; C2020958 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s4, 8 ; 05FD1004 s_buffer_load_dword s4, s[8:11], 0x59 ; C2020959 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s4, 9 ; 05FD1204 s_buffer_load_dword s4, s[8:11], 0x5a ; C202095A s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s4, 10 ; 05FD1404 s_buffer_load_dword s4, s[8:11], 0x5c ; C202095C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s4, 13 ; 05FD1A04 s_buffer_load_dword s4, s[8:11], 0x5d ; C202095D s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s4, 11 ; 05FD1604 s_buffer_load_dword s4, s[8:11], 0x5e ; C202095E s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s4, 12 ; 05FD1804 v_mov_b32_e32 v42, s3 ; 7E540203 s_xor_b64 exec, exec, s[12:13] ; 89FE0C7E v_mul_f32_e32 v17, s17, v21 ; 10222A11 v_floor_f32_e32 v24, v25 ; 7E304919 v_subrev_f32_e32 v26, v24, v25 ; 0A343318 v_floor_f32_e32 v24, v17 ; 7E304911 v_mad_f32 v27, v21, s17, -v24 ; D282001B 84602315 v_mov_b32_e32 v31, v16 ; 7E3E0310 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E v_mac_f32_e32 v41, s14, v12 ; 3E52180E v_mul_f32_e32 v16, v19, v14 ; 10201D13 v_mul_f32_e32 v17, v19, v1 ; 10220313 v_mul_f32_e32 v12, v19, v15 ; 10181F13 v_mac_f32_e32 v16, s1, v13 ; 3E201A01 v_mac_f32_e32 v17, s0, v13 ; 3E221A00 v_mac_f32_e32 v12, s16, v13 ; 3E181A10 v_add_f32_e64 v1, 0, v41 clamp ; D2060801 00025280 v_cmp_neq_f32_e64 s[22:23], s17, v40 ; D01A0016 00025011 v_cmp_eq_f32_e64 s[2:3], 4.0, v18 ; D0040002 000224F6 v_cmp_eq_f32_e64 s[4:5], 2.0, v18 ; D0040004 000224F4 v_writelane_b32 v254, s4, 28 ; 05FD3804 v_writelane_b32 v254, s5, 29 ; 05FD3A05 v_cmp_eq_f32_e64 s[4:5], 1.0, v18 ; D0040004 000224F2 v_writelane_b32 v254, s4, 26 ; 05FD3404 v_writelane_b32 v254, s5, 27 ; 05FD3605 v_cmp_eq_f32_e32 vcc, 0, v18 ; 7C042480 v_cmp_neq_f32_e64 s[24:25], s17, v39 ; D01A0018 00024E11 v_cmp_neq_f32_e64 s[26:27], s17, v31 ; D01A001A 00023E11 v_cmp_eq_f32_e64 s[4:5], 4.0, v23 ; D0040004 00022EF6 v_writelane_b32 v254, s4, 32 ; 05FD4004 v_writelane_b32 v254, s5, 33 ; 05FD4205 v_cmp_eq_f32_e64 s[4:5], 2.0, v23 ; D0040004 00022EF4 v_writelane_b32 v254, s4, 22 ; 05FD2C04 v_writelane_b32 v254, s5, 23 ; 05FD2E05 v_mul_f32_e32 v15, s1, v13 ; 101E1A01 v_cmp_eq_f32_e64 s[4:5], 1.0, v23 ; D0040004 00022EF2 v_writelane_b32 v254, s4, 24 ; 05FD3004 v_writelane_b32 v254, s5, 25 ; 05FD3205 v_mul_f32_e32 v14, s0, v13 ; 101C1A00 v_mul_f32_e32 v13, s16, v13 ; 101A1A10 v_cmp_eq_f32_e64 s[0:1], 0, v23 ; D0040000 00022E80 v_writelane_b32 v254, s0, 30 ; 05FD3C00 v_writelane_b32 v254, s1, 31 ; 05FD3E01 v_subrev_f32_e32 v19, s42, v34 ; 0A26442A v_mul_f32_e32 v19, v19, v19 ; 10262713 v_subrev_f32_e32 v21, s41, v38 ; 0A2A4C29 v_mac_f32_e32 v19, v21, v21 ; 3E262B15 v_cmp_eq_f32_e64 s[0:1], 4.0, v24 ; D0040000 000230F6 v_writelane_b32 v254, s0, 14 ; 05FD1C00 v_writelane_b32 v254, s1, 15 ; 05FD1E01 v_subrev_f32_e32 v21, s43, v28 ; 0A2A382B v_mac_f32_e32 v19, v21, v21 ; 3E262B15 v_mul_f32_e32 v19, s28, v19 ; 1026261C v_log_f32_e32 v19, v19 ; 7E264F13 v_cmp_eq_f32_e64 s[0:1], 2.0, v24 ; D0040000 000230F4 v_writelane_b32 v254, s0, 16 ; 05FD2000 v_writelane_b32 v254, s1, 17 ; 05FD2201 v_cndmask_b32_e64 v25, 0, 1.0, s[2:3] ; D2000019 0009E480 v_cmp_eq_f32_e64 s[0:1], 1.0, v24 ; D0040000 000230F2 v_writelane_b32 v254, s0, 18 ; 05FD2400 v_writelane_b32 v254, s1, 19 ; 05FD2601 v_mul_f32_e32 v41, 0x3f317218, v19 ; 105226FF 3F317218 v_mov_b32_e32 v19, 0xbe4ccccd ; 7E2602FF BE4CCCCD v_mad_f32 v21, v36, v20, v19 ; D2820015 044E2924 v_mad_f32 v20, v36, v22, v19 ; D2820014 044E2D24 v_mac_f32_e32 v19, v36, v35 ; 3E264724 v_mov_b32_e32 v22, 0x40e00000 ; 7E2C02FF 40E00000 v_cndmask_b32_e64 v35, v37, v42, s[22:23] ; D2000023 005A5525 v_rcp_f32_e32 v35, v35 ; 7E465523 v_mov_b32_e32 v36, 0x3b000000 ; 7E4802FF 3B000000 v_mov_b32_e32 v43, 0x3b800000 ; 7E5602FF 3B800000 v_cndmask_b32_e64 v44, v43, v36, s[22:23] ; D200002C 005A492B v_cmp_eq_f32_e64 s[0:1], 0, v24 ; D0040000 00023080 v_writelane_b32 v254, s0, 20 ; 05FD2800 v_writelane_b32 v254, s1, 21 ; 05FD2A01 v_mul_f32_e32 v45, v35, v34 ; 105A4523 v_floor_f32_e32 v45, v45 ; 7E5A492D v_mad_f32 v45, v34, v35, -v45 ; D282002D 84B64722 v_mul_f32_e32 v46, v35, v38 ; 105C4D23 v_floor_f32_e32 v46, v46 ; 7E5C492E v_mad_f32 v46, v38, v35, -v46 ; D282002E 84BA4726 v_add_f32_e64 v47, s40, s40 ; D206002F 00005028 v_mad_f32 v48, -v47, v44, 1.0 ; D2820030 23CA592F v_mul_f32_e32 v44, s40, v44 ; 10585828 v_mad_f32 v49, v48, v45, v44 ; D2820031 04B25B30 v_mad_f32 v45, v48, v46, v44 ; D282002D 04B25D30 v_mul_f32_e32 v46, v35, v28 ; 105C3923 v_floor_f32_e32 v46, v46 ; 7E5C492E v_mad_f32 v35, v28, v35, -v46 ; D2820023 84BA471C v_mac_f32_e32 v44, v48, v35 ; 3E584730 v_cndmask_b32_e64 v35, v37, v42, s[24:25] ; D2000023 00625525 v_cndmask_b32_e64 v37, v37, v42, s[26:27] ; D2000025 006A5525 v_rcp_f32_e32 v42, v35 ; 7E545523 v_rcp_f32_e32 v48, v37 ; 7E605525 v_cndmask_b32_e64 v50, v43, v36, s[24:25] ; D2000032 0062492B v_cndmask_b32_e64 v51, v43, v36, s[26:27] ; D2000033 006A492B s_load_dwordx4 s[96:99], s[100:101], 0x8 ; C0B06508 v_mul_f32_e32 v35, v42, v34 ; 1046452A v_floor_f32_e32 v35, v35 ; 7E464923 v_mad_f32 v52, v34, v42, -v35 ; D2820034 848E5522 v_mul_f32_e32 v35, v42, v38 ; 10464D2A v_floor_f32_e32 v35, v35 ; 7E464923 v_mad_f32 v53, v38, v42, -v35 ; D2820035 848E5526 v_mul_f32_e32 v37, s44, v41 ; 104A522C v_mad_f32 v35, v40, v49, v32 ; D2820023 04826328 v_mad_f32 v36, v40, v45, v33 ; D2820024 04865B28 v_mac_f32_e32 v32, v40, v44 ; 3E405928 v_mov_b32_e32 v43, v32 ; 7E560320 v_mov_b32_e32 v44, v33 ; 7E580321 v_mov_b32_e32 v45, v34 ; 7E5A0322 v_mov_b32_e32 v46, v35 ; 7E5C0323 v_mac_f32_e32 v33, v40, v49 ; 3E426328 v_mul_f32_e32 v40, v42, v28 ; 1050392A v_floor_f32_e32 v40, v40 ; 7E504928 v_mad_f32 v40, v28, v42, -v40 ; D2820028 84A2551C v_mad_f32 v41, -v47, v50, 1.0 ; D2820029 23CA652F v_mul_f32_e32 v42, s40, v50 ; 10546428 v_mad_f32 v44, v41, v52, v42 ; D282002C 04AA6929 v_mad_f32 v45, v41, v53, v42 ; D282002D 04AA6B29 v_mac_f32_e32 v42, v41, v40 ; 3E545129 v_mul_f32_e32 v40, v48, v34 ; 10504530 v_floor_f32_e32 v40, v40 ; 7E504928 v_mad_f32 v34, v34, v48, -v40 ; D2820022 84A26122 v_mul_f32_e32 v40, v48, v38 ; 10504D30 v_floor_f32_e32 v40, v40 ; 7E504928 v_mad_f32 v38, v38, v48, -v40 ; D2820026 84A26126 v_mul_f32_e32 v40, v48, v28 ; 10503930 v_floor_f32_e32 v40, v40 ; 7E504928 v_mad_f32 v28, v28, v48, -v40 ; D282001C 84A2611C v_mad_f32 v40, -v47, v51, 1.0 ; D2820028 23CA672F v_mul_f32_e32 v47, s40, v51 ; 105E6628 v_mad_f32 v34, v40, v34, v47 ; D2820022 04BE4528 v_mad_f32 v38, v40, v38, v47 ; D2820026 04BE4D28 v_mac_f32_e32 v47, v40, v28 ; 3E5E3928 v_mad_f32 v48, v39, v44, v29 ; D2820030 04765927 v_mad_f32 v49, v39, v45, v30 ; D2820031 047A5B27 v_mac_f32_e32 v29, v39, v42 ; 3E3A5527 v_mad_f32 v40, v31, v34, v26 ; D2820028 046A451F v_mad_f32 v41, v31, v38, v27 ; D2820029 046E4D1F v_mac_f32_e32 v26, v31, v47 ; 3E345F1F v_mov_b32_e32 v51, v29 ; 7E66031D v_mov_b32_e32 v52, v30 ; 7E68031E v_mov_b32_e32 v53, v31 ; 7E6A031F v_mov_b32_e32 v54, v32 ; 7E6C0320 v_mac_f32_e32 v30, v39, v44 ; 3E3C5927 v_mov_b32_e32 v55, v26 ; 7E6E031A v_mov_b32_e32 v56, v27 ; 7E70031B v_mov_b32_e32 v57, v28 ; 7E72031C v_mov_b32_e32 v58, v29 ; 7E74031D s_load_dwordx4 s[44:47], s[100:101], 0x20 ; C0966520 s_load_dwordx8 s[64:71], s[6:7], 0x40 ; C0E00740 v_mac_f32_e32 v27, v31, v34 ; 3E36451F v_mov_b32_e32 v44, v36 ; 7E580324 v_mov_b32_e32 v50, v37 ; 7E640325 v_mov_b32_e32 v52, v49 ; 7E680331 v_mov_b32_e32 v42, v37 ; 7E540325 v_mov_b32_e32 v56, v41 ; 7E700329 v_mov_b32_e32 v45, v37 ; 7E5A0325 v_mov_b32_e32 v34, v37 ; 7E440325 v_mov_b32_e32 v53, v37 ; 7E6A0325 v_mov_b32_e32 v31, v37 ; 7E3E0325 v_mov_b32_e32 v57, v37 ; 7E720325 v_mov_b32_e32 v28, v37 ; 7E380325 s_load_dwordx4 s[8:11], s[100:101], 0x18 ; C0846518 s_load_dwordx8 s[16:23], s[6:7], 0x20 ; C0C80720 s_load_dwordx8 s[56:63], s[6:7], 0x30 ; C0DC0730 s_load_dwordx4 s[12:15], s[100:101], 0x10 ; C0866510 s_load_dwordx4 s[48:51], s[100:101], 0x24 ; C0986524 s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710 s_load_dwordx8 s[80:87], s[6:7], 0x48 ; C0E80748 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[59:61], 7, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[64:71], s[44:47] ; F0900700 01703B23 s_load_dwordx4 s[52:55], s[100:101], 0x0 ; C09A6500 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_load_dwordx4 s[40:43], s[100:101], 0x1c ; C094651C s_load_dwordx8 s[72:79], s[6:7], 0x38 ; C0E40738 image_sample_l v[62:64], 7, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[56:63], s[8:11] ; F0900700 004E3E23 image_sample_l v[65:67], 7, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[16:23], s[12:15] ; F0900700 00644123 s_load_dwordx4 s[0:3], s[100:101], 0xc ; C080650C s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_writelane_b32 v254, s0, 48 ; 05FD6000 v_writelane_b32 v254, s1, 49 ; 05FD6201 v_writelane_b32 v254, s2, 50 ; 05FD6402 v_writelane_b32 v254, s3, 51 ; 05FD6603 s_load_dwordx8 s[88:95], s[6:7], 0x18 ; C0EC0718 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s88, 52 ; 05FD6858 v_writelane_b32 v254, s89, 53 ; 05FD6A59 v_writelane_b32 v254, s90, 54 ; 05FD6C5A v_writelane_b32 v254, s91, 55 ; 05FD6E5B v_writelane_b32 v254, s92, 56 ; 05FD705C v_writelane_b32 v254, s93, 57 ; 05FD725D v_writelane_b32 v254, s94, 58 ; 05FD745E v_writelane_b32 v254, s95, 59 ; 05FD765F image_sample_l v[68:70], 7, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[24:31], s[96:99] ; F0900700 03064423 s_load_dwordx4 s[0:3], s[100:101], 0x4 ; C0806504 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_writelane_b32 v254, s0, 34 ; 05FD4400 v_writelane_b32 v254, s1, 35 ; 05FD4601 v_writelane_b32 v254, s2, 36 ; 05FD4802 v_writelane_b32 v254, s3, 37 ; 05FD4A03 s_load_dwordx8 s[88:95], s[6:7], 0x8 ; C0EC0708 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v254, s88, 38 ; 05FD4C58 v_writelane_b32 v254, s89, 39 ; 05FD4E59 v_writelane_b32 v254, s90, 40 ; 05FD505A v_writelane_b32 v254, s91, 41 ; 05FD525B v_writelane_b32 v254, s92, 42 ; 05FD545C v_writelane_b32 v254, s93, 43 ; 05FD565D v_writelane_b32 v254, s94, 44 ; 05FD585E v_writelane_b32 v254, s95, 45 ; 05FD5A5F image_sample_l v[71:73], 7, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[32:39], s[52:55] ; F0900700 01A84723 image_sample_l v[74:76], 7, 0, 0, 0, 0, 0, 0, 0, v[43:46], s[64:71], s[44:47] ; F0900700 01704A2B image_sample_l v[77:79], 7, 0, 0, 0, 0, 0, 0, 0, v[43:46], s[56:63], s[8:11] ; F0900700 004E4D2B image_sample_l v[80:82], 7, 0, 0, 0, 0, 0, 0, 0, v[43:46], s[16:23], s[12:15] ; F0900700 0064502B image_sample_l v[83:85], 7, 0, 0, 0, 0, 0, 0, 0, v[43:46], s[24:31], s[96:99] ; F0900700 0306532B image_sample_l v[86:88], 7, 0, 0, 0, 0, 0, 0, 0, v[43:46], s[32:39], s[52:55] ; F0900700 01A8562B image_sample_l v[89:91], 7, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[64:71], s[44:47] ; F0900700 01705920 image_sample_l v[92:94], 7, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[56:63], s[8:11] ; F0900700 004E5C20 image_sample_l v[95:97], 7, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[16:23], s[12:15] ; F0900700 00645F20 image_sample_l v[98:100], 7, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[24:31], s[96:99] ; F0900700 03066220 image_sample_l v[101:103], 7, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[32:39], s[52:55] ; F0900700 01A86520 image_sample_l v[104:106], 7, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[64:71], s[44:47] ; F0900700 01706830 image_sample_l v[107:109], 7, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[56:63], s[8:11] ; F0900700 004E6B30 image_sample_l v[110:112], 7, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[16:23], s[12:15] ; F0900700 00646E30 image_sample_l v[113:115], 7, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[24:31], s[96:99] ; F0900700 03067130 image_sample_l v[116:118], 7, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[32:39], s[52:55] ; F0900700 01A87430 image_sample_l v[119:121], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[64:71], s[44:47] ; F0900700 01707733 image_sample_l v[122:124], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[56:63], s[8:11] ; F0900700 004E7A33 image_sample_l v[125:127], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[16:23], s[12:15] ; F0900700 00647D33 image_sample_l v[128:130], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[24:31], s[96:99] ; F0900700 03068033 image_sample_l v[131:133], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[32:39], s[52:55] ; F0900700 01A88333 image_sample_l v[134:136], 7, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[64:71], s[44:47] ; F0900700 0170861D image_sample_l v[137:139], 7, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[56:63], s[8:11] ; F0900700 004E891D image_sample_l v[140:142], 7, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[16:23], s[12:15] ; F0900700 00648C1D image_sample_l v[143:145], 7, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[24:31], s[96:99] ; F0900700 03068F1D image_sample_l v[146:148], 7, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[32:39], s[52:55] ; F0900700 01A8921D image_sample_l v[149:151], 7, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[64:71], s[44:47] ; F0900700 01709528 image_sample_l v[152:154], 7, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[56:63], s[8:11] ; F0900700 004E9828 image_sample_l v[155:157], 7, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[16:23], s[12:15] ; F0900700 00649B28 image_sample_l v[158:160], 7, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[24:31], s[96:99] ; F0900700 03069E28 image_sample_l v[161:163], 7, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[32:39], s[52:55] ; F0900700 01A8A128 image_sample_l v[164:166], 7, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[64:71], s[44:47] ; F0900700 0170A437 image_sample_l v[167:169], 7, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[56:63], s[8:11] ; F0900700 004EA737 image_sample_l v[170:172], 7, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[16:23], s[12:15] ; F0900700 0064AA37 s_load_dwordx4 s[88:91], s[100:101], 0x14 ; C0AC6514 s_load_dwordx8 s[0:7], s[6:7], 0x28 ; C0C00728 image_sample_l v[173:175], 7, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[24:31], s[96:99] ; F0900700 0306AD37 image_sample_l v[176:178], 7, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[32:39], s[52:55] ; F0900700 01A8B037 image_sample_l v[179:181], 7, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[64:71], s[44:47] ; F0900700 0170B31A image_sample_l v[182:184], 7, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[56:63], s[8:11] ; F0900700 004EB61A image_sample_l v[185:187], 7, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[16:23], s[12:15] ; F0900700 0064B91A image_sample_l v[188:190], 7, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[24:31], s[96:99] ; F0900700 0306BC1A image_sample_l v[191:193], 7, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[32:39], s[52:55] ; F0900700 01A8BF1A image_sample_l v[38:39], 10, 0, 0, 0, 0, 0, 0, 0, v[43:46], s[80:87], s[48:51] ; F0900A00 0194262B image_sample_l v[194:195], 10, 0, 0, 0, 0, 0, 0, 0, v[43:46], s[72:79], s[40:43] ; F0900A00 0152C22B s_waitcnt vmcnt(9) lgkmcnt(0) ; BF8C0079 image_sample_l v[196:197], 10, 0, 0, 0, 0, 0, 0, 0, v[43:46], s[0:7], s[88:91] ; F0900A00 02C0C42B v_readlane_b32 s8, v254, 48 ; 021161FE v_readlane_b32 s9, v254, 49 ; 021363FE v_readlane_b32 s10, v254, 50 ; 021565FE v_readlane_b32 s11, v254, 51 ; 021767FE s_nop 2 ; BF800002 v_readlane_b32 s24, v254, 52 ; 023169FE v_readlane_b32 s25, v254, 53 ; 02336BFE v_readlane_b32 s26, v254, 54 ; 02356DFE v_readlane_b32 s27, v254, 55 ; 02376FFE v_readlane_b32 s28, v254, 56 ; 023971FE v_readlane_b32 s29, v254, 57 ; 023B73FE v_readlane_b32 s30, v254, 58 ; 023D75FE v_readlane_b32 s31, v254, 59 ; 023F77FE s_nop 2 ; BF800002 image_sample_l v[198:199], 10, 0, 0, 0, 0, 0, 0, 0, v[43:46], s[24:31], s[8:11] ; F0900A00 0046C62B v_readlane_b32 s92, v254, 34 ; 02B945FE v_readlane_b32 s93, v254, 35 ; 02BB47FE v_readlane_b32 s94, v254, 36 ; 02BD49FE v_readlane_b32 s95, v254, 37 ; 02BF4BFE s_nop 2 ; BF800002 v_readlane_b32 s16, v254, 38 ; 02214DFE v_readlane_b32 s17, v254, 39 ; 02234FFE v_readlane_b32 s18, v254, 40 ; 022551FE v_readlane_b32 s19, v254, 41 ; 022753FE v_readlane_b32 s20, v254, 42 ; 022955FE v_readlane_b32 s21, v254, 43 ; 022B57FE v_readlane_b32 s22, v254, 44 ; 022D59FE v_readlane_b32 s23, v254, 45 ; 022F5BFE s_nop 2 ; BF800002 image_sample_l v[43:44], 10, 0, 0, 0, 0, 0, 0, 0, v[43:46], s[16:23], s[92:95] ; F0900A00 02E42B2B image_sample_l v[45:46], 10, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[80:87], s[48:51] ; F0900A00 01942D20 image_sample_l v[200:201], 10, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[72:79], s[40:43] ; F0900A00 0152C820 image_sample_l v[202:203], 10, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[0:7], s[88:91] ; F0900A00 02C0CA20 image_sample_l v[204:205], 10, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[24:31], s[8:11] ; F0900A00 0046CC20 s_mov_b32 s12, s8 ; BE8C0308 s_mov_b32 s13, s9 ; BE8D0309 s_mov_b32 s14, s10 ; BE8E030A s_mov_b32 s15, s11 ; BE8F030B image_sample_l v[32:33], 10, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[16:23], s[92:95] ; F0900A00 02E42020 s_mov_b32 s8, s92 ; BE88035C s_mov_b32 s9, s93 ; BE89035D s_mov_b32 s10, s94 ; BE8A035E s_mov_b32 s11, s95 ; BE8B035F s_waitcnt vmcnt(9) ; BF8C0779 image_sample_l v[206:207], 10, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[80:87], s[48:51] ; F0900A00 0194CE23 image_sample_l v[208:209], 10, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[72:79], s[40:43] ; F0900A00 0152D023 image_sample_l v[210:211], 10, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[0:7], s[88:91] ; F0900A00 02C0D223 image_sample_l v[212:213], 10, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[24:31], s[12:15] ; F0900A00 0066D423 image_sample_l v[34:35], 10, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[16:23], s[8:11] ; F0900A00 00442223 image_sample_l v[36:37], 10, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[80:87], s[48:51] ; F0900A00 01942433 image_sample_l v[214:215], 10, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[72:79], s[40:43] ; F0900A00 0152D633 image_sample_l v[216:217], 10, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[0:7], s[88:91] ; F0900A00 02C0D833 image_sample_l v[218:219], 10, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[24:31], s[12:15] ; F0900A00 0066DA33 image_sample_l v[51:52], 10, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[16:23], s[8:11] ; F0900A00 00443333 s_waitcnt vmcnt(10) ; BF8C077A image_sample_l v[53:54], 10, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[80:87], s[48:51] ; F0900A00 0194351D image_sample_l v[220:221], 10, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[72:79], s[40:43] ; F0900A00 0152DC1D image_sample_l v[222:223], 10, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[0:7], s[88:91] ; F0900A00 02C0DE1D image_sample_l v[224:225], 10, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[24:31], s[12:15] ; F0900A00 0066E01D image_sample_l v[29:30], 10, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[16:23], s[8:11] ; F0900A00 00441D1D s_waitcnt vmcnt(5) ; BF8C0775 image_sample_l v[226:227], 10, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[80:87], s[48:51] ; F0900A00 0194E230 image_sample_l v[228:229], 10, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[72:79], s[40:43] ; F0900A00 0152E430 image_sample_l v[230:231], 10, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[0:7], s[88:91] ; F0900A00 02C0E630 image_sample_l v[232:233], 10, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[24:31], s[12:15] ; F0900A00 0066E830 image_sample_l v[47:48], 10, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[16:23], s[8:11] ; F0900A00 00442F30 image_sample_l v[49:50], 10, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[80:87], s[48:51] ; F0900A00 01943137 image_sample_l v[234:235], 10, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[72:79], s[40:43] ; F0900A00 0152EA37 image_sample_l v[236:237], 10, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[0:7], s[88:91] ; F0900A00 02C0EC37 image_sample_l v[238:239], 10, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[24:31], s[12:15] ; F0900A00 0066EE37 image_sample_l v[55:56], 10, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[16:23], s[8:11] ; F0900A00 00443737 s_waitcnt vmcnt(10) ; BF8C077A image_sample_l v[57:58], 10, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[80:87], s[48:51] ; F0900A00 0194391A image_sample_l v[240:241], 10, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[72:79], s[40:43] ; F0900A00 0152F01A image_sample_l v[242:243], 10, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[0:7], s[88:91] ; F0900A00 02C0F21A image_sample_l v[244:245], 10, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[24:31], s[12:15] ; F0900A00 0066F41A image_sample_l v[26:27], 10, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[16:23], s[8:11] ; F0900A00 00441A1A image_sample_l v[246:247], 10, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[80:87], s[48:51] ; F0900A00 0194F628 image_sample_l v[248:249], 10, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[72:79], s[40:43] ; F0900A00 0152F828 image_sample_l v[250:251], 10, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[0:7], s[88:91] ; F0900A00 02C0FA28 v_mov_b32_e32 v28, 0x40400000 ; 7E3802FF 40400000 v_cmp_eq_f32_e64 s[4:5], v18, v28 ; D0040004 00023912 v_cndmask_b32_e64 v18, 0, 1.0, s[4:5] ; D2000012 0011E480 v_readlane_b32 s0, v254, 28 ; 020139FE v_readlane_b32 s1, v254, 29 ; 02033BFE s_nop 2 ; BF800002 v_cndmask_b32_e64 v31, 0, 1.0, s[0:1] ; D200001F 0001E480 v_readlane_b32 s0, v254, 26 ; 020135FE v_readlane_b32 s1, v254, 27 ; 020337FE s_nop 2 ; BF800002 v_cndmask_b32_e64 v252, 0, 1.0, s[0:1] ; D20000FC 0001E480 v_cndmask_b32_e64 v253, 0, 1.0, vcc ; D20000FD 01A9E480 v_readlane_b32 s0, v254, 32 ; 020141FE v_readlane_b32 s1, v254, 33 ; 020343FE s_nop 2 ; BF800002 v_cndmask_b32_e64 v3, 0, 1.0, s[0:1] ; D2000003 0001E480 v_cmp_eq_f32_e64 s[4:5], v23, v28 ; D0040004 00023917 v_cndmask_b32_e64 v23, 0, 1.0, s[4:5] ; D2000017 0011E480 v_readlane_b32 s0, v254, 22 ; 02012DFE v_readlane_b32 s1, v254, 23 ; 02032FFE s_nop 2 ; BF800002 v_cndmask_b32_e64 v4, 0, 1.0, s[0:1] ; D2000004 0001E480 v_readlane_b32 s0, v254, 24 ; 020131FE v_readlane_b32 s1, v254, 25 ; 020333FE s_nop 2 ; BF800002 v_cndmask_b32_e64 v0, 0, 1.0, s[0:1] ; D2000000 0001E480 v_readlane_b32 s0, v254, 30 ; 02013DFE v_readlane_b32 s1, v254, 31 ; 02033FFE s_nop 2 ; BF800002 v_cndmask_b32_e64 v2, 0, 1.0, s[0:1] ; D2000002 0001E480 v_mul_f32_e32 v71, v253, v71 ; 108E8FFD v_mul_f32_e32 v72, v253, v72 ; 109091FD v_mul_f32_e32 v73, v253, v73 ; 109293FD v_mac_f32_e32 v71, v252, v68 ; 3E8E89FC v_mac_f32_e32 v72, v252, v69 ; 3E908BFC v_mac_f32_e32 v73, v252, v70 ; 3E928DFC v_mac_f32_e32 v71, v31, v65 ; 3E8E831F v_mac_f32_e32 v72, v31, v66 ; 3E90851F v_mac_f32_e32 v73, v31, v67 ; 3E92871F v_mac_f32_e32 v71, v18, v62 ; 3E8E7D12 v_mac_f32_e32 v72, v18, v63 ; 3E907F12 v_mac_f32_e32 v73, v18, v64 ; 3E928112 v_mac_f32_e32 v71, v25, v59 ; 3E8E7719 v_mac_f32_e32 v72, v25, v60 ; 3E907919 v_mac_f32_e32 v73, v25, v61 ; 3E927B19 v_mul_f32_e32 v59, v253, v86 ; 1076ADFD v_mul_f32_e32 v60, v253, v87 ; 1078AFFD v_mul_f32_e32 v61, v253, v88 ; 107AB1FD v_mac_f32_e32 v59, v252, v83 ; 3E76A7FC v_mac_f32_e32 v60, v252, v84 ; 3E78A9FC v_mac_f32_e32 v61, v252, v85 ; 3E7AABFC v_mac_f32_e32 v59, v31, v80 ; 3E76A11F v_mac_f32_e32 v60, v31, v81 ; 3E78A31F v_mac_f32_e32 v61, v31, v82 ; 3E7AA51F v_mac_f32_e32 v59, v18, v77 ; 3E769B12 v_mac_f32_e32 v60, v18, v78 ; 3E789D12 v_mac_f32_e32 v61, v18, v79 ; 3E7A9F12 v_mac_f32_e32 v59, v25, v74 ; 3E769519 v_mac_f32_e32 v60, v25, v75 ; 3E789719 v_mac_f32_e32 v61, v25, v76 ; 3E7A9919 v_mul_f32_e32 v62, v253, v101 ; 107CCBFD v_mul_f32_e32 v63, v253, v102 ; 107ECDFD v_mul_f32_e32 v64, v253, v103 ; 1080CFFD v_mac_f32_e32 v62, v252, v98 ; 3E7CC5FC v_mac_f32_e32 v63, v252, v99 ; 3E7EC7FC v_mac_f32_e32 v64, v252, v100 ; 3E80C9FC v_mac_f32_e32 v62, v31, v95 ; 3E7CBF1F v_mac_f32_e32 v63, v31, v96 ; 3E7EC11F v_mac_f32_e32 v64, v31, v97 ; 3E80C31F v_mac_f32_e32 v62, v18, v92 ; 3E7CB912 v_mac_f32_e32 v63, v18, v93 ; 3E7EBB12 v_mac_f32_e32 v64, v18, v94 ; 3E80BD12 v_mac_f32_e32 v62, v25, v89 ; 3E7CB319 v_mac_f32_e32 v63, v25, v90 ; 3E7EB519 v_mac_f32_e32 v64, v25, v91 ; 3E80B719 v_mul_f32_e32 v65, v2, v116 ; 1082E902 v_mul_f32_e32 v66, v2, v117 ; 1084EB02 v_mul_f32_e32 v67, v2, v118 ; 1086ED02 v_mac_f32_e32 v65, v0, v113 ; 3E82E300 v_mac_f32_e32 v66, v0, v114 ; 3E84E500 v_mac_f32_e32 v67, v0, v115 ; 3E86E700 v_mac_f32_e32 v65, v4, v110 ; 3E82DD04 v_mac_f32_e32 v66, v4, v111 ; 3E84DF04 v_mac_f32_e32 v67, v4, v112 ; 3E86E104 v_mac_f32_e32 v65, v23, v107 ; 3E82D717 v_mac_f32_e32 v66, v23, v108 ; 3E84D917 v_mac_f32_e32 v67, v23, v109 ; 3E86DB17 v_mac_f32_e32 v65, v3, v104 ; 3E82D103 v_mac_f32_e32 v66, v3, v105 ; 3E84D303 v_mac_f32_e32 v67, v3, v106 ; 3E86D503 v_mul_f32_e32 v68, v2, v131 ; 10890702 v_mul_f32_e32 v69, v2, v132 ; 108B0902 v_mul_f32_e32 v70, v2, v133 ; 108D0B02 v_mac_f32_e32 v68, v0, v128 ; 3E890100 v_mac_f32_e32 v69, v0, v129 ; 3E8B0300 v_mac_f32_e32 v70, v0, v130 ; 3E8D0500 v_mac_f32_e32 v68, v4, v125 ; 3E88FB04 v_mac_f32_e32 v69, v4, v126 ; 3E8AFD04 v_mac_f32_e32 v70, v4, v127 ; 3E8CFF04 v_mac_f32_e32 v68, v23, v122 ; 3E88F517 v_mac_f32_e32 v69, v23, v123 ; 3E8AF717 v_mac_f32_e32 v70, v23, v124 ; 3E8CF917 v_mac_f32_e32 v68, v3, v119 ; 3E88EF03 v_mac_f32_e32 v69, v3, v120 ; 3E8AF103 v_mac_f32_e32 v70, v3, v121 ; 3E8CF303 v_mul_f32_e32 v74, v2, v146 ; 10952502 v_mul_f32_e32 v75, v2, v147 ; 10972702 v_mul_f32_e32 v76, v2, v148 ; 10992902 v_mac_f32_e32 v74, v0, v143 ; 3E951F00 v_mac_f32_e32 v75, v0, v144 ; 3E972100 v_mac_f32_e32 v76, v0, v145 ; 3E992300 v_mac_f32_e32 v74, v4, v140 ; 3E951904 v_mac_f32_e32 v75, v4, v141 ; 3E971B04 v_mac_f32_e32 v76, v4, v142 ; 3E991D04 v_mac_f32_e32 v74, v23, v137 ; 3E951317 v_mac_f32_e32 v75, v23, v138 ; 3E971517 v_mac_f32_e32 v76, v23, v139 ; 3E991717 v_mac_f32_e32 v74, v3, v134 ; 3E950D03 v_mac_f32_e32 v75, v3, v135 ; 3E970F03 v_mac_f32_e32 v76, v3, v136 ; 3E991103 v_readlane_b32 s0, v254, 20 ; 020129FE v_readlane_b32 s1, v254, 21 ; 02032BFE s_nop 2 ; BF800002 v_cndmask_b32_e64 v77, 0, 1.0, s[0:1] ; D200004D 0001E480 v_mul_f32_e32 v78, v77, v161 ; 109D434D v_mul_f32_e32 v79, v77, v162 ; 109F454D v_mul_f32_e32 v80, v77, v163 ; 10A1474D v_readlane_b32 s0, v254, 18 ; 020125FE v_readlane_b32 s1, v254, 19 ; 020327FE s_nop 2 ; BF800002 v_cndmask_b32_e64 v81, 0, 1.0, s[0:1] ; D2000051 0001E480 v_mac_f32_e32 v78, v81, v158 ; 3E9D3D51 v_mac_f32_e32 v79, v81, v159 ; 3E9F3F51 v_mac_f32_e32 v80, v81, v160 ; 3EA14151 v_readlane_b32 s0, v254, 16 ; 020121FE v_readlane_b32 s1, v254, 17 ; 020323FE s_nop 2 ; BF800002 v_cndmask_b32_e64 v82, 0, 1.0, s[0:1] ; D2000052 0001E480 v_mac_f32_e32 v78, v82, v155 ; 3E9D3752 v_mac_f32_e32 v79, v82, v156 ; 3E9F3952 v_mac_f32_e32 v80, v82, v157 ; 3EA13B52 v_cmp_eq_f32_e64 s[0:1], v24, v28 ; D0040000 00023918 v_cndmask_b32_e64 v24, 0, 1.0, s[0:1] ; D2000018 0001E480 v_mac_f32_e32 v78, v24, v152 ; 3E9D3118 v_mac_f32_e32 v79, v24, v153 ; 3E9F3318 v_mac_f32_e32 v80, v24, v154 ; 3EA13518 v_readlane_b32 s0, v254, 14 ; 02011DFE v_readlane_b32 s1, v254, 15 ; 02031FFE s_nop 2 ; BF800002 v_cndmask_b32_e64 v83, 0, 1.0, s[0:1] ; D2000053 0001E480 v_mac_f32_e32 v78, v83, v149 ; 3E9D2B53 v_mac_f32_e32 v79, v83, v150 ; 3E9F2D53 v_mac_f32_e32 v80, v83, v151 ; 3EA12F53 v_mul_f32_e32 v84, v77, v176 ; 10A9614D v_mul_f32_e32 v85, v77, v177 ; 10AB634D v_mul_f32_e32 v86, v77, v178 ; 10AD654D v_mac_f32_e32 v84, v81, v173 ; 3EA95B51 v_mac_f32_e32 v85, v81, v174 ; 3EAB5D51 v_mac_f32_e32 v86, v81, v175 ; 3EAD5F51 v_mac_f32_e32 v84, v82, v170 ; 3EA95552 v_mac_f32_e32 v85, v82, v171 ; 3EAB5752 v_mac_f32_e32 v86, v82, v172 ; 3EAD5952 v_mac_f32_e32 v84, v24, v167 ; 3EA94F18 v_mac_f32_e32 v85, v24, v168 ; 3EAB5118 v_mac_f32_e32 v86, v24, v169 ; 3EAD5318 v_mac_f32_e32 v84, v83, v164 ; 3EA94953 v_mac_f32_e32 v85, v83, v165 ; 3EAB4B53 v_mac_f32_e32 v86, v83, v166 ; 3EAD4D53 v_mul_f32_e32 v87, v77, v191 ; 10AF7F4D v_mul_f32_e32 v88, v77, v192 ; 10B1814D v_mul_f32_e32 v89, v77, v193 ; 10B3834D v_mac_f32_e32 v87, v81, v188 ; 3EAF7951 v_mac_f32_e32 v88, v81, v189 ; 3EB17B51 v_mac_f32_e32 v89, v81, v190 ; 3EB37D51 v_mac_f32_e32 v87, v82, v185 ; 3EAF7352 v_mac_f32_e32 v88, v82, v186 ; 3EB17552 v_mac_f32_e32 v89, v82, v187 ; 3EB37752 v_mac_f32_e32 v87, v24, v182 ; 3EAF6D18 v_mac_f32_e32 v88, v24, v183 ; 3EB16F18 v_mac_f32_e32 v89, v24, v184 ; 3EB37118 v_mac_f32_e32 v87, v83, v179 ; 3EAF6753 v_mac_f32_e32 v88, v83, v180 ; 3EB16953 v_mac_f32_e32 v89, v83, v181 ; 3EB36B53 v_mul_f32_e32 v43, v253, v43 ; 105657FD v_mul_f32_e32 v44, v253, v44 ; 105859FD v_mac_f32_e32 v43, v252, v198 ; 3E578DFC v_mac_f32_e32 v44, v252, v199 ; 3E598FFC v_mul_f32_e32 v32, v253, v32 ; 104041FD v_mul_f32_e32 v33, v253, v33 ; 104243FD v_mac_f32_e32 v32, v252, v204 ; 3E4199FC v_mac_f32_e32 v33, v252, v205 ; 3E439BFC v_mul_f32_e32 v34, v253, v34 ; 104445FD v_mul_f32_e32 v35, v253, v35 ; 104647FD v_mac_f32_e32 v34, v252, v212 ; 3E45A9FC v_mac_f32_e32 v35, v252, v213 ; 3E47ABFC v_mac_f32_e32 v43, v31, v196 ; 3E57891F v_mac_f32_e32 v44, v31, v197 ; 3E598B1F v_mac_f32_e32 v32, v31, v202 ; 3E41951F v_mac_f32_e32 v33, v31, v203 ; 3E43971F v_mac_f32_e32 v34, v31, v210 ; 3E45A51F v_mac_f32_e32 v35, v31, v211 ; 3E47A71F v_mac_f32_e32 v43, v18, v194 ; 3E578512 v_mac_f32_e32 v44, v18, v195 ; 3E598712 v_mac_f32_e32 v32, v18, v200 ; 3E419112 v_mac_f32_e32 v33, v18, v201 ; 3E439312 v_mac_f32_e32 v34, v18, v208 ; 3E45A112 v_mac_f32_e32 v35, v18, v209 ; 3E47A312 v_mac_f32_e32 v43, v25, v38 ; 3E564D19 v_mac_f32_e32 v44, v25, v39 ; 3E584F19 v_mac_f32_e32 v32, v25, v45 ; 3E405B19 v_mac_f32_e32 v33, v25, v46 ; 3E425D19 v_mac_f32_e32 v34, v25, v206 ; 3E459D19 v_mac_f32_e32 v35, v25, v207 ; 3E479F19 v_mul_f32_e32 v18, v2, v51 ; 10246702 v_mul_f32_e32 v25, v2, v52 ; 10326902 v_mac_f32_e32 v18, v0, v218 ; 3E25B500 v_mac_f32_e32 v25, v0, v219 ; 3E33B700 v_mul_f32_e32 v29, v2, v29 ; 103A3B02 v_mul_f32_e32 v30, v2, v30 ; 103C3D02 v_mac_f32_e32 v29, v0, v224 ; 3E3BC100 v_mac_f32_e32 v30, v0, v225 ; 3E3DC300 s_waitcnt vmcnt(13) ; BF8C077D v_mul_f32_e32 v31, v2, v47 ; 103E5F02 v_mul_f32_e32 v2, v2, v48 ; 10046102 v_mac_f32_e32 v31, v0, v232 ; 3E3FD100 v_mac_f32_e32 v2, v0, v233 ; 3E05D300 v_mac_f32_e32 v18, v4, v216 ; 3E25B104 v_mac_f32_e32 v25, v4, v217 ; 3E33B304 v_mac_f32_e32 v29, v4, v222 ; 3E3BBD04 v_mac_f32_e32 v30, v4, v223 ; 3E3DBF04 v_mac_f32_e32 v31, v4, v230 ; 3E3FCD04 v_mac_f32_e32 v2, v4, v231 ; 3E05CF04 v_mac_f32_e32 v18, v23, v214 ; 3E25AD17 v_mac_f32_e32 v25, v23, v215 ; 3E33AF17 v_mac_f32_e32 v29, v23, v220 ; 3E3BB917 v_mac_f32_e32 v30, v23, v221 ; 3E3DBB17 v_mac_f32_e32 v31, v23, v228 ; 3E3FC917 v_mac_f32_e32 v2, v23, v229 ; 3E05CB17 v_mac_f32_e32 v18, v3, v36 ; 3E244903 v_mac_f32_e32 v25, v3, v37 ; 3E324B03 v_mac_f32_e32 v29, v3, v53 ; 3E3A6B03 v_mac_f32_e32 v30, v3, v54 ; 3E3C6D03 v_mac_f32_e32 v31, v3, v226 ; 3E3FC503 v_mac_f32_e32 v2, v3, v227 ; 3E05C703 s_waitcnt vmcnt(8) ; BF8C0778 v_mul_f32_e32 v0, v77, v55 ; 10006F4D v_mul_f32_e32 v3, v77, v56 ; 1006714D v_mac_f32_e32 v0, v81, v238 ; 3E01DD51 v_mac_f32_e32 v3, v81, v239 ; 3E07DF51 s_waitcnt vmcnt(3) ; BF8C0773 v_mul_f32_e32 v4, v77, v26 ; 1008354D v_mul_f32_e32 v23, v77, v27 ; 102E374D v_mac_f32_e32 v4, v81, v244 ; 3E09E951 v_mac_f32_e32 v23, v81, v245 ; 3E2FEB51 image_sample_l v[26:27], 10, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[24:31], s[12:15] ; F0900A00 00661A28 image_sample_l v[36:37], 10, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[16:23], s[8:11] ; F0900A00 00442428 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v36, v77, v36 ; 1048494D v_mul_f32_e32 v37, v77, v37 ; 104A4B4D v_mac_f32_e32 v36, v81, v26 ; 3E483551 v_mac_f32_e32 v37, v81, v27 ; 3E4A3751 v_mac_f32_e32 v0, v82, v236 ; 3E01D952 v_mac_f32_e32 v3, v82, v237 ; 3E07DB52 v_mac_f32_e32 v4, v82, v242 ; 3E09E552 v_mac_f32_e32 v23, v82, v243 ; 3E2FE752 v_mac_f32_e32 v36, v82, v250 ; 3E49F552 v_mac_f32_e32 v37, v82, v251 ; 3E4BF752 v_mac_f32_e32 v0, v24, v234 ; 3E01D518 v_mac_f32_e32 v3, v24, v235 ; 3E07D718 v_mac_f32_e32 v4, v24, v240 ; 3E09E118 v_mac_f32_e32 v23, v24, v241 ; 3E2FE318 v_mac_f32_e32 v36, v24, v248 ; 3E49F118 v_mac_f32_e32 v37, v24, v249 ; 3E4BF318 v_mac_f32_e32 v0, v83, v49 ; 3E006353 v_mac_f32_e32 v3, v83, v50 ; 3E066553 v_mul_f32_e32 v21, v22, v21 ; 102A2B16 v_mul_f32_e32 v20, v22, v20 ; 10282916 v_mul_f32_e32 v19, v22, v19 ; 10262716 v_mov_b32_e32 v22, 0x3c23d70a ; 7E2C02FF 3C23D70A v_max_f32_e32 v21, v22, v21 ; 202A2B16 v_max_f32_e32 v20, v22, v20 ; 20282916 v_max_f32_e32 v19, v22, v19 ; 20262716 v_add_f32_e32 v22, v20, v21 ; 062C2B14 v_add_f32_e32 v22, v19, v22 ; 062C2D13 v_rcp_f32_e32 v22, v22 ; 7E2C5516 v_mac_f32_e32 v4, v83, v57 ; 3E087353 v_mac_f32_e32 v23, v83, v58 ; 3E2E7553 v_mac_f32_e32 v36, v83, v246 ; 3E49ED53 v_mac_f32_e32 v37, v83, v247 ; 3E4BEF53 v_mul_f32_e32 v21, v22, v21 ; 102A2B16 v_mul_f32_e32 v20, v22, v20 ; 10282916 v_mul_f32_e32 v19, v22, v19 ; 10262716 v_mul_f32_e32 v22, v19, v78 ; 102C9D13 v_mac_f32_e32 v22, v21, v84 ; 3E2CA915 v_mul_f32_e32 v24, v19, v79 ; 10309F13 v_mac_f32_e32 v24, v21, v85 ; 3E30AB15 v_mul_f32_e32 v26, v19, v80 ; 1034A113 v_mac_f32_e32 v26, v21, v86 ; 3E34AD15 v_mac_f32_e32 v22, v20, v87 ; 3E2CAF14 v_mac_f32_e32 v24, v20, v88 ; 3E30B114 v_mac_f32_e32 v26, v20, v89 ; 3E34B314 v_mul_f32_e32 v27, v19, v65 ; 10368313 v_mac_f32_e32 v27, v21, v68 ; 3E368915 v_mul_f32_e32 v38, v19, v66 ; 104C8513 v_mac_f32_e32 v38, v21, v69 ; 3E4C8B15 v_mul_f32_e32 v39, v19, v67 ; 104E8713 v_mac_f32_e32 v39, v21, v70 ; 3E4E8D15 v_mac_f32_e32 v27, v20, v74 ; 3E369514 v_mac_f32_e32 v38, v20, v75 ; 3E4C9714 v_mac_f32_e32 v39, v20, v76 ; 3E4E9914 v_mul_f32_e32 v40, v19, v71 ; 10508F13 v_mac_f32_e32 v40, v21, v59 ; 3E507715 v_mul_f32_e32 v41, v19, v72 ; 10529113 v_mac_f32_e32 v41, v21, v60 ; 3E527915 v_mul_f32_e32 v42, v19, v73 ; 10549313 v_mac_f32_e32 v42, v21, v61 ; 3E547B15 v_mac_f32_e32 v40, v20, v62 ; 3E507D14 v_mac_f32_e32 v41, v20, v63 ; 3E527F14 v_mac_f32_e32 v42, v20, v64 ; 3E548114 v_mad_f32 v43, 2.0, v43, -1.0 ; D282002B 03CE56F4 v_mad_f32 v33, 2.0, v33, -1.0 ; D2820021 03CE42F4 v_mul_f32_e32 v43, v21, v43 ; 10565715 v_mac_f32_e32 v43, v20, v33 ; 3E564314 v_mad_f32 v32, 2.0, v32, -1.0 ; D2820020 03CE40F4 v_mad_f32 v33, 2.0, v35, -1.0 ; D2820021 03CE46F4 v_mul_f32_e32 v35, 0, v21 ; 10462A80 v_mad_f32 v32, v20, v32, v35 ; D2820020 048E4114 v_mac_f32_e32 v32, v19, v33 ; 3E404313 v_mad_f32 v33, 2.0, v44, -1.0 ; D2820021 03CE58F4 v_mad_f32 v34, 2.0, v34, -1.0 ; D2820022 03CE44F4 v_mul_f32_e32 v33, v21, v33 ; 10424315 v_mac_f32_e32 v33, 0, v20 ; 3E422880 v_mac_f32_e32 v33, v19, v34 ; 3E424513 v_mad_f32 v18, 2.0, v18, -1.0 ; D2820012 03CE24F4 v_mad_f32 v30, 2.0, v30, -1.0 ; D282001E 03CE3CF4 v_mul_f32_e32 v18, v21, v18 ; 10242515 v_mac_f32_e32 v18, v20, v30 ; 3E243D14 v_mad_f32 v29, 2.0, v29, -1.0 ; D282001D 03CE3AF4 v_mad_f32 v2, 2.0, v2, -1.0 ; D2820002 03CE04F4 v_mad_f32 v29, v20, v29, v35 ; D282001D 048E3B14 v_mac_f32_e32 v29, v19, v2 ; 3E3A0513 v_mad_f32 v2, 2.0, v25, -1.0 ; D2820002 03CE32F4 v_mad_f32 v25, 2.0, v31, -1.0 ; D2820019 03CE3EF4 v_mul_f32_e32 v2, v21, v2 ; 10040515 v_mac_f32_e32 v2, 0, v20 ; 3E042880 v_mac_f32_e32 v2, v19, v25 ; 3E043313 v_mad_f32 v3, 2.0, v3, -1.0 ; D2820003 03CE06F4 v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4 v_mul_f32_e32 v3, v21, v3 ; 10060715 v_mul_f32_e32 v0, v21, v0 ; 10000115 v_mad_f32 v4, 2.0, v4, -1.0 ; D2820004 03CE08F4 v_mac_f32_e32 v35, v20, v4 ; 3E460914 v_mad_f32 v4, 2.0, v23, -1.0 ; D2820004 03CE2EF4 v_mac_f32_e32 v0, v20, v4 ; 3E000914 v_mac_f32_e32 v3, 0, v20 ; 3E062880 v_mad_f32 v4, 2.0, v37, -1.0 ; D2820004 03CE4AF4 v_mac_f32_e32 v35, v19, v4 ; 3E460913 v_mad_f32 v4, 2.0, v36, -1.0 ; D2820004 03CE48F4 v_mac_f32_e32 v3, v19, v4 ; 3E060913 v_mac_f32_e32 v43, 0, v19 ; 3E562680 v_mac_f32_e32 v18, 0, v19 ; 3E242680 v_mac_f32_e32 v0, 0, v19 ; 3E002680 v_mul_f32_e32 v4, v40, v8 ; 10081128 v_mul_f32_e32 v19, v41, v8 ; 10261129 v_mul_f32_e32 v20, v42, v8 ; 1028112A v_mul_f32_e32 v21, v32, v8 ; 102A1120 v_mul_f32_e32 v23, v33, v8 ; 102E1121 v_mul_f32_e32 v8, v43, v8 ; 1010112B v_mac_f32_e32 v21, v29, v7 ; 3E2A0F1D v_mac_f32_e32 v23, v2, v7 ; 3E2E0F02 v_mac_f32_e32 v8, v18, v7 ; 3E100F12 v_mac_f32_e32 v21, v35, v6 ; 3E2A0D23 v_mac_f32_e32 v23, v3, v6 ; 3E2E0D03 v_mac_f32_e32 v8, v0, v6 ; 3E100D00 v_mul_f32_e32 v0, v23, v23 ; 10002F17 v_mac_f32_e32 v0, v21, v21 ; 3E002B15 v_mac_f32_e32 v0, v8, v8 ; 3E001108 v_add_f32_e32 v0, 1.0, v0 ; 060000F2 v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_mac_f32_e32 v4, v27, v7 ; 3E080F1B v_mac_f32_e32 v19, v38, v7 ; 3E260F26 v_mac_f32_e32 v20, v39, v7 ; 3E280F27 v_mul_f32_e32 v2, v0, v21 ; 10042B00 v_mul_f32_e32 v3, v0, v23 ; 10062F00 v_mul_f32_e32 v0, v0, v8 ; 10001100 v_mad_f32 v2, -v2, v5, v9 ; D2820002 24260B02 v_mad_f32 v3, -v3, v5, v10 ; D2820003 242A0B03 v_mad_f32 v0, -v0, v5, v11 ; D2820000 242E0B00 v_mac_f32_e32 v4, v22, v6 ; 3E080D16 v_mac_f32_e32 v19, v24, v6 ; 3E260D18 v_mul_f32_e32 v5, v2, v2 ; 100A0502 v_mac_f32_e32 v5, v3, v3 ; 3E0A0703 v_mac_f32_e32 v5, v0, v0 ; 3E0A0100 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mul_f32_e32 v7, v16, v16 ; 100E2110 v_mac_f32_e32 v7, v17, v17 ; 3E0E2311 v_mac_f32_e32 v7, v12, v12 ; 3E0E190C v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_mac_f32_e32 v20, v26, v6 ; 3E280D1A v_mul_f32_e32 v2, v5, v2 ; 10040505 v_mul_f32_e32 v3, v5, v3 ; 10060705 v_mul_f32_e32 v6, v7, v16 ; 100C2107 v_mul_f32_e32 v8, v7, v17 ; 10102307 v_mul_f32_e32 v6, v6, v2 ; 100C0506 v_mac_f32_e32 v6, v8, v3 ; 3E0C0708 v_mul_f32_e32 v2, v15, v2 ; 1004050F v_mac_f32_e32 v2, v14, v3 ; 3E04070E v_mul_f32_e32 v0, v5, v0 ; 10000105 v_mul_f32_e32 v3, v7, v12 ; 10061907 v_mac_f32_e32 v6, v3, v0 ; 3E0C0103 v_mac_f32_e32 v2, v13, v0 ; 3E04010D v_readlane_b32 s0, v254, 13 ; 02011BFE s_nop 2 ; BF800002 v_add_f32_e64 v0, s0, s0 ; D2060000 00000000 v_readlane_b32 s0, v254, 8 ; 020111FE s_nop 2 ; BF800002 v_max_f32_e32 v0, s0, v0 ; 20000000 v_readlane_b32 s0, v254, 11 ; 020117FE s_nop 2 ; BF800002 v_add_f32_e64 v3, s0, s0 ; D2060003 00000000 v_readlane_b32 s0, v254, 9 ; 020113FE s_nop 2 ; BF800002 v_max_f32_e32 v3, s0, v3 ; 20060600 v_readlane_b32 s0, v254, 12 ; 020119FE s_nop 2 ; BF800002 v_add_f32_e64 v5, s0, s0 ; D2060005 00000000 v_readlane_b32 s0, v254, 10 ; 020115FE s_nop 2 ; BF800002 v_max_f32_e32 v5, s0, v5 ; 200A0A00 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_min_f32_e32 v0, 1.0, v0 ; 1E0000F2 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_readlane_b32 s1, v254, 46 ; 02035DFE s_nop 2 ; BF800002 v_mul_f32_e32 v7, s1, v4 ; 100E0801 v_mac_f32_e32 v0, v2, v7 ; 3E000F02 v_min_f32_e32 v3, 1.0, v3 ; 1E0606F2 v_mul_f32_e32 v3, v19, v3 ; 10060713 v_readlane_b32 s2, v254, 47 ; 02055FFE s_nop 2 ; BF800002 v_mul_f32_e32 v7, s2, v19 ; 100E2602 v_mac_f32_e32 v3, v2, v7 ; 3E060F02 v_max_f32_e32 v6, 0x38d1b717, v6 ; 200C0CFF 38D1B717 v_log_f32_e32 v6, v6 ; 7E0C4F06 v_min_f32_e32 v5, 1.0, v5 ; 1E0A0AF2 v_mul_f32_e32 v5, v20, v5 ; 100A0B14 v_readlane_b32 s0, v254, 7 ; 02010FFE s_nop 2 ; BF800002 v_mul_f32_e32 v7, s0, v20 ; 100E2800 v_mac_f32_e32 v5, v2, v7 ; 3E0A0F02 s_mov_b32 s4, SCRATCH_RSRC_DWORD0 ; BE8403FF 00000000 s_mov_b32 s5, SCRATCH_RSRC_DWORD1 ; BE8503FF 00000000 s_mov_b32 s6, -1 ; BE8603C1 s_mov_b32 s7, 0x80f000 ; BE8703FF 0080F000 buffer_load_dword v7, s[4:7], s12 ; E0300000 0C010700 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, 0x42000000, v7 ; 10040EFF 42000000 v_mul_legacy_f32_e32 v2, v2, v6 ; 0E040D02 v_exp_f32_e32 v2, v2 ; 7E044B02 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mac_f32_e32 v28, -2.0, v2 ; 3E3804F5 v_mul_f32_e32 v6, v28, v2 ; 100C051C v_mul_f32_e32 v2, v6, v2 ; 10040506 v_mul_f32_e32 v2, v7, v2 ; 10040507 v_mac_f32_e32 v0, s1, v2 ; 3E000401 v_mac_f32_e32 v3, s2, v2 ; 3E060402 v_mac_f32_e32 v5, s0, v2 ; 3E0A0400 v_mul_f32_e32 v0, 0.5, v0 ; 100000F0 v_mul_f32_e32 v2, 0.5, v3 ; 100406F0 v_mul_f32_e32 v3, 0.5, v5 ; 10060AF0 s_mov_b32 s4, SCRATCH_RSRC_DWORD0 ; BE8403FF 00000000 s_mov_b32 s5, SCRATCH_RSRC_DWORD1 ; BE8503FF 00000000 s_mov_b32 s6, -1 ; BE8603C1 s_mov_b32 s7, 0x80f000 ; BE8703FF 0080F000 buffer_load_dword v5, s[4:7], s12 offset:4 ; E0300004 0C010500 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, v5, v4 ; 3E000905 s_mov_b32 s4, SCRATCH_RSRC_DWORD0 ; BE8403FF 00000000 s_mov_b32 s5, SCRATCH_RSRC_DWORD1 ; BE8503FF 00000000 s_mov_b32 s6, -1 ; BE8603C1 s_mov_b32 s7, 0x80f000 ; BE8703FF 0080F000 buffer_load_dword v4, s[4:7], s12 offset:8 ; E0300008 0C010400 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v4, v19 ; 3E042704 s_mov_b32 s4, SCRATCH_RSRC_DWORD0 ; BE8403FF 00000000 s_mov_b32 s5, SCRATCH_RSRC_DWORD1 ; BE8503FF 00000000 s_mov_b32 s6, -1 ; BE8603C1 s_mov_b32 s7, 0x80f000 ; BE8703FF 0080F000 buffer_load_dword v4, s[4:7], s12 offset:12 ; E030000C 0C010400 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v3, v4, v20 ; 3E062904 v_sub_f32_e32 v4, 1.0, v1 ; 080802F2 v_readlane_b32 s0, v254, 6 ; 02010DFE s_nop 2 ; BF800002 v_mul_f32_e32 v5, s0, v4 ; 100A0800 v_mac_f32_e32 v5, v0, v1 ; 3E0A0300 v_readlane_b32 s0, v254, 5 ; 02010BFE s_nop 2 ; BF800002 v_mul_f32_e32 v0, s0, v4 ; 10000800 v_mac_f32_e32 v0, v2, v1 ; 3E000302 v_readlane_b32 s0, v254, 4 ; 020109FE s_nop 2 ; BF800002 v_mul_f32_e32 v2, s0, v4 ; 10040800 v_mac_f32_e32 v2, v3, v1 ; 3E040303 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 v_cvt_pkrtz_f16_f32_e64 v1, v2, 1.0 ; D25E0001 0001E502 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 104 VGPRS: 256 Code Size: 5240 bytes LDS: 0 blocks Scratch: 18432 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..9] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.5000, -1.0000, 1.0000, 0.0000} 0: MUL TEMP[0], CONST[2], IN[0].xxxx 1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[5], IN[0].wwww, TEMP[0] 4: MUL TEMP[1].xyw, TEMP[0], IMM[0].xxxx 5: MOV TEMP[2].x, TEMP[1].xxxx 6: MUL TEMP[3].x, TEMP[1].yyyy, CONST[0].xxxx 7: MOV TEMP[2].y, TEMP[3].xxxx 8: ADD TEMP[1].xy, TEMP[2].xyyy, TEMP[1].wwww 9: MOV TEMP[1].zw, TEMP[0].wwzw 10: MUL TEMP[2], CONST[6], IN[0].xxxx 11: MAD TEMP[2], CONST[7], IN[0].yyyy, TEMP[2] 12: MAD TEMP[2], CONST[8], IN[0].zzzz, TEMP[2] 13: MAD TEMP[2].xyz, CONST[9], IN[0].wwww, TEMP[2] 14: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[0].yyzz 15: LRP TEMP[2].xyz, CONST[1].xxxx, IN[1].xyzz, TEMP[2].xyzz 16: MOV OUT[1], TEMP[1] 17: MOV OUT[2], TEMP[2] 18: MOV OUT[0], TEMP[0] 19: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = extractelement <4 x float> %49, i32 3 %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = add i32 %5, %7 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = fmul float %15, %50 %62 = fmul float %16, %50 %63 = fmul float %17, %50 %64 = fmul float %18, %50 %65 = fmul float %19, %51 %66 = fadd float %65, %61 %67 = fmul float %20, %51 %68 = fadd float %67, %62 %69 = fmul float %21, %51 %70 = fadd float %69, %63 %71 = fmul float %22, %51 %72 = fadd float %71, %64 %73 = fmul float %23, %52 %74 = fadd float %73, %66 %75 = fmul float %24, %52 %76 = fadd float %75, %68 %77 = fmul float %25, %52 %78 = fadd float %77, %70 %79 = fmul float %26, %52 %80 = fadd float %79, %72 %81 = fmul float %27, %53 %82 = fadd float %81, %74 %83 = fmul float %28, %53 %84 = fadd float %83, %76 %85 = fmul float %29, %53 %86 = fadd float %85, %78 %87 = fmul float %30, %53 %88 = fadd float %87, %80 %89 = fmul float %82, 5.000000e-01 %90 = fmul float %84, 5.000000e-01 %91 = fmul float %88, 5.000000e-01 %92 = fmul float %90, %13 %93 = fadd float %89, %91 %94 = fadd float %92, %91 %95 = fmul float %31, %50 %96 = fmul float %32, %50 %97 = fmul float %33, %50 %98 = fmul float %34, %50 %99 = fmul float %35, %51 %100 = fadd float %99, %95 %101 = fmul float %36, %51 %102 = fadd float %101, %96 %103 = fmul float %37, %51 %104 = fadd float %103, %97 %105 = fmul float %38, %51 %106 = fadd float %105, %98 %107 = fmul float %39, %52 %108 = fadd float %107, %100 %109 = fmul float %40, %52 %110 = fadd float %109, %102 %111 = fmul float %41, %52 %112 = fadd float %111, %104 %113 = fmul float %42, %52 %114 = fadd float %113, %106 %115 = fmul float %43, %53 %116 = fadd float %115, %108 %117 = fmul float %44, %53 %118 = fadd float %117, %110 %119 = fmul float %45, %53 %120 = fadd float %119, %112 %121 = fsub float -0.000000e+00, %116 %122 = fsub float -0.000000e+00, %118 %123 = call float @llvm.AMDGPU.lrp(float %14, float %58, float %121) %124 = call float @llvm.AMDGPU.lrp(float %14, float %59, float %122) %125 = call float @llvm.AMDGPU.lrp(float %14, float %60, float %120) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %93, float %94, float %86, float %88) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %123, float %124, float %125, float %114) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %82, float %84, float %86, float %88) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_buffer_load_dword s5, s[0:3], 0xc ; C202810C s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109 s_buffer_load_dword s7, s[0:3], 0xd ; C203810D s_buffer_load_dword s8, s[0:3], 0xa ; C204010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v1 ; 10000204 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_buffer_load_dword s9, s[0:3], 0xf ; C204810F v_mac_f32_e32 v0, s5, v2 ; 3E000405 v_mul_f32_e32 v8, s6, v1 ; 10100206 v_mac_f32_e32 v8, s7, v2 ; 3E100407 s_buffer_load_dword s5, s[0:3], 0xb ; C202810B v_mul_f32_e32 v9, s8, v1 ; 10120208 s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118 s_buffer_load_dword s7, s[0:3], 0x1c ; C203811C s_buffer_load_dword s8, s[0:3], 0x19 ; C2040119 s_buffer_load_dword s10, s[0:3], 0x1d ; C205011D s_buffer_load_dword s11, s[0:3], 0x1a ; C205811A s_buffer_load_dword s12, s[0:3], 0x1e ; C206011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v9, s4, v2 ; 3E120404 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B v_mul_f32_e32 v10, s5, v1 ; 10140205 v_mac_f32_e32 v10, s9, v2 ; 3E140409 v_mul_f32_e32 v11, s6, v1 ; 10160206 v_mac_f32_e32 v11, s7, v2 ; 3E160407 v_mul_f32_e32 v12, s8, v1 ; 10180208 v_mac_f32_e32 v12, s10, v2 ; 3E18040A v_mul_f32_e32 v13, s11, v1 ; 101A020B v_mac_f32_e32 v13, s12, v2 ; 3E1A040C s_buffer_load_dword s5, s[0:3], 0x1f ; C202811F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v1 ; 10020204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s6, s[0:3], 0x11 ; C2030111 s_buffer_load_dword s7, s[0:3], 0x12 ; C2038112 s_buffer_load_dword s8, s[0:3], 0x13 ; C2040113 s_buffer_load_dword s9, s[0:3], 0x20 ; C2048120 s_buffer_load_dword s10, s[0:3], 0x21 ; C2050121 s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122 s_buffer_load_dword s12, s[0:3], 0x23 ; C2060123 v_mac_f32_e32 v1, s5, v2 ; 3E020405 s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s4, v3 ; 3E000604 v_mac_f32_e32 v8, s6, v3 ; 3E100606 v_mac_f32_e32 v9, s7, v3 ; 3E120607 v_mac_f32_e32 v10, s8, v3 ; 3E140608 v_mac_f32_e32 v11, s9, v3 ; 3E160609 v_mac_f32_e32 v12, s10, v3 ; 3E18060A s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_buffer_load_dword s6, s[0:3], 0x16 ; C2030116 s_buffer_load_dword s7, s[0:3], 0x17 ; C2038117 s_buffer_load_dword s8, s[0:3], 0x24 ; C2040124 s_buffer_load_dword s9, s[0:3], 0x25 ; C2048125 s_buffer_load_dword s10, s[0:3], 0x26 ; C2050126 v_mac_f32_e32 v13, s11, v3 ; 3E1A060B s_buffer_load_dword s11, s[0:3], 0x4 ; C2058104 v_mac_f32_e32 v1, s12, v3 ; 3E02060C v_mac_f32_e32 v0, s5, v4 ; 3E000805 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v8, s4, v4 ; 3E100804 v_mac_f32_e32 v9, s6, v4 ; 3E120806 v_mac_f32_e32 v10, s7, v4 ; 3E140807 v_mac_f32_e32 v11, s8, v4 ; 3E160808 v_mac_f32_e32 v12, s9, v4 ; 3E180809 v_mac_f32_e32 v13, s10, v4 ; 3E1A080A s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 v_sub_f32_e64 v2, 1.0, s11 ; D2080002 000016F2 v_mul_f32_e32 v3, v11, v2 ; 1006050B v_mul_f32_e32 v4, v12, v2 ; 1008050C v_mul_f32_e32 v2, v13, v2 ; 1004050D v_mad_f32 v3, s11, v5, -v3 ; D2820003 840E0A0B v_mad_f32 v4, s11, v6, -v4 ; D2820004 84120C0B v_mac_f32_e32 v2, s11, v7 ; 3E040E0B v_mul_f32_e32 v5, 0.5, v8 ; 100A10F0 v_mul_f32_e32 v6, 0.5, v10 ; 100C14F0 v_mad_f32 v7, 0.5, v0, v6 ; D2820007 041A00F0 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v6, s0, v5 ; 3E0C0A00 exp 15, 32, 0, 0, 0, v7, v6, v9, v10 ; F800020F 0A090607 exp 15, 33, 0, 0, 0, v3, v4, v2, v1 ; F800021F 01020403 exp 15, 12, 0, 1, 0, v0, v8, v9, v10 ; F80008CF 0A090800 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 408 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[0..4] DCL CONST[6..12] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 1.0000, 2.0000, -1.0000, 0.0000} IMM[1] FLT32 { 128.0000, 0.0000, 0.0000, 0.0000} 0: RCP TEMP[0].x, IN[0].wwww 1: MUL TEMP[0].xy, IN[0].xyyy, TEMP[0].xxxx 2: RCP TEMP[1].x, IN[1].zzzz 3: MUL TEMP[2].x, CONST[1].zzzz, TEMP[1].xxxx 4: MUL TEMP[2].xyz, IN[1].xyzz, TEMP[2].xxxx 5: MOV TEMP[3].xy, TEMP[0].xyyy 6: TEX TEMP[3].x, TEMP[3], SAMP[0], 2D 7: MAD TEMP[3].x, CONST[2].xxxx, TEMP[3].xxxx, CONST[2].yyyy 8: RCP TEMP[3].x, TEMP[3].xxxx 9: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[3].xxxx 10: MUL TEMP[2], CONST[9], TEMP[1].xxxx 11: MAD TEMP[2], CONST[10], TEMP[1].yyyy, TEMP[2] 12: MAD TEMP[2], CONST[11], TEMP[1].zzzz, TEMP[2] 13: ADD TEMP[2].xyz, TEMP[2], CONST[12] 14: ADD TEMP[3].xyz, TEMP[2].xyzz, -CONST[3].xyzz 15: ADD TEMP[4].xyz, TEMP[2].xyzz, -CONST[6].xyzz 16: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 17: RSQ TEMP[5].x, TEMP[5].xxxx 18: MUL TEMP[5].xyz, TEMP[4].xyzz, TEMP[5].xxxx 19: MOV TEMP[5].xyz, -TEMP[5].xyzx 20: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[4].xyzz 21: MUL TEMP[4].x, TEMP[4].xxxx, CONST[6].wwww 22: MOV TEMP[4].xy, TEMP[4].xxxx 23: TEX TEMP[4].w, TEMP[4], SAMP[1], 2D 24: MOV TEMP[0].xy, TEMP[0].xyyy 25: TEX TEMP[0], TEMP[0], SAMP[2], 2D 26: MAD TEMP[6].xyz, TEMP[0].xyzz, IMM[0].yyyy, IMM[0].zzzz 27: DP3 TEMP[7].x, TEMP[6].xyzz, TEMP[6].xyzz 28: RSQ TEMP[7].x, TEMP[7].xxxx 29: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[7].xxxx 30: DP3 TEMP[7].x, TEMP[5].xyzz, TEMP[6].xyzz 31: MAX TEMP[7].x, IMM[0].wwww, TEMP[7].xxxx 32: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[4].wwww 33: MUL TEMP[7].xyz, CONST[7].xyzz, TEMP[7].xxxx 34: MUL TEMP[8].xyz, CONST[7].xyzz, CONST[4].xyzz 35: ADD TEMP[2].xyz, TEMP[2].xyzz, -CONST[0].xyzz 36: DP3 TEMP[9].x, TEMP[2].xyzz, TEMP[2].xyzz 37: RSQ TEMP[9].x, TEMP[9].xxxx 38: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[9].xxxx 39: ADD TEMP[2].xyz, TEMP[5].xyzz, -TEMP[2].xyzz 40: DP3 TEMP[5].x, TEMP[2].xyzz, TEMP[2].xyzz 41: RSQ TEMP[5].x, TEMP[5].xxxx 42: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx 43: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[6].xyzz 44: MAX TEMP[2].x, IMM[0].wwww, TEMP[2].xxxx 45: MUL TEMP[0].x, TEMP[0].wwww, IMM[1].xxxx 46: POW TEMP[0].x, TEMP[2].xxxx, TEMP[0].xxxx 47: MOV_SAT TEMP[2].x, TEMP[4].wwww 48: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx 49: ADD TEMP[2].x, TEMP[8].xxxx, TEMP[8].zzzz 50: MUL TEMP[2].x, TEMP[8].yyyy, TEMP[2].xxxx 51: SQRT TEMP[2].x, TEMP[2].xxxx 52: MUL TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 53: ADD TEMP[4].x, TEMP[8].xxxx, TEMP[8].yyyy 54: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[8].zzzz 55: MAD TEMP[2].x, TEMP[2].xxxx, CONST[4].wwww, TEMP[4].xxxx 56: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx 57: MOV TEMP[7].w, TEMP[0].xxxx 58: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[3].xyzz 59: SQRT TEMP[0].x, TEMP[0].xxxx 60: LRP TEMP[0].x, CONST[3].wwww, TEMP[0].xxxx, TEMP[1].zzzz 61: MAD TEMP[0].x, TEMP[0].xxxx, CONST[8].zzzz, CONST[8].wwww 62: ADD TEMP[0].x, IMM[0].xxxx, -TEMP[0].xxxx 63: MOV_SAT TEMP[0].x, TEMP[0].xxxx 64: MUL TEMP[0], TEMP[7], TEMP[0].xxxx 65: EX2 TEMP[1].x, -TEMP[0].xxxx 66: EX2 TEMP[1].y, -TEMP[0].yyyy 67: EX2 TEMP[1].z, -TEMP[0].zzzz 68: EX2 TEMP[1].w, -TEMP[0].wwww 69: MOV OUT[0], TEMP[1] 70: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %59 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %60 = load <32 x i8>, <32 x i8> addrspace(2)* %59, align 32, !tbaa !0 %61 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %64 = bitcast <8 x i32> addrspace(2)* %63 to <32 x i8> addrspace(2)* %65 = load <32 x i8>, <32 x i8> addrspace(2)* %64, align 32, !tbaa !0 %66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %67 = bitcast <4 x i32> addrspace(2)* %66 to <16 x i8> addrspace(2)* %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 %69 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %70 = bitcast <8 x i32> addrspace(2)* %69 to <32 x i8> addrspace(2)* %71 = load <32 x i8>, <32 x i8> addrspace(2)* %70, align 32, !tbaa !0 %72 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %73 = bitcast <4 x i32> addrspace(2)* %72 to <16 x i8> addrspace(2)* %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %81 = fdiv float 1.000000e+00, %77 %82 = fmul float %75, %81 %83 = fmul float %76, %81 %84 = fdiv float 1.000000e+00, %80 %85 = fmul float %27, %84 %86 = fmul float %78, %85 %87 = fmul float %79, %85 %88 = fmul float %80, %85 %89 = bitcast float %82 to i32 %90 = bitcast float %83 to i32 %91 = insertelement <2 x i32> undef, i32 %89, i32 0 %92 = insertelement <2 x i32> %91, i32 %90, i32 1 %93 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %92, <32 x i8> %60, <16 x i8> %62, i32 2) %94 = extractelement <4 x float> %93, i32 0 %95 = fmul float %28, %94 %96 = fadd float %95, %29 %97 = fdiv float 1.000000e+00, %96 %98 = fmul float %86, %97 %99 = fmul float %87, %97 %100 = fmul float %88, %97 %101 = fmul float %47, %98 %102 = fmul float %48, %98 %103 = fmul float %49, %98 %104 = fmul float %50, %99 %105 = fadd float %104, %101 %106 = fmul float %51, %99 %107 = fadd float %106, %102 %108 = fmul float %52, %99 %109 = fadd float %108, %103 %110 = fmul float %53, %100 %111 = fadd float %110, %105 %112 = fmul float %54, %100 %113 = fadd float %112, %107 %114 = fmul float %55, %100 %115 = fadd float %114, %109 %116 = fadd float %111, %56 %117 = fadd float %113, %57 %118 = fadd float %115, %58 %119 = fsub float %116, %30 %120 = fsub float %117, %31 %121 = fsub float %118, %32 %122 = fsub float %116, %38 %123 = fsub float %117, %39 %124 = fsub float %118, %40 %125 = fmul float %122, %122 %126 = fmul float %123, %123 %127 = fadd float %126, %125 %128 = fmul float %124, %124 %129 = fadd float %127, %128 %130 = call float @llvm.AMDGPU.rsq.clamped.f32(float %129) %131 = fmul float %122, %130 %132 = fmul float %123, %130 %133 = fmul float %124, %130 %134 = fmul float %122, %122 %135 = fmul float %123, %123 %136 = fadd float %135, %134 %137 = fmul float %124, %124 %138 = fadd float %136, %137 %139 = fmul float %138, %41 %140 = bitcast float %139 to i32 %141 = bitcast float %139 to i32 %142 = insertelement <2 x i32> undef, i32 %140, i32 0 %143 = insertelement <2 x i32> %142, i32 %141, i32 1 %144 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %143, <32 x i8> %65, <16 x i8> %68, i32 2) %145 = extractelement <4 x float> %144, i32 3 %146 = bitcast float %82 to i32 %147 = bitcast float %83 to i32 %148 = insertelement <2 x i32> undef, i32 %146, i32 0 %149 = insertelement <2 x i32> %148, i32 %147, i32 1 %150 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %149, <32 x i8> %71, <16 x i8> %74, i32 2) %151 = extractelement <4 x float> %150, i32 0 %152 = extractelement <4 x float> %150, i32 1 %153 = extractelement <4 x float> %150, i32 2 %154 = extractelement <4 x float> %150, i32 3 %155 = fmul float %151, 2.000000e+00 %156 = fadd float %155, -1.000000e+00 %157 = fmul float %152, 2.000000e+00 %158 = fadd float %157, -1.000000e+00 %159 = fmul float %153, 2.000000e+00 %160 = fadd float %159, -1.000000e+00 %161 = fmul float %156, %156 %162 = fmul float %158, %158 %163 = fadd float %162, %161 %164 = fmul float %160, %160 %165 = fadd float %163, %164 %166 = call float @llvm.AMDGPU.rsq.clamped.f32(float %165) %167 = fmul float %156, %166 %168 = fmul float %158, %166 %169 = fmul float %160, %166 %170 = fmul float %131, %167 %171 = fsub float -0.000000e+00, %170 %172 = fmul float %132, %168 %173 = fsub float %171, %172 %174 = fmul float %133, %169 %175 = fsub float %173, %174 %176 = call float @llvm.maxnum.f32(float %175, float 0.000000e+00) %177 = fmul float %176, %145 %178 = fmul float %42, %177 %179 = fmul float %43, %177 %180 = fmul float %44, %177 %181 = fmul float %42, %34 %182 = fmul float %43, %35 %183 = fmul float %44, %36 %184 = fsub float %116, %24 %185 = fsub float %117, %25 %186 = fsub float %118, %26 %187 = fmul float %184, %184 %188 = fmul float %185, %185 %189 = fadd float %188, %187 %190 = fmul float %186, %186 %191 = fadd float %189, %190 %192 = call float @llvm.AMDGPU.rsq.clamped.f32(float %191) %193 = fmul float %184, %192 %194 = fmul float %185, %192 %195 = fmul float %186, %192 %196 = fsub float -0.000000e+00, %193 %197 = fsub float %196, %131 %198 = fsub float -0.000000e+00, %194 %199 = fsub float %198, %132 %200 = fsub float -0.000000e+00, %195 %201 = fsub float %200, %133 %202 = fmul float %197, %197 %203 = fmul float %199, %199 %204 = fadd float %203, %202 %205 = fmul float %201, %201 %206 = fadd float %204, %205 %207 = call float @llvm.AMDGPU.rsq.clamped.f32(float %206) %208 = fmul float %197, %207 %209 = fmul float %199, %207 %210 = fmul float %201, %207 %211 = fmul float %208, %167 %212 = fmul float %209, %168 %213 = fadd float %212, %211 %214 = fmul float %210, %169 %215 = fadd float %213, %214 %216 = call float @llvm.maxnum.f32(float %215, float 0.000000e+00) %217 = fmul float %154, 1.280000e+02 %218 = call float @llvm.pow.f32(float %216, float %217) %219 = call float @llvm.AMDIL.clamp.(float %145, float 0.000000e+00, float 1.000000e+00) %220 = fmul float %218, %219 %221 = fadd float %181, %183 %222 = fmul float %182, %221 %223 = call float @llvm.sqrt.f32(float %222) %224 = fmul float %223, 2.000000e+00 %225 = fadd float %181, %182 %226 = fadd float %225, %183 %227 = fmul float %224, %37 %228 = fadd float %227, %226 %229 = fmul float %220, %228 %230 = fmul float %119, %119 %231 = fmul float %120, %120 %232 = fadd float %231, %230 %233 = fmul float %121, %121 %234 = fadd float %232, %233 %235 = call float @llvm.sqrt.f32(float %234) %236 = call float @llvm.AMDGPU.lrp(float %33, float %235, float %100) %237 = fmul float %236, %45 %238 = fadd float %237, %46 %239 = fsub float 1.000000e+00, %238 %240 = call float @llvm.AMDIL.clamp.(float %239, float 0.000000e+00, float 1.000000e+00) %241 = fmul float %178, %240 %242 = fmul float %179, %240 %243 = fmul float %180, %240 %244 = fmul float %229, %240 %245 = fsub float -0.000000e+00, %241 %246 = call float @llvm.AMDIL.exp.(float %245) %247 = fsub float -0.000000e+00, %242 %248 = call float @llvm.AMDIL.exp.(float %247) %249 = fsub float -0.000000e+00, %243 %250 = call float @llvm.AMDIL.exp.(float %249) %251 = fsub float -0.000000e+00, %244 %252 = call float @llvm.AMDIL.exp.(float %251) %253 = call i32 @llvm.SI.packf16(float %246, float %248) %254 = bitcast i32 %253 to float %255 = call i32 @llvm.SI.packf16(float %250, float %252) %256 = bitcast i32 %255 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %254, float %256, float %254, float %256) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600 v_rcp_f32_e32 v4, v4 ; 7E085504 v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601 s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 v_mul_f32_e32 v1, v4, v2 ; 10020504 v_mul_f32_e32 v2, v4, v3 ; 10040704 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s36, s[0:3], 0x9 ; C2120109 s_buffer_load_dword s37, s[0:3], 0x6 ; C2128106 s_buffer_load_dword s38, s[0:3], 0x8 ; C2130108 s_load_dwordx4 s[40:43], s[4:5], 0x4 ; C0940504 s_load_dwordx4 s[44:47], s[4:5], 0x8 ; C0960508 s_load_dwordx8 s[8:15], s[6:7], 0x8 ; C0C40708 s_load_dwordx8 s[16:23], s[6:7], 0x10 ; C0C80710 image_sample v3, 1, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[28:35], s[24:27] ; F0800100 00C70301 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_buffer_load_dword s5, s[0:3], 0xd ; C202810D s_buffer_load_dword s6, s[0:3], 0xe ; C203010E s_buffer_load_dword s7, s[0:3], 0xf ; C203810F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s36 ; 7E080224 v_rcp_f32_e32 v7, v0 ; 7E0E5500 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v4, s38, v3 ; 3E080626 s_buffer_load_dword s24, s[0:3], 0x24 ; C20C0124 s_buffer_load_dword s25, s[0:3], 0x25 ; C20C8125 v_mul_f32_e32 v3, s37, v7 ; 10060E25 s_buffer_load_dword s26, s[0:3], 0x26 ; C20D0126 v_rcp_f32_e32 v4, v4 ; 7E085504 v_mul_f32_e32 v5, v3, v5 ; 100A0B03 s_buffer_load_dword s27, s[0:3], 0x28 ; C20D8128 s_buffer_load_dword s28, s[0:3], 0x29 ; C20E0129 v_mul_f32_e32 v5, v4, v5 ; 100A0B04 s_buffer_load_dword s29, s[0:3], 0x2a ; C20E812A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s24, v5 ; 100E0A18 v_mul_f32_e32 v8, s25, v5 ; 10100A19 s_buffer_load_dword s24, s[0:3], 0x2c ; C20C012C v_mul_f32_e32 v5, s26, v5 ; 100A0A1A v_mul_f32_e32 v6, v3, v6 ; 100C0D03 v_mul_f32_e32 v6, v4, v6 ; 100C0D04 v_mac_f32_e32 v7, s27, v6 ; 3E0E0C1B v_mac_f32_e32 v8, s28, v6 ; 3E100C1C s_buffer_load_dword s25, s[0:3], 0x2d ; C20C812D v_mac_f32_e32 v5, s29, v6 ; 3E0A0C1D v_mul_f32_e32 v0, v3, v0 ; 10000103 s_buffer_load_dword s26, s[0:3], 0x2e ; C20D012E s_buffer_load_dword s27, s[0:3], 0x30 ; C20D8130 s_buffer_load_dword s28, s[0:3], 0x31 ; C20E0131 v_mul_f32_e32 v0, v4, v0 ; 10000104 s_buffer_load_dword s29, s[0:3], 0x32 ; C20E8132 s_buffer_load_dword s30, s[0:3], 0x18 ; C20F0118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v7, s24, v0 ; 3E0E0018 v_mac_f32_e32 v8, s25, v0 ; 3E100019 s_buffer_load_dword s24, s[0:3], 0x19 ; C20C0119 s_buffer_load_dword s25, s[0:3], 0x1a ; C20C811A v_mac_f32_e32 v5, s26, v0 ; 3E0A001A v_add_f32_e32 v3, s27, v7 ; 06060E1B v_add_f32_e32 v4, s28, v8 ; 0608101C s_buffer_load_dword s26, s[0:3], 0x1b ; C20D011B v_add_f32_e32 v5, s29, v5 ; 060A0A1D v_subrev_f32_e32 v6, s30, v3 ; 0A0C061E s_buffer_load_dword s27, s[0:3], 0x1c ; C20D811C s_buffer_load_dword s28, s[0:3], 0x1d ; C20E011D s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v7, s24, v4 ; 0A0E0818 v_subrev_f32_e32 v8, s25, v5 ; 0A100A19 v_mul_f32_e32 v9, v6, v6 ; 10120D06 v_mac_f32_e32 v9, v7, v7 ; 3E120F07 v_mac_f32_e32 v9, v8, v8 ; 3E121108 v_mul_f32_e32 v10, s26, v9 ; 1014121A v_mov_b32_e32 v11, v10 ; 7E16030A s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112 s_buffer_load_dword s25, s[0:3], 0x1e ; C20C811E s_buffer_load_dword s26, s[0:3], 0x10 ; C20D0110 s_buffer_load_dword s29, s[0:3], 0x11 ; C20E8111 image_sample v10, 8, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[8:15], s[40:43] ; F0800800 01420A0A image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[16:23], s[44:47] ; F0800F00 01640B01 s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s11, s[0:3], 0x13 ; C2058113 s_buffer_load_dword s12, s[0:3], 0x22 ; C2060122 s_buffer_load_dword s0, s[0:3], 0x23 ; C2000123 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s24 ; 7E020218 v_mul_f32_e32 v1, s25, v1 ; 10020219 v_mov_b32_e32 v2, s26 ; 7E04021A v_mac_f32_e32 v1, s27, v2 ; 3E02041B v_mov_b32_e32 v2, s26 ; 7E04021A v_mov_b32_e32 v15, s29 ; 7E1E021D v_mul_f32_e32 v15, s28, v15 ; 101E1E1C v_mul_f32_e32 v1, v1, v15 ; 10021F01 v_mac_f32_e32 v15, s27, v2 ; 3E1E041B v_mov_b32_e32 v2, s24 ; 7E040218 v_mac_f32_e32 v15, s25, v2 ; 3E1E0419 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v2, s8, v3 ; 0A040608 v_subrev_f32_e32 v16, s9, v4 ; 0A200809 v_subrev_f32_e32 v17, s10, v5 ; 0A220A0A v_mul_f32_e32 v18, v2, v2 ; 10240502 v_mac_f32_e32 v18, v16, v16 ; 3E242110 v_mac_f32_e32 v18, v17, v17 ; 3E242311 v_rsq_clamp_f32_e32 v18, v18 ; 7E245912 v_sqrt_f32_e32 v1, v1 ; 7E026701 v_add_f32_e32 v1, v1, v1 ; 06020301 v_mac_f32_e32 v15, s11, v1 ; 3E1E020B v_mul_f32_e32 v1, v18, v2 ; 10020512 v_mul_f32_e32 v2, v18, v16 ; 10042112 v_mul_f32_e32 v16, v18, v17 ; 10202312 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mad_f32 v11, 2.0, v11, -1.0 ; D282000B 03CE16F4 v_mad_f32 v12, 2.0, v12, -1.0 ; D282000C 03CE18F4 v_mul_f32_e32 v17, v11, v11 ; 1022170B v_mac_f32_e32 v17, v12, v12 ; 3E22190C v_mad_f32 v13, 2.0, v13, -1.0 ; D282000D 03CE1AF4 v_mac_f32_e32 v17, v13, v13 ; 3E221B0D v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 v_mad_f32 v1, -v6, v9, -v1 ; D2820001 A4061306 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mad_f32 v2, -v7, v9, -v2 ; D2820002 A40A1307 v_mul_f32_e32 v7, v9, v7 ; 100E0F09 v_mul_f32_e32 v11, v17, v11 ; 10161711 v_mad_f32 v16, -v8, v9, -v16 ; D2820010 A4421308 v_mul_f32_e32 v18, v1, v1 ; 10240301 v_mac_f32_e32 v18, v2, v2 ; 3E240502 v_mac_f32_e32 v18, v16, v16 ; 3E242110 v_rsq_clamp_f32_e32 v18, v18 ; 7E245912 v_mul_f32_e32 v12, v17, v12 ; 10181911 v_mul_f32_e32 v6, v11, v6 ; 100C0D0B v_mad_f32 v6, -v7, v12, -v6 ; D2820006 A41A1907 v_mul_f32_e32 v1, v18, v1 ; 10020312 v_mul_f32_e32 v1, v11, v1 ; 1002030B v_mul_f32_e32 v2, v18, v2 ; 10040512 v_mac_f32_e32 v1, v12, v2 ; 3E02050C v_mul_f32_e32 v2, v9, v8 ; 10041109 v_mul_f32_e32 v7, v17, v13 ; 100E1B11 v_mad_f32 v2, -v2, v7, v6 ; D2820002 241A0F02 v_mul_f32_e32 v6, v18, v16 ; 100C2112 v_mac_f32_e32 v1, v7, v6 ; 3E020D07 v_subrev_f32_e32 v3, s4, v3 ; 0A060604 v_subrev_f32_e32 v4, s5, v4 ; 0A080805 v_subrev_f32_e32 v5, s6, v5 ; 0A0A0A06 v_max_f32_e32 v2, 0, v2 ; 20040480 v_mul_f32_e32 v2, v10, v2 ; 1004050A v_mul_f32_e32 v6, s27, v2 ; 100C041B v_mul_f32_e32 v7, s28, v2 ; 100E041C v_mul_f32_e32 v2, s25, v2 ; 10040419 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mac_f32_e32 v3, v4, v4 ; 3E060904 v_mac_f32_e32 v3, v5, v5 ; 3E060B05 v_sub_f32_e64 v4, 1.0, s7 ; D2080004 00000EF2 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_max_f32_e32 v1, 0, v1 ; 20020280 v_log_f32_e32 v1, v1 ; 7E024F01 v_sqrt_f32_e32 v3, v3 ; 7E066703 v_mac_f32_e32 v0, s7, v3 ; 3E000607 v_mov_b32_e32 v3, s0 ; 7E060200 v_mac_f32_e32 v3, s12, v0 ; 3E06000C v_mul_f32_e32 v0, 0x43000000, v14 ; 10001CFF 43000000 v_mul_legacy_f32_e32 v0, v0, v1 ; 0E000300 v_exp_f32_e32 v0, v0 ; 7E004B00 v_add_f32_e64 v1, 0, v10 clamp ; D2060801 00021480 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mul_f32_e32 v0, v15, v0 ; 1000010F v_sub_f32_e32 v1, 1.0, v3 ; 080206F2 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mul_f32_e32 v3, v1, v6 ; 10060D01 v_mul_f32_e32 v4, v1, v7 ; 10080F01 v_mul_f32_e32 v2, v1, v2 ; 10040501 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_exp_f32_e64 v1, -v3 ; D34A0001 20000103 v_exp_f32_e64 v3, -v4 ; D34A0003 20000104 v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701 v_exp_f32_e64 v2, -v2 ; D34A0002 20000102 v_exp_f32_e64 v0, -v0 ; D34A0000 20000100 v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 20 Code Size: 856 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..9] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.5000, -1.0000, 1.0000, 0.0000} 0: MUL TEMP[0], CONST[2], IN[0].xxxx 1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[5], IN[0].wwww, TEMP[0] 4: MUL TEMP[1].xyw, TEMP[0], IMM[0].xxxx 5: MOV TEMP[2].x, TEMP[1].xxxx 6: MUL TEMP[3].x, TEMP[1].yyyy, CONST[0].xxxx 7: MOV TEMP[2].y, TEMP[3].xxxx 8: ADD TEMP[1].xy, TEMP[2].xyyy, TEMP[1].wwww 9: MOV TEMP[1].zw, TEMP[0].wwzw 10: MUL TEMP[2], CONST[6], IN[0].xxxx 11: MAD TEMP[2], CONST[7], IN[0].yyyy, TEMP[2] 12: MAD TEMP[2], CONST[8], IN[0].zzzz, TEMP[2] 13: MAD TEMP[2].xyz, CONST[9], IN[0].wwww, TEMP[2] 14: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[0].yyzz 15: LRP TEMP[2].xyz, CONST[1].xxxx, IN[1].xyzz, TEMP[2].xyzz 16: MOV OUT[1], TEMP[1] 17: MOV OUT[2], TEMP[2] 18: MOV OUT[0], TEMP[0] 19: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = extractelement <4 x float> %49, i32 3 %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = add i32 %5, %7 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = fmul float %15, %50 %62 = fmul float %16, %50 %63 = fmul float %17, %50 %64 = fmul float %18, %50 %65 = fmul float %19, %51 %66 = fadd float %65, %61 %67 = fmul float %20, %51 %68 = fadd float %67, %62 %69 = fmul float %21, %51 %70 = fadd float %69, %63 %71 = fmul float %22, %51 %72 = fadd float %71, %64 %73 = fmul float %23, %52 %74 = fadd float %73, %66 %75 = fmul float %24, %52 %76 = fadd float %75, %68 %77 = fmul float %25, %52 %78 = fadd float %77, %70 %79 = fmul float %26, %52 %80 = fadd float %79, %72 %81 = fmul float %27, %53 %82 = fadd float %81, %74 %83 = fmul float %28, %53 %84 = fadd float %83, %76 %85 = fmul float %29, %53 %86 = fadd float %85, %78 %87 = fmul float %30, %53 %88 = fadd float %87, %80 %89 = fmul float %82, 5.000000e-01 %90 = fmul float %84, 5.000000e-01 %91 = fmul float %88, 5.000000e-01 %92 = fmul float %90, %13 %93 = fadd float %89, %91 %94 = fadd float %92, %91 %95 = fmul float %31, %50 %96 = fmul float %32, %50 %97 = fmul float %33, %50 %98 = fmul float %34, %50 %99 = fmul float %35, %51 %100 = fadd float %99, %95 %101 = fmul float %36, %51 %102 = fadd float %101, %96 %103 = fmul float %37, %51 %104 = fadd float %103, %97 %105 = fmul float %38, %51 %106 = fadd float %105, %98 %107 = fmul float %39, %52 %108 = fadd float %107, %100 %109 = fmul float %40, %52 %110 = fadd float %109, %102 %111 = fmul float %41, %52 %112 = fadd float %111, %104 %113 = fmul float %42, %52 %114 = fadd float %113, %106 %115 = fmul float %43, %53 %116 = fadd float %115, %108 %117 = fmul float %44, %53 %118 = fadd float %117, %110 %119 = fmul float %45, %53 %120 = fadd float %119, %112 %121 = fsub float -0.000000e+00, %116 %122 = fsub float -0.000000e+00, %118 %123 = call float @llvm.AMDGPU.lrp(float %14, float %58, float %121) %124 = call float @llvm.AMDGPU.lrp(float %14, float %59, float %122) %125 = call float @llvm.AMDGPU.lrp(float %14, float %60, float %120) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %93, float %94, float %86, float %88) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %123, float %124, float %125, float %114) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %82, float %84, float %86, float %88) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_buffer_load_dword s5, s[0:3], 0xc ; C202810C s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109 s_buffer_load_dword s7, s[0:3], 0xd ; C203810D s_buffer_load_dword s8, s[0:3], 0xa ; C204010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v1 ; 10000204 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_buffer_load_dword s9, s[0:3], 0xf ; C204810F v_mac_f32_e32 v0, s5, v2 ; 3E000405 v_mul_f32_e32 v8, s6, v1 ; 10100206 v_mac_f32_e32 v8, s7, v2 ; 3E100407 s_buffer_load_dword s5, s[0:3], 0xb ; C202810B v_mul_f32_e32 v9, s8, v1 ; 10120208 s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118 s_buffer_load_dword s7, s[0:3], 0x1c ; C203811C s_buffer_load_dword s8, s[0:3], 0x19 ; C2040119 s_buffer_load_dword s10, s[0:3], 0x1d ; C205011D s_buffer_load_dword s11, s[0:3], 0x1a ; C205811A s_buffer_load_dword s12, s[0:3], 0x1e ; C206011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v9, s4, v2 ; 3E120404 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B v_mul_f32_e32 v10, s5, v1 ; 10140205 v_mac_f32_e32 v10, s9, v2 ; 3E140409 v_mul_f32_e32 v11, s6, v1 ; 10160206 v_mac_f32_e32 v11, s7, v2 ; 3E160407 v_mul_f32_e32 v12, s8, v1 ; 10180208 v_mac_f32_e32 v12, s10, v2 ; 3E18040A v_mul_f32_e32 v13, s11, v1 ; 101A020B v_mac_f32_e32 v13, s12, v2 ; 3E1A040C s_buffer_load_dword s5, s[0:3], 0x1f ; C202811F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v1 ; 10020204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s6, s[0:3], 0x11 ; C2030111 s_buffer_load_dword s7, s[0:3], 0x12 ; C2038112 s_buffer_load_dword s8, s[0:3], 0x13 ; C2040113 s_buffer_load_dword s9, s[0:3], 0x20 ; C2048120 s_buffer_load_dword s10, s[0:3], 0x21 ; C2050121 s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122 s_buffer_load_dword s12, s[0:3], 0x23 ; C2060123 v_mac_f32_e32 v1, s5, v2 ; 3E020405 s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s4, v3 ; 3E000604 v_mac_f32_e32 v8, s6, v3 ; 3E100606 v_mac_f32_e32 v9, s7, v3 ; 3E120607 v_mac_f32_e32 v10, s8, v3 ; 3E140608 v_mac_f32_e32 v11, s9, v3 ; 3E160609 v_mac_f32_e32 v12, s10, v3 ; 3E18060A s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_buffer_load_dword s6, s[0:3], 0x16 ; C2030116 s_buffer_load_dword s7, s[0:3], 0x17 ; C2038117 s_buffer_load_dword s8, s[0:3], 0x24 ; C2040124 s_buffer_load_dword s9, s[0:3], 0x25 ; C2048125 s_buffer_load_dword s10, s[0:3], 0x26 ; C2050126 v_mac_f32_e32 v13, s11, v3 ; 3E1A060B s_buffer_load_dword s11, s[0:3], 0x4 ; C2058104 v_mac_f32_e32 v1, s12, v3 ; 3E02060C v_mac_f32_e32 v0, s5, v4 ; 3E000805 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v8, s4, v4 ; 3E100804 v_mac_f32_e32 v9, s6, v4 ; 3E120806 v_mac_f32_e32 v10, s7, v4 ; 3E140807 v_mac_f32_e32 v11, s8, v4 ; 3E160808 v_mac_f32_e32 v12, s9, v4 ; 3E180809 v_mac_f32_e32 v13, s10, v4 ; 3E1A080A s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 v_sub_f32_e64 v2, 1.0, s11 ; D2080002 000016F2 v_mul_f32_e32 v3, v11, v2 ; 1006050B v_mul_f32_e32 v4, v12, v2 ; 1008050C v_mul_f32_e32 v2, v13, v2 ; 1004050D v_mad_f32 v3, s11, v5, -v3 ; D2820003 840E0A0B v_mad_f32 v4, s11, v6, -v4 ; D2820004 84120C0B v_mac_f32_e32 v2, s11, v7 ; 3E040E0B v_mul_f32_e32 v5, 0.5, v8 ; 100A10F0 v_mul_f32_e32 v6, 0.5, v10 ; 100C14F0 v_mad_f32 v7, 0.5, v0, v6 ; D2820007 041A00F0 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v6, s0, v5 ; 3E0C0A00 exp 15, 32, 0, 0, 0, v7, v6, v9, v10 ; F800020F 0A090607 exp 15, 33, 0, 0, 0, v3, v4, v2, v1 ; F800021F 01020403 exp 15, 12, 0, 1, 0, v0, v8, v9, v10 ; F80008CF 0A090800 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 408 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0..4] DCL CONST[6..12] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 1.0000, 2.0000, -1.0000, 0.0000} IMM[1] FLT32 { 128.0000, 0.0000, 0.0000, 0.0000} 0: RCP TEMP[0].x, IN[0].wwww 1: MUL TEMP[0].xy, IN[0].xyyy, TEMP[0].xxxx 2: RCP TEMP[1].x, IN[1].zzzz 3: MUL TEMP[2].x, CONST[1].zzzz, TEMP[1].xxxx 4: MUL TEMP[2].xyz, IN[1].xyzz, TEMP[2].xxxx 5: MOV TEMP[3].xy, TEMP[0].xyyy 6: TEX TEMP[3].x, TEMP[3], SAMP[0], 2D 7: MAD TEMP[3].x, CONST[2].xxxx, TEMP[3].xxxx, CONST[2].yyyy 8: RCP TEMP[3].x, TEMP[3].xxxx 9: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[3].xxxx 10: MUL TEMP[2], CONST[9], TEMP[1].xxxx 11: MAD TEMP[2], CONST[10], TEMP[1].yyyy, TEMP[2] 12: MAD TEMP[2], CONST[11], TEMP[1].zzzz, TEMP[2] 13: ADD TEMP[2].xyz, TEMP[2], CONST[12] 14: ADD TEMP[3].xyz, TEMP[2].xyzz, -CONST[3].xyzz 15: MOV TEMP[4].xyz, -CONST[6].xyzx 16: MOV TEMP[0].xy, TEMP[0].xyyy 17: TEX TEMP[0], TEMP[0], SAMP[1], 2D 18: MAD TEMP[5].xyz, TEMP[0].xyzz, IMM[0].yyyy, IMM[0].zzzz 19: DP3 TEMP[6].x, TEMP[5].xyzz, TEMP[5].xyzz 20: RSQ TEMP[6].x, TEMP[6].xxxx 21: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[6].xxxx 22: DP3 TEMP[6].x, TEMP[4].xyzz, TEMP[5].xyzz 23: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx 24: MUL TEMP[6].xyz, CONST[7].xyzz, TEMP[6].xxxx 25: MUL TEMP[7].xyz, CONST[7].xyzz, CONST[4].xyzz 26: ADD TEMP[2].xyz, TEMP[2].xyzz, -CONST[0].xyzz 27: DP3 TEMP[8].x, TEMP[2].xyzz, TEMP[2].xyzz 28: RSQ TEMP[8].x, TEMP[8].xxxx 29: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[8].xxxx 30: ADD TEMP[2].xyz, TEMP[4].xyzz, -TEMP[2].xyzz 31: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz 32: RSQ TEMP[4].x, TEMP[4].xxxx 33: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx 34: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[5].xyzz 35: MAX TEMP[2].x, IMM[0].wwww, TEMP[2].xxxx 36: MUL TEMP[0].x, TEMP[0].wwww, IMM[1].xxxx 37: POW TEMP[0].x, TEMP[2].xxxx, TEMP[0].xxxx 38: ADD TEMP[2].x, TEMP[7].xxxx, TEMP[7].zzzz 39: MUL TEMP[2].x, TEMP[7].yyyy, TEMP[2].xxxx 40: SQRT TEMP[2].x, TEMP[2].xxxx 41: MUL TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 42: ADD TEMP[4].x, TEMP[7].xxxx, TEMP[7].yyyy 43: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[7].zzzz 44: MAD TEMP[2].x, TEMP[2].xxxx, CONST[4].wwww, TEMP[4].xxxx 45: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx 46: MOV TEMP[6].w, TEMP[0].xxxx 47: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[3].xyzz 48: SQRT TEMP[0].x, TEMP[0].xxxx 49: LRP TEMP[0].x, CONST[3].wwww, TEMP[0].xxxx, TEMP[1].zzzz 50: MAD TEMP[0].x, TEMP[0].xxxx, CONST[8].zzzz, CONST[8].wwww 51: ADD TEMP[0].x, IMM[0].xxxx, -TEMP[0].xxxx 52: MOV_SAT TEMP[0].x, TEMP[0].xxxx 53: MUL TEMP[0], TEMP[6], TEMP[0].xxxx 54: EX2 TEMP[1].x, -TEMP[0].xxxx 55: EX2 TEMP[1].y, -TEMP[0].yyyy 56: EX2 TEMP[1].z, -TEMP[0].zzzz 57: EX2 TEMP[1].w, -TEMP[0].wwww 58: MOV OUT[0], TEMP[1] 59: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %58 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %59 = load <32 x i8>, <32 x i8> addrspace(2)* %58, align 32, !tbaa !0 %60 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0 %62 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %63 = bitcast <8 x i32> addrspace(2)* %62 to <32 x i8> addrspace(2)* %64 = load <32 x i8>, <32 x i8> addrspace(2)* %63, align 32, !tbaa !0 %65 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %66 = bitcast <4 x i32> addrspace(2)* %65 to <16 x i8> addrspace(2)* %67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0 %68 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %69 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %70 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %71 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %74 = fdiv float 1.000000e+00, %70 %75 = fmul float %68, %74 %76 = fmul float %69, %74 %77 = fdiv float 1.000000e+00, %73 %78 = fmul float %27, %77 %79 = fmul float %71, %78 %80 = fmul float %72, %78 %81 = fmul float %73, %78 %82 = bitcast float %75 to i32 %83 = bitcast float %76 to i32 %84 = insertelement <2 x i32> undef, i32 %82, i32 0 %85 = insertelement <2 x i32> %84, i32 %83, i32 1 %86 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %85, <32 x i8> %59, <16 x i8> %61, i32 2) %87 = extractelement <4 x float> %86, i32 0 %88 = fmul float %28, %87 %89 = fadd float %88, %29 %90 = fdiv float 1.000000e+00, %89 %91 = fmul float %79, %90 %92 = fmul float %80, %90 %93 = fmul float %81, %90 %94 = fmul float %46, %91 %95 = fmul float %47, %91 %96 = fmul float %48, %91 %97 = fmul float %49, %92 %98 = fadd float %97, %94 %99 = fmul float %50, %92 %100 = fadd float %99, %95 %101 = fmul float %51, %92 %102 = fadd float %101, %96 %103 = fmul float %52, %93 %104 = fadd float %103, %98 %105 = fmul float %53, %93 %106 = fadd float %105, %100 %107 = fmul float %54, %93 %108 = fadd float %107, %102 %109 = fadd float %104, %55 %110 = fadd float %106, %56 %111 = fadd float %108, %57 %112 = fsub float %109, %30 %113 = fsub float %110, %31 %114 = fsub float %111, %32 %115 = bitcast float %75 to i32 %116 = bitcast float %76 to i32 %117 = insertelement <2 x i32> undef, i32 %115, i32 0 %118 = insertelement <2 x i32> %117, i32 %116, i32 1 %119 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %118, <32 x i8> %64, <16 x i8> %67, i32 2) %120 = extractelement <4 x float> %119, i32 0 %121 = extractelement <4 x float> %119, i32 1 %122 = extractelement <4 x float> %119, i32 2 %123 = extractelement <4 x float> %119, i32 3 %124 = fmul float %120, 2.000000e+00 %125 = fadd float %124, -1.000000e+00 %126 = fmul float %121, 2.000000e+00 %127 = fadd float %126, -1.000000e+00 %128 = fmul float %122, 2.000000e+00 %129 = fadd float %128, -1.000000e+00 %130 = fmul float %125, %125 %131 = fmul float %127, %127 %132 = fadd float %131, %130 %133 = fmul float %129, %129 %134 = fadd float %132, %133 %135 = call float @llvm.AMDGPU.rsq.clamped.f32(float %134) %136 = fmul float %125, %135 %137 = fmul float %127, %135 %138 = fmul float %129, %135 %139 = fmul float %38, %136 %140 = fsub float -0.000000e+00, %139 %141 = fmul float %39, %137 %142 = fsub float %140, %141 %143 = fmul float %40, %138 %144 = fsub float %142, %143 %145 = call float @llvm.maxnum.f32(float %144, float 0.000000e+00) %146 = fmul float %41, %145 %147 = fmul float %42, %145 %148 = fmul float %43, %145 %149 = fmul float %41, %34 %150 = fmul float %42, %35 %151 = fmul float %43, %36 %152 = fsub float %109, %24 %153 = fsub float %110, %25 %154 = fsub float %111, %26 %155 = fmul float %152, %152 %156 = fmul float %153, %153 %157 = fadd float %156, %155 %158 = fmul float %154, %154 %159 = fadd float %157, %158 %160 = call float @llvm.AMDGPU.rsq.clamped.f32(float %159) %161 = fmul float %152, %160 %162 = fmul float %153, %160 %163 = fmul float %154, %160 %164 = fsub float -0.000000e+00, %161 %165 = fsub float %164, %38 %166 = fsub float -0.000000e+00, %162 %167 = fsub float %166, %39 %168 = fsub float -0.000000e+00, %163 %169 = fsub float %168, %40 %170 = fmul float %165, %165 %171 = fmul float %167, %167 %172 = fadd float %171, %170 %173 = fmul float %169, %169 %174 = fadd float %172, %173 %175 = call float @llvm.AMDGPU.rsq.clamped.f32(float %174) %176 = fmul float %165, %175 %177 = fmul float %167, %175 %178 = fmul float %169, %175 %179 = fmul float %176, %136 %180 = fmul float %177, %137 %181 = fadd float %180, %179 %182 = fmul float %178, %138 %183 = fadd float %181, %182 %184 = call float @llvm.maxnum.f32(float %183, float 0.000000e+00) %185 = fmul float %123, 1.280000e+02 %186 = call float @llvm.pow.f32(float %184, float %185) %187 = fadd float %149, %151 %188 = fmul float %150, %187 %189 = call float @llvm.sqrt.f32(float %188) %190 = fmul float %189, 2.000000e+00 %191 = fadd float %149, %150 %192 = fadd float %191, %151 %193 = fmul float %190, %37 %194 = fadd float %193, %192 %195 = fmul float %186, %194 %196 = fmul float %112, %112 %197 = fmul float %113, %113 %198 = fadd float %197, %196 %199 = fmul float %114, %114 %200 = fadd float %198, %199 %201 = call float @llvm.sqrt.f32(float %200) %202 = call float @llvm.AMDGPU.lrp(float %33, float %201, float %93) %203 = fmul float %202, %44 %204 = fadd float %203, %45 %205 = fsub float 1.000000e+00, %204 %206 = call float @llvm.AMDIL.clamp.(float %205, float 0.000000e+00, float 1.000000e+00) %207 = fmul float %146, %206 %208 = fmul float %147, %206 %209 = fmul float %148, %206 %210 = fmul float %195, %206 %211 = fsub float -0.000000e+00, %207 %212 = call float @llvm.AMDIL.exp.(float %211) %213 = fsub float -0.000000e+00, %208 %214 = call float @llvm.AMDIL.exp.(float %213) %215 = fsub float -0.000000e+00, %209 %216 = call float @llvm.AMDIL.exp.(float %215) %217 = fsub float -0.000000e+00, %210 %218 = call float @llvm.AMDIL.exp.(float %217) %219 = call i32 @llvm.SI.packf16(float %212, float %214) %220 = bitcast i32 %219 to float %221 = call i32 @llvm.SI.packf16(float %216, float %218) %222 = bitcast i32 %221 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %220, float %222, float %220, float %222) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600 v_rcp_f32_e32 v4, v4 ; 7E085504 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108 v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601 v_mul_f32_e32 v1, v4, v2 ; 10020504 v_mul_f32_e32 v2, v4, v3 ; 10040704 image_sample v3, 1, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[16:23], s[8:11] ; F0800100 00440301 image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[24:31], s[12:15] ; F0800F00 00660701 s_buffer_load_dword s6, s[0:3], 0xc ; C203010C s_buffer_load_dword s7, s[0:3], 0xd ; C203810D s_buffer_load_dword s8, s[0:3], 0xe ; C204010E s_buffer_load_dword s9, s[0:3], 0xf ; C204810F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v1, s5, v3 ; 3E020605 v_rcp_f32_e32 v1, v1 ; 7E025501 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s11, s[0:3], 0x6 ; C2058106 s_buffer_load_dword s12, s[0:3], 0x10 ; C2060110 s_buffer_load_dword s13, s[0:3], 0x11 ; C2068111 s_buffer_load_dword s14, s[0:3], 0x12 ; C2070112 s_buffer_load_dword s15, s[0:3], 0x13 ; C2078113 s_buffer_load_dword s16, s[0:3], 0x18 ; C2080118 s_buffer_load_dword s17, s[0:3], 0x19 ; C2088119 s_buffer_load_dword s18, s[0:3], 0x1a ; C209011A s_buffer_load_dword s19, s[0:3], 0x1c ; C209811C s_buffer_load_dword s20, s[0:3], 0x1d ; C20A011D s_buffer_load_dword s21, s[0:3], 0x1e ; C20A811E s_buffer_load_dword s22, s[0:3], 0x22 ; C20B0122 s_buffer_load_dword s23, s[0:3], 0x23 ; C20B8123 s_buffer_load_dword s24, s[0:3], 0x24 ; C20C0124 s_buffer_load_dword s25, s[0:3], 0x25 ; C20C8125 s_buffer_load_dword s26, s[0:3], 0x26 ; C20D0126 s_buffer_load_dword s27, s[0:3], 0x28 ; C20D8128 s_buffer_load_dword s28, s[0:3], 0x29 ; C20E0129 s_buffer_load_dword s29, s[0:3], 0x2a ; C20E812A s_buffer_load_dword s30, s[0:3], 0x2c ; C20F012C s_buffer_load_dword s31, s[0:3], 0x2d ; C20F812D v_rcp_f32_e32 v2, v0 ; 7E045500 s_buffer_load_dword s32, s[0:3], 0x2e ; C210012E s_buffer_load_dword s33, s[0:3], 0x30 ; C2108130 s_buffer_load_dword s34, s[0:3], 0x31 ; C2110131 s_buffer_load_dword s0, s[0:3], 0x32 ; C2000132 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v2, s11, v2 ; 1004040B v_mul_f32_e32 v3, v2, v5 ; 10060B02 v_mul_f32_e32 v3, v1, v3 ; 10060701 v_mul_f32_e32 v4, s24, v3 ; 10080618 v_mul_f32_e32 v5, s25, v3 ; 100A0619 v_mul_f32_e32 v3, s26, v3 ; 1006061A v_mul_f32_e32 v6, v2, v6 ; 100C0D02 v_mul_f32_e32 v6, v1, v6 ; 100C0D01 v_mac_f32_e32 v4, s27, v6 ; 3E080C1B v_mac_f32_e32 v5, s28, v6 ; 3E0A0C1C v_mac_f32_e32 v3, s29, v6 ; 3E060C1D v_mov_b32_e32 v6, s14 ; 7E0C020E v_mul_f32_e32 v6, s21, v6 ; 100C0C15 v_mov_b32_e32 v11, s12 ; 7E16020C v_mac_f32_e32 v6, s19, v11 ; 3E0C1613 v_mov_b32_e32 v11, s12 ; 7E16020C v_mov_b32_e32 v12, s13 ; 7E18020D v_mul_f32_e32 v12, s20, v12 ; 10181814 v_mul_f32_e32 v6, v6, v12 ; 100C1906 v_mac_f32_e32 v12, s19, v11 ; 3E181613 v_mov_b32_e32 v11, s14 ; 7E16020E v_mac_f32_e32 v12, s21, v11 ; 3E181615 v_sqrt_f32_e32 v6, v6 ; 7E0C6706 v_add_f32_e32 v6, v6, v6 ; 060C0D06 v_mac_f32_e32 v12, s15, v6 ; 3E180C0F v_mul_f32_e32 v0, v2, v0 ; 10000102 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mac_f32_e32 v4, s30, v0 ; 3E08001E v_mac_f32_e32 v5, s31, v0 ; 3E0A001F v_mac_f32_e32 v3, s32, v0 ; 3E060020 v_add_f32_e32 v1, s33, v4 ; 06020821 v_add_f32_e32 v2, s34, v5 ; 06040A22 v_add_f32_e32 v3, s0, v3 ; 06060600 v_subrev_f32_e32 v4, s4, v1 ; 0A080204 v_subrev_f32_e32 v5, s5, v2 ; 0A0A0405 v_subrev_f32_e32 v6, s10, v3 ; 0A0C060A v_mad_f32 v7, 2.0, v7, -1.0 ; D2820007 03CE0EF4 v_mad_f32 v8, 2.0, v8, -1.0 ; D2820008 03CE10F4 v_mul_f32_e32 v11, v7, v7 ; 10160F07 v_mac_f32_e32 v11, v8, v8 ; 3E161108 v_mul_f32_e32 v13, v4, v4 ; 101A0904 v_mac_f32_e32 v13, v5, v5 ; 3E1A0B05 v_mac_f32_e32 v13, v6, v6 ; 3E1A0D06 v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D v_mad_f32 v9, 2.0, v9, -1.0 ; D2820009 03CE12F4 v_mac_f32_e32 v11, v9, v9 ; 3E161309 v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B v_mad_f32 v4, -v4, v13, -s16 ; D2820004 A0421B04 v_mad_f32 v5, -v5, v13, -s17 ; D2820005 A0461B05 v_mad_f32 v6, -v6, v13, -s18 ; D2820006 A04A1B06 v_mul_f32_e32 v7, v11, v7 ; 100E0F0B v_mul_f32_e32 v13, v4, v4 ; 101A0904 v_mac_f32_e32 v13, v5, v5 ; 3E1A0B05 v_mac_f32_e32 v13, v6, v6 ; 3E1A0D06 v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D v_mul_f32_e32 v14, s16, v7 ; 101C0E10 v_mul_f32_e32 v8, v11, v8 ; 1010110B v_mad_f32 v14, -s17, v8, -v14 ; D282000E A43A1011 v_mul_f32_e32 v4, v13, v4 ; 1008090D v_mul_f32_e32 v4, v7, v4 ; 10080907 v_mul_f32_e32 v5, v13, v5 ; 100A0B0D v_mac_f32_e32 v4, v8, v5 ; 3E080B08 v_subrev_f32_e32 v1, s6, v1 ; 0A020206 v_subrev_f32_e32 v2, s7, v2 ; 0A040407 v_subrev_f32_e32 v3, s8, v3 ; 0A060608 v_mul_f32_e32 v5, v11, v9 ; 100A130B v_mad_f32 v7, -s18, v5, v14 ; D2820007 243A0A12 v_max_f32_e32 v7, 0, v7 ; 200E0E80 v_mul_f32_e32 v8, s19, v7 ; 10100E13 v_mul_f32_e32 v9, s20, v7 ; 10120E14 v_mul_f32_e32 v7, s21, v7 ; 100E0E15 v_mul_f32_e32 v6, v13, v6 ; 100C0D0D v_mac_f32_e32 v4, v5, v6 ; 3E080D05 v_mul_f32_e32 v1, v1, v1 ; 10020301 v_mac_f32_e32 v1, v2, v2 ; 3E020502 v_mac_f32_e32 v1, v3, v3 ; 3E020703 v_sub_f32_e64 v2, 1.0, s9 ; D2080002 000012F2 v_mul_f32_e32 v0, v0, v2 ; 10000500 v_max_f32_e32 v2, 0, v4 ; 20040880 v_log_f32_e32 v2, v2 ; 7E044F02 v_sqrt_f32_e32 v1, v1 ; 7E026701 v_mac_f32_e32 v0, s9, v1 ; 3E000209 v_mov_b32_e32 v1, s23 ; 7E020217 v_mac_f32_e32 v1, s22, v0 ; 3E020016 v_mul_f32_e32 v0, 0x43000000, v10 ; 100014FF 43000000 v_mul_legacy_f32_e32 v0, v0, v2 ; 0E000500 v_exp_f32_e32 v0, v0 ; 7E004B00 v_mul_f32_e32 v0, v12, v0 ; 1000010C v_sub_f32_e32 v1, 1.0, v1 ; 080202F2 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mul_f32_e32 v2, v1, v8 ; 10041101 v_mul_f32_e32 v3, v1, v9 ; 10061301 v_mul_f32_e32 v4, v1, v7 ; 10080F01 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_exp_f32_e64 v1, -v2 ; D34A0001 20000102 v_exp_f32_e64 v2, -v3 ; D34A0002 20000103 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_exp_f32_e64 v2, -v4 ; D34A0002 20000104 v_exp_f32_e64 v0, -v0 ; D34A0000 20000100 v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 744 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xy, IN[1].xyxx 5: MOV OUT[1], TEMP[1] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = fmul float %13, %33 %44 = fmul float %14, %33 %45 = fmul float %15, %33 %46 = fmul float %16, %33 %47 = fmul float %17, %34 %48 = fadd float %47, %43 %49 = fmul float %18, %34 %50 = fadd float %49, %44 %51 = fmul float %19, %34 %52 = fadd float %51, %45 %53 = fmul float %20, %34 %54 = fadd float %53, %46 %55 = fmul float %21, %35 %56 = fadd float %55, %48 %57 = fmul float %22, %35 %58 = fadd float %57, %50 %59 = fmul float %23, %35 %60 = fadd float %59, %52 %61 = fmul float %24, %35 %62 = fadd float %61, %54 %63 = fmul float %25, %36 %64 = fadd float %63, %56 %65 = fmul float %26, %36 %66 = fadd float %65, %58 %67 = fmul float %27, %36 %68 = fadd float %67, %60 %69 = fmul float %28, %36 %70 = fadd float %69, %62 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v0, s12, v2 ; 1000040C s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, s6, v3 ; 3E000606 v_mul_f32_e32 v8, s13, v2 ; 1010040D v_mac_f32_e32 v8, s7, v3 ; 3E100607 v_mul_f32_e32 v9, s4, v2 ; 10120404 v_mac_f32_e32 v9, s8, v3 ; 3E120608 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mac_f32_e32 v2, s9, v3 ; 3E040609 v_mac_f32_e32 v0, s10, v4 ; 3E00080A v_mac_f32_e32 v8, s11, v4 ; 3E10080B v_mac_f32_e32 v9, s14, v4 ; 3E12080E v_mac_f32_e32 v2, s15, v4 ; 3E04080F v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mac_f32_e32 v8, s17, v5 ; 3E100A11 v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706 exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 196 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0] DCL CONST[2..7] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: RCP TEMP[0].x, CONST[2].xxxx 1: ADD TEMP[1].x, IN[0].xxxx, -CONST[4].xxxx 2: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[0].xxxx 3: ADD TEMP[1].x, IMM[0].xxxx, -TEMP[1].xxxx 4: ADD TEMP[2].x, IN[0].yyyy, -CONST[5].xxxx 5: MUL TEMP[0].x, TEMP[2].xxxx, TEMP[0].xxxx 6: ADD TEMP[0].x, IMM[0].xxxx, -TEMP[0].xxxx 7: MUL TEMP[0].x, TEMP[1].xxxx, TEMP[0].xxxx 8: MUL TEMP[0].x, TEMP[0].xxxx, CONST[3].xxxx 9: MOV_SAT TEMP[0].x, TEMP[0].xxxx 10: MUL TEMP[0].x, TEMP[0].xxxx, CONST[6].xxxx 11: MOV TEMP[0].w, TEMP[0].xxxx 12: MOV TEMP[1].xy, IN[0].xyyy 13: TEX TEMP[1], TEMP[1], SAMP[0], 2D 14: MUL TEMP[1].xyz, TEMP[1], CONST[0] 15: MUL TEMP[0].xyz, TEMP[1].xyzz, CONST[7].xxxx 16: MOV OUT[0], TEMP[0] 17: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %33 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %34 = load <32 x i8>, <32 x i8> addrspace(2)* %33, align 32, !tbaa !0 %35 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %39 = fdiv float 1.000000e+00, %27 %40 = fsub float %37, %29 %41 = fmul float %40, %39 %42 = fsub float 1.000000e+00, %41 %43 = fsub float %38, %30 %44 = fmul float %43, %39 %45 = fsub float 1.000000e+00, %44 %46 = fmul float %42, %45 %47 = fmul float %46, %28 %48 = call float @llvm.AMDIL.clamp.(float %47, float 0.000000e+00, float 1.000000e+00) %49 = fmul float %48, %31 %50 = bitcast float %37 to i32 %51 = bitcast float %38 to i32 %52 = insertelement <2 x i32> undef, i32 %50, i32 0 %53 = insertelement <2 x i32> %52, i32 %51, i32 1 %54 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %53, <32 x i8> %34, <16 x i8> %36, i32 2) %55 = extractelement <4 x float> %54, i32 0 %56 = extractelement <4 x float> %54, i32 1 %57 = extractelement <4 x float> %54, i32 2 %58 = fmul float %55, %24 %59 = fmul float %56, %25 %60 = fmul float %57, %26 %61 = fmul float %58, %32 %62 = fmul float %59, %32 %63 = fmul float %60, %32 %64 = call i32 @llvm.SI.packf16(float %61, float %62) %65 = bitcast i32 %64 to float %66 = call i32 @llvm.SI.packf16(float %63, float %49) %67 = bitcast i32 %66 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %65, float %67, float %65, float %67) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 s_buffer_load_dword s21, s[0:3], 0x14 ; C20A8114 image_sample v[4:6], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800700 00430402 s_buffer_load_dword s8, s[0:3], 0xc ; C204010C s_buffer_load_dword s9, s[0:3], 0x18 ; C2048118 s_buffer_load_dword s0, s[0:3], 0x1c ; C200011C s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v0, s4 ; 7E005404 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, s5, v4 ; 10020805 v_mul_f32_e32 v4, s6, v5 ; 10080A06 v_mul_f32_e32 v5, s7, v6 ; 100A0C07 v_subrev_f32_e32 v2, s20, v2 ; 0A040414 v_subrev_f32_e32 v3, s21, v3 ; 0A060615 v_mad_f32 v2, -v2, v0, 1.0 ; D2820002 23CA0102 v_mad_f32 v0, -v3, v0, 1.0 ; D2820000 23CA0103 v_mul_f32_e32 v0, v0, v2 ; 10000500 v_mul_f32_e32 v0, s8, v0 ; 10000008 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v0, s9, v0 ; 10000009 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_mul_f32_e32 v2, s0, v4 ; 10040800 v_mul_f32_e32 v3, s0, v5 ; 10060A00 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v0, v3, v0 ; 5E000103 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 184 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0..7] DCL CONST[9..12] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 0.2500, -1.0000, 10.0000, 0.4999} IMM[1] INT32 {256, 0, 1, 2} IMM[2] FLT32 { 1.0000, 0.0000, 0.1000, 0.0039} IMM[3] FLT32 { 16.0000, -8.0000, 4.0000, -2.0000} IMM[4] INT32 {4, 0, 0, 0} 0: MUL TEMP[0].x, IN[2].xxxx, IMM[0].xxxx 1: F2I TEMP[0].x, TEMP[0].xxxx 2: F2I TEMP[1].x, IN[2].yyyy 3: IDIV TEMP[2].x, TEMP[1].xxxx, IMM[1].xxxx 4: I2F TEMP[3].x, TEMP[0].xxxx 5: I2F TEMP[4].x, TEMP[2].xxxx 6: MOV TEMP[3].y, TEMP[4].xxxx 7: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx 8: INEG TEMP[2].x, TEMP[2].xxxx 9: UADD TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx 10: I2F TEMP[2].x, TEMP[2].xxxx 11: MOV TEMP[3].z, TEMP[2].xxxx 12: ADD TEMP[2].xyz, TEMP[3].xyzz, IMM[0].yyyy 13: I2F TEMP[1].x, TEMP[1].xxxx 14: ADD TEMP[1].x, IN[2].yyyy, -TEMP[1].xxxx 15: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].wwww 16: F2I TEMP[1].x, TEMP[1].xxxx 17: USEQ TEMP[4].x, TEMP[1].xxxx, IMM[1].yyyy 18: AND TEMP[4].x, TEMP[4].xxxx, IMM[2].xxxx 19: USEQ TEMP[5].x, TEMP[1].xxxx, IMM[1].zzzz 20: AND TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx 21: MOV TEMP[4].y, TEMP[5].xxxx 22: USEQ TEMP[1].x, TEMP[1].xxxx, IMM[1].wwww 23: AND TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx 24: MOV TEMP[4].z, TEMP[1].xxxx 25: MOV TEMP[1].xyz, TEMP[4].xyzx 26: MOV TEMP[4].w, IMM[2].yyyy 27: MOV TEMP[4].xyz, TEMP[3].xyzx 28: MOV TEMP[3].y, IMM[2].yzyy 29: DP4 TEMP[4].x, TEMP[1], TEMP[4] 30: MUL TEMP[3].x, TEMP[4].xxxx, IMM[2].wwww 31: MOV TEMP[3].xy, TEMP[3].xyyy 32: MOV TEMP[3].w, IMM[2].yyyy 33: TXL TEMP[3].xy, TEMP[3], SAMP[0], 2D 34: MAD TEMP[4].x, TEMP[3].xxxx, IMM[3].xxxx, IMM[3].yyyy 35: MOV TEMP[2].w, TEMP[4].xxxx 36: MUL TEMP[3].x, TEMP[3].yyyy, IMM[3].zzzz 37: MOV TEMP[1].w, TEMP[3].xxxx 38: UMUL TEMP[0].x, IMM[4].xxxx, TEMP[0].xxxx 39: I2F TEMP[0].x, TEMP[0].xxxx 40: ADD TEMP[0].x, IN[2].xxxx, -TEMP[0].xxxx 41: ADD TEMP[0].x, TEMP[0].xxxx, IMM[3].wwww 42: MUL TEMP[3], CONST[9], IN[0].xxxx 43: MAD TEMP[3], CONST[10], IN[0].yyyy, TEMP[3] 44: MAD TEMP[3], CONST[11], IN[0].zzzz, TEMP[3] 45: MAD TEMP[3], CONST[12], IN[0].wwww, TEMP[3] 46: MOV TEMP[4].x, CONST[4].xxxx 47: MOV TEMP[4].y, CONST[5].xxxx 48: MOV TEMP[4].z, CONST[6].xxxx 49: MOV TEMP[5].x, CONST[4].yyyy 50: MOV TEMP[5].y, CONST[5].yyyy 51: MOV TEMP[5].z, CONST[6].yyyy 52: MOV TEMP[6].x, CONST[4].zzzz 53: MOV TEMP[6].y, CONST[5].zzzz 54: MOV TEMP[6].z, CONST[6].zzzz 55: MUL TEMP[4].xyz, TEMP[4].xyzz, IN[1].xxxx 56: MAD TEMP[4].xyz, TEMP[5].xyzz, IN[1].yyyy, TEMP[4].xyzz 57: MAD TEMP[0].xyz, TEMP[6].xyzz, TEMP[0].xxxx, TEMP[4].xyzz 58: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[0].xyzz 59: RSQ TEMP[4].x, TEMP[4].xxxx 60: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xxxx 61: MOV TEMP[0].yzw, TEMP[0].yxyz 62: MUL TEMP[4], CONST[0], IN[0].xxxx 63: MAD TEMP[4], CONST[1], IN[0].yyyy, TEMP[4] 64: MAD TEMP[4], CONST[2], IN[0].zzzz, TEMP[4] 65: MAD TEMP[4].xyz, CONST[3], IN[0].wwww, TEMP[4] 66: MOV TEMP[4].xyz, TEMP[4].xyzx 67: MOV TEMP[0].x, TEMP[3].zzzz 68: MOV OUT[1], TEMP[2] 69: MOV OUT[4], TEMP[4] 70: MOV OUT[2], TEMP[1] 71: MOV OUT[3], TEMP[0] 72: MOV OUT[0], TEMP[3] 73: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %53 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %54 = load <32 x i8>, <32 x i8> addrspace(2)* %53, align 32, !tbaa !0 %55 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0 %57 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = add i32 %5, %7 %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59) %61 = extractelement <4 x float> %60, i32 0 %62 = extractelement <4 x float> %60, i32 1 %63 = extractelement <4 x float> %60, i32 2 %64 = extractelement <4 x float> %60, i32 3 %65 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = add i32 %5, %7 %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %67) %69 = extractelement <4 x float> %68, i32 0 %70 = extractelement <4 x float> %68, i32 1 %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = add i32 %5, %7 %74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %73) %75 = extractelement <4 x float> %74, i32 0 %76 = extractelement <4 x float> %74, i32 1 %77 = fmul float %75, 2.500000e-01 %78 = fptosi float %77 to i32 %79 = fptosi float %76 to i32 %80 = sdiv i32 %79, 256 %81 = sitofp i32 %78 to float %82 = sitofp i32 %80 to float %83 = shl nsw i32 %80, 8 %84 = sub i32 %79, %83 %85 = sitofp i32 %84 to float %86 = fadd float %81, -1.000000e+00 %87 = fadd float %82, -1.000000e+00 %88 = fadd float %85, -1.000000e+00 %89 = sitofp i32 %79 to float %90 = fsub float %76, %89 %91 = fmul float %90, 1.000000e+01 %92 = fadd float %91, 0x3FDFFE5CA0000000 %93 = fptosi float %92 to i32 %94 = icmp eq i32 %93, 0 %95 = select i1 %94, float 1.000000e+00, float 0.000000e+00 %96 = icmp eq i32 %93, 1 %97 = select i1 %96, float 1.000000e+00, float 0.000000e+00 %98 = icmp eq i32 %93, 2 %99 = select i1 %98, float 1.000000e+00, float 0.000000e+00 %100 = fmul float %95, %81 %101 = fmul float %97, %82 %102 = fadd float %100, %101 %103 = fmul float %99, %85 %104 = fadd float %102, %103 %105 = fadd float %104, 0.000000e+00 %106 = fmul float %105, 0x3F70101020000000 %107 = bitcast float %106 to i32 %108 = insertelement <4 x i32> undef, i32 %107, i32 0 %109 = insertelement <4 x i32> %108, i32 1036831949, i32 1 %110 = insertelement <4 x i32> %109, i32 0, i32 2 %111 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %110, <32 x i8> %54, <16 x i8> %56, i32 2) %112 = extractelement <4 x float> %111, i32 0 %113 = extractelement <4 x float> %111, i32 1 %114 = fmul float %112, 1.600000e+01 %115 = fadd float %114, -8.000000e+00 %116 = fmul float %113, 4.000000e+00 %117 = shl i32 %78, 2 %118 = sitofp i32 %117 to float %119 = fsub float %75, %118 %120 = fadd float %119, -2.000000e+00 %121 = fmul float %37, %61 %122 = fmul float %38, %61 %123 = fmul float %39, %61 %124 = fmul float %40, %61 %125 = fmul float %41, %62 %126 = fadd float %125, %121 %127 = fmul float %42, %62 %128 = fadd float %127, %122 %129 = fmul float %43, %62 %130 = fadd float %129, %123 %131 = fmul float %44, %62 %132 = fadd float %131, %124 %133 = fmul float %45, %63 %134 = fadd float %133, %126 %135 = fmul float %46, %63 %136 = fadd float %135, %128 %137 = fmul float %47, %63 %138 = fadd float %137, %130 %139 = fmul float %48, %63 %140 = fadd float %139, %132 %141 = fmul float %49, %64 %142 = fadd float %141, %134 %143 = fmul float %50, %64 %144 = fadd float %143, %136 %145 = fmul float %51, %64 %146 = fadd float %145, %138 %147 = fmul float %52, %64 %148 = fadd float %147, %140 %149 = fmul float %28, %69 %150 = fmul float %31, %69 %151 = fmul float %34, %69 %152 = fmul float %29, %70 %153 = fadd float %152, %149 %154 = fmul float %32, %70 %155 = fadd float %154, %150 %156 = fmul float %35, %70 %157 = fadd float %156, %151 %158 = fmul float %30, %120 %159 = fadd float %158, %153 %160 = fmul float %33, %120 %161 = fadd float %160, %155 %162 = fmul float %36, %120 %163 = fadd float %162, %157 %164 = fmul float %159, %159 %165 = fmul float %161, %161 %166 = fadd float %165, %164 %167 = fmul float %163, %163 %168 = fadd float %166, %167 %169 = call float @llvm.AMDGPU.rsq.clamped.f32(float %168) %170 = fmul float %159, %169 %171 = fmul float %161, %169 %172 = fmul float %163, %169 %173 = fmul float %13, %61 %174 = fmul float %14, %61 %175 = fmul float %15, %61 %176 = fmul float %16, %61 %177 = fmul float %17, %62 %178 = fadd float %177, %173 %179 = fmul float %18, %62 %180 = fadd float %179, %174 %181 = fmul float %19, %62 %182 = fadd float %181, %175 %183 = fmul float %20, %62 %184 = fadd float %183, %176 %185 = fmul float %21, %63 %186 = fadd float %185, %178 %187 = fmul float %22, %63 %188 = fadd float %187, %180 %189 = fmul float %23, %63 %190 = fadd float %189, %182 %191 = fmul float %24, %63 %192 = fadd float %191, %184 %193 = fmul float %25, %64 %194 = fadd float %193, %186 %195 = fmul float %26, %64 %196 = fadd float %195, %188 %197 = fmul float %27, %64 %198 = fadd float %197, %190 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %86, float %87, float %88, float %115) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %95, float %97, float %99, float %116) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %146, float %170, float %171, float %172) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %194, float %196, float %198, float %192) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %142, float %144, float %146, float %148) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0x3efff2e5 ; 7E0202FF 3EFFF2E5 v_mov_b32_e32 v2, 0xc1000000 ; 7E0402FF C1000000 v_mov_b32_e32 v5, 0 ; 7E0A0280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx4 s[16:19], s[8:9], 0x0 ; C0880900 s_load_dwordx4 s[20:23], s[8:9], 0x4 ; C08A0904 s_load_dwordx4 s[24:27], s[8:9], 0x8 ; C08C0908 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s28, s[0:3], 0x10 ; C20E0110 s_buffer_load_dword s29, s[0:3], 0x11 ; C20E8111 buffer_load_format_xyzw v[6:9], v0, s[16:19], 0 idxen ; E00C2000 80040600 buffer_load_format_xyzw v[10:13], v0, s[20:23], 0 idxen ; E00C2000 80050A00 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[12:15], v0, s[24:27], 0 idxen ; E00C2000 80060C00 s_buffer_load_dword s16, s[0:3], 0x12 ; C2080112 s_buffer_load_dword s17, s[0:3], 0x14 ; C2088114 s_buffer_load_dword s18, s[0:3], 0x15 ; C2090115 s_buffer_load_dword s19, s[0:3], 0x16 ; C2098116 s_buffer_load_dword s20, s[0:3], 0x18 ; C20A0118 s_buffer_load_dword s21, s[0:3], 0x19 ; C20A8119 s_buffer_load_dword s22, s[0:3], 0x1a ; C20B011A s_buffer_load_dword s23, s[0:3], 0x24 ; C20B8124 s_buffer_load_dword s24, s[0:3], 0x25 ; C20C0125 s_buffer_load_dword s25, s[0:3], 0x26 ; C20C8126 s_buffer_load_dword s26, s[0:3], 0x27 ; C20D0127 s_buffer_load_dword s27, s[0:3], 0x28 ; C20D8128 s_buffer_load_dword s30, s[0:3], 0x29 ; C20F0129 s_buffer_load_dword s31, s[0:3], 0x0 ; C20F8100 s_buffer_load_dword s32, s[0:3], 0x1 ; C2100101 s_buffer_load_dword s33, s[0:3], 0x2 ; C2108102 s_buffer_load_dword s34, s[0:3], 0x3 ; C2110103 s_buffer_load_dword s35, s[0:3], 0x4 ; C2118104 s_buffer_load_dword s36, s[0:3], 0x2a ; C212012A s_buffer_load_dword s37, s[0:3], 0x2b ; C212812B s_buffer_load_dword s38, s[0:3], 0x2c ; C213012C s_buffer_load_dword s39, s[0:3], 0x2d ; C213812D s_buffer_load_dword s40, s[0:3], 0x2e ; C214012E s_buffer_load_dword s41, s[0:3], 0x5 ; C2148105 s_buffer_load_dword s42, s[0:3], 0x6 ; C2150106 s_buffer_load_dword s43, s[0:3], 0x7 ; C2158107 s_buffer_load_dword s44, s[0:3], 0x8 ; C2160108 s_buffer_load_dword s45, s[0:3], 0x9 ; C2168109 s_buffer_load_dword s46, s[0:3], 0xa ; C217010A s_buffer_load_dword s47, s[0:3], 0xb ; C217810B s_buffer_load_dword s48, s[0:3], 0xc ; C218010C s_buffer_load_dword s49, s[0:3], 0xd ; C218810D s_buffer_load_dword s50, s[0:3], 0xe ; C219010E s_buffer_load_dword s51, s[0:3], 0x2f ; C219812F s_buffer_load_dword s52, s[0:3], 0x30 ; C21A0130 s_buffer_load_dword s53, s[0:3], 0x31 ; C21A8131 s_buffer_load_dword s54, s[0:3], 0x32 ; C21B0132 s_buffer_load_dword s0, s[0:3], 0x33 ; C2000133 v_mul_f32_e32 v0, s28, v10 ; 1000141C s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v14, s17, v10 ; 101C1411 v_mul_f32_e32 v10, s20, v10 ; 10141414 v_mac_f32_e32 v0, s29, v11 ; 3E00161D v_mac_f32_e32 v14, s18, v11 ; 3E1C1612 v_mac_f32_e32 v10, s21, v11 ; 3E141615 v_mul_f32_e32 v11, s23, v6 ; 10160C17 v_mac_f32_e32 v11, s27, v7 ; 3E160E1B v_mul_f32_e32 v15, s24, v6 ; 101E0C18 v_mac_f32_e32 v15, s30, v7 ; 3E1E0E1E v_mul_f32_e32 v16, s25, v6 ; 10200C19 v_mac_f32_e32 v16, s36, v7 ; 3E200E24 v_mul_f32_e32 v17, s26, v6 ; 10220C1A v_mac_f32_e32 v17, s37, v7 ; 3E220E25 v_mul_f32_e32 v18, s31, v6 ; 10240C1F v_mac_f32_e32 v18, s35, v7 ; 3E240E23 v_mul_f32_e32 v19, s32, v6 ; 10260C20 v_mac_f32_e32 v19, s41, v7 ; 3E260E29 v_mul_f32_e32 v20, s33, v6 ; 10280C21 v_mac_f32_e32 v20, s42, v7 ; 3E280E2A v_mul_f32_e32 v6, s34, v6 ; 100C0C22 v_mac_f32_e32 v6, s43, v7 ; 3E0C0E2B v_mac_f32_e32 v11, s38, v8 ; 3E161026 v_mac_f32_e32 v15, s39, v8 ; 3E1E1027 v_mac_f32_e32 v16, s40, v8 ; 3E201028 v_mac_f32_e32 v17, s51, v8 ; 3E221033 v_mac_f32_e32 v18, s44, v8 ; 3E24102C v_mac_f32_e32 v19, s45, v8 ; 3E26102D v_mac_f32_e32 v20, s46, v8 ; 3E28102E v_mac_f32_e32 v6, s47, v8 ; 3E0C102F v_mac_f32_e32 v11, s52, v9 ; 3E161234 v_mac_f32_e32 v15, s53, v9 ; 3E1E1235 v_mac_f32_e32 v16, s54, v9 ; 3E201236 v_mul_f32_e32 v3, 0x3e800000, v12 ; 100618FF 3E800000 v_cvt_i32_f32_e32 v4, v13 ; 7E08110D v_cvt_i32_f32_e32 v3, v3 ; 7E061103 v_mac_f32_e32 v17, s0, v9 ; 3E221200 v_mac_f32_e32 v18, s48, v9 ; 3E241230 v_cvt_f32_i32_e32 v7, v4 ; 7E0E0B04 v_lshlrev_b32_e32 v8, 2, v3 ; 34100682 v_cvt_f32_i32_e32 v8, v8 ; 7E100B08 v_mac_f32_e32 v19, s49, v9 ; 3E261231 v_mac_f32_e32 v20, s50, v9 ; 3E281232 v_subrev_f32_e32 v7, v7, v13 ; 0A0E1B07 v_subrev_f32_e32 v8, v8, v12 ; 0A101908 v_madmk_f32_e32 v1, v7, v1, 0x41200000 ; 40020307 41200000 v_ashrrev_i32_e32 v7, 31, v4 ; 300E089F v_lshrrev_b32_e32 v7, 24, v7 ; 2C0E0E98 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_add_i32_e32 v7, v4, v7 ; 4A0E0F04 v_and_b32_e32 v9, 0xffffff00, v7 ; 36120EFF FFFFFF00 v_sub_i32_e32 v4, v4, v9 ; 4C081304 v_cmp_eq_i32_e32 vcc, 0, v1 ; 7D040280 v_cndmask_b32_e64 v9, 0, 1.0, vcc ; D2000009 01A9E480 v_cvt_f32_i32_e32 v12, v3 ; 7E180B03 v_ashrrev_i32_e32 v3, 8, v7 ; 30060E88 v_cvt_f32_i32_e32 v7, v3 ; 7E0E0B03 v_cmp_eq_i32_e32 vcc, 1, v1 ; 7D040281 v_cmp_eq_i32_e64 s[0:1], 2, v1 ; D1040000 00020282 v_cndmask_b32_e64 v1, 0, 1.0, vcc ; D2000001 01A9E480 v_cvt_f32_i32_e32 v13, v4 ; 7E1A0B04 v_mul_f32_e32 v3, v7, v1 ; 10060307 v_mac_f32_e32 v3, v12, v9 ; 3E06130C v_cndmask_b32_e64 v21, 0, 1.0, s[0:1] ; D2000015 0001E480 v_mac_f32_e32 v3, v13, v21 ; 3E062B0D v_add_f32_e32 v3, 0, v3 ; 06060680 v_mul_f32_e32 v3, 0x3b808081, v3 ; 100606FF 3B808081 v_mov_b32_e32 v4, 0x3dcccccd ; 7E0802FF 3DCCCCCD image_sample_l v[3:4], 3, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[4:11], s[12:15] ; F0900300 00610303 v_add_f32_e32 v5, -2.0, v8 ; 060A10F5 v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mac_f32_e32 v14, s19, v5 ; 3E1C0A13 v_mac_f32_e32 v10, s22, v5 ; 3E140A16 v_add_f32_e32 v5, -1.0, v12 ; 060A18F3 v_add_f32_e32 v7, -1.0, v7 ; 060E0EF3 v_mul_f32_e32 v8, v0, v0 ; 10100100 v_mac_f32_e32 v8, v14, v14 ; 3E101D0E v_mac_f32_e32 v8, v10, v10 ; 3E10150A v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_add_f32_e32 v12, -1.0, v13 ; 06181AF3 s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v2, v3, v2, 0x41800000 ; 40040503 41800000 exp 15, 32, 0, 0, 0, v5, v7, v12, v2 ; F800020F 020C0705 v_mul_f32_e32 v0, v8, v0 ; 10000108 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, v8, v14 ; 10041D08 v_mul_f32_e32 v3, 4.0, v4 ; 100608F6 v_mul_f32_e32 v4, v8, v10 ; 10081508 exp 15, 33, 0, 0, 0, v9, v1, v21, v3 ; F800021F 03150109 exp 15, 34, 0, 0, 0, v16, v0, v2, v4 ; F800022F 04020010 exp 15, 35, 0, 0, 0, v18, v19, v20, v6 ; F800023F 06141312 exp 15, 12, 0, 1, 0, v11, v15, v16, v17 ; F80008CF 11100F0B s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 24 Code Size: 680 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL SVIEW[6], 2D, FLOAT DCL SVIEW[7], 2D, FLOAT DCL SVIEW[8], 2D, FLOAT DCL SVIEW[9], 2D, FLOAT DCL CONST[0..3] DCL CONST[14..22] DCL TEMP[0..36], LOCAL IMM[0] FLT32 { -0.2000, 7.0000, 0.0100, 0.5000} IMM[1] FLT32 { 64.0000, -64.0000, 4.0000, 0.6931} IMM[2] FLT32 { 0.0039, 0.0020, 1.0000, 2.0000} IMM[3] FLT32 { 3.0000, 0.0000, -1.0000, 0.0001} IMM[4] FLT32 { 32.0000, 1.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, CONST[1].xyzz, CONST[1].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, CONST[1].xyzz, TEMP[0].xxxx 3: ADD TEMP[1].xyz, CONST[0].xyzz, -IN[3].xyzz 4: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 5: RSQ TEMP[2].x, TEMP[2].xxxx 6: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 7: ABS TEMP[2].xyz, IN[2].yzww 8: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 9: RSQ TEMP[3].x, TEMP[3].xxxx 10: MAD TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx, IMM[0].xxxx 11: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[0].yyyy 12: MAX TEMP[2].xyz, TEMP[2].xyzz, IMM[0].zzzz 13: ADD TEMP[3].x, TEMP[2].xxxx, TEMP[2].yyyy 14: ADD TEMP[3].x, TEMP[3].xxxx, TEMP[2].zzzz 15: RCP TEMP[3].xyz, TEMP[3].xxxx 16: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xyzz 17: ADD TEMP[3], IN[0], IMM[0].wwww 18: FLR TEMP[3].xyz, TEMP[3] 19: MOV TEMP[4].x, CONST[14].xxxx 20: MUL TEMP[5].x, TEMP[3].xxxx, CONST[14].xxxx 21: MOV TEMP[6].x, TEMP[5].xxxx 22: FLR TEMP[5].x, TEMP[5].xxxx 23: MUL TEMP[5].x, TEMP[5].xxxx, CONST[14].xxxx 24: FSGE TEMP[7].x, TEMP[3].xxxx, IMM[1].xxxx 25: UIF TEMP[7].xxxx :0 26: MOV TEMP[4].x, CONST[15].xxxx 27: ADD TEMP[7].x, TEMP[3].xxxx, IMM[1].yyyy 28: MUL TEMP[7].x, TEMP[7].xxxx, CONST[15].xxxx 29: MOV TEMP[6].x, TEMP[7].xxxx 30: FLR TEMP[8].x, TEMP[7].xxxx 31: MUL TEMP[8].x, TEMP[8].xxxx, CONST[15].xxxx 32: MOV TEMP[5].x, TEMP[8].xxxx 33: FRC TEMP[7].x, TEMP[7].xxxx 34: FRC TEMP[9].x, TEMP[8].xxxx 35: MOV TEMP[7].y, TEMP[9].xxxx 36: FLR TEMP[8].x, TEMP[8].xxxx 37: ADD TEMP[8].x, TEMP[8].xxxx, IMM[1].zzzz 38: MOV TEMP[7].z, TEMP[8].xxxx 39: MOV TEMP[7].xyz, TEMP[7].xyzx 40: ELSE :0 41: FRC TEMP[6].x, TEMP[6].xxxx 42: FRC TEMP[8].x, TEMP[5].xxxx 43: MOV TEMP[6].y, TEMP[8].xxxx 44: FLR TEMP[5].x, TEMP[5].xxxx 45: MOV TEMP[6].z, TEMP[5].xxxx 46: MOV TEMP[7].xyz, TEMP[6].xyzx 47: ENDIF 48: MOV TEMP[5].x, CONST[14].xxxx 49: MUL TEMP[6].x, TEMP[3].yyyy, CONST[14].xxxx 50: MOV TEMP[8].x, TEMP[6].xxxx 51: FLR TEMP[6].x, TEMP[6].xxxx 52: MUL TEMP[6].x, TEMP[6].xxxx, CONST[14].xxxx 53: FSGE TEMP[9].x, TEMP[3].yyyy, IMM[1].xxxx 54: UIF TEMP[9].xxxx :0 55: MOV TEMP[5].x, CONST[15].xxxx 56: ADD TEMP[9].x, TEMP[3].yyyy, IMM[1].yyyy 57: MUL TEMP[9].x, TEMP[9].xxxx, CONST[15].xxxx 58: MOV TEMP[8].x, TEMP[9].xxxx 59: FLR TEMP[10].x, TEMP[9].xxxx 60: MUL TEMP[10].x, TEMP[10].xxxx, CONST[15].xxxx 61: MOV TEMP[6].x, TEMP[10].xxxx 62: FRC TEMP[9].x, TEMP[9].xxxx 63: FRC TEMP[11].x, TEMP[10].xxxx 64: MOV TEMP[9].y, TEMP[11].xxxx 65: FLR TEMP[10].x, TEMP[10].xxxx 66: ADD TEMP[10].x, TEMP[10].xxxx, IMM[1].zzzz 67: MOV TEMP[9].z, TEMP[10].xxxx 68: MOV TEMP[9].xyz, TEMP[9].xyzx 69: ELSE :0 70: FRC TEMP[8].x, TEMP[8].xxxx 71: FRC TEMP[10].x, TEMP[6].xxxx 72: MOV TEMP[8].y, TEMP[10].xxxx 73: FLR TEMP[6].x, TEMP[6].xxxx 74: MOV TEMP[8].z, TEMP[6].xxxx 75: MOV TEMP[9].xyz, TEMP[8].xyzx 76: ENDIF 77: MOV TEMP[6].x, CONST[14].xxxx 78: MUL TEMP[8].x, TEMP[3].zzzz, CONST[14].xxxx 79: MOV TEMP[10].x, TEMP[8].xxxx 80: FLR TEMP[8].x, TEMP[8].xxxx 81: MUL TEMP[8].x, TEMP[8].xxxx, CONST[14].xxxx 82: FSGE TEMP[11].x, TEMP[3].zzzz, IMM[1].xxxx 83: UIF TEMP[11].xxxx :0 84: MOV TEMP[6].x, CONST[15].xxxx 85: ADD TEMP[3].x, TEMP[3].zzzz, IMM[1].yyyy 86: MUL TEMP[3].x, TEMP[3].xxxx, CONST[15].xxxx 87: MOV TEMP[10].x, TEMP[3].xxxx 88: FLR TEMP[11].x, TEMP[3].xxxx 89: MUL TEMP[11].x, TEMP[11].xxxx, CONST[15].xxxx 90: MOV TEMP[8].x, TEMP[11].xxxx 91: FRC TEMP[3].x, TEMP[3].xxxx 92: FRC TEMP[12].x, TEMP[11].xxxx 93: MOV TEMP[3].y, TEMP[12].xxxx 94: FLR TEMP[11].x, TEMP[11].xxxx 95: ADD TEMP[11].x, TEMP[11].xxxx, IMM[1].zzzz 96: MOV TEMP[3].z, TEMP[11].xxxx 97: MOV TEMP[3].xyz, TEMP[3].xyzx 98: ELSE :0 99: FRC TEMP[10].x, TEMP[10].xxxx 100: FRC TEMP[11].x, TEMP[8].xxxx 101: MOV TEMP[10].y, TEMP[11].xxxx 102: FLR TEMP[8].x, TEMP[8].xxxx 103: MOV TEMP[10].z, TEMP[8].xxxx 104: MOV TEMP[3].xyz, TEMP[10].xyzx 105: ENDIF 106: ADD TEMP[8].xyz, IN[3].xyzz, -CONST[0].xyzz 107: DP3 TEMP[8].x, TEMP[8].xyzz, TEMP[8].xyzz 108: MUL TEMP[8].x, CONST[20].xxxx, TEMP[8].xxxx 109: LG2 TEMP[8].x, TEMP[8].xxxx 110: MUL TEMP[8].x, TEMP[8].xxxx, IMM[1].wwww 111: MUL TEMP[8].x, TEMP[8].xxxx, CONST[19].xxxx 112: MOV TEMP[10].xy, IN[3].xyxx 113: MOV TEMP[11].x, IMM[2].xxxx 114: FSNE TEMP[12].x, CONST[14].xxxx, TEMP[4].xxxx 115: UIF TEMP[12].xxxx :0 116: MOV TEMP[11].x, IMM[2].yyyy 117: RCP TEMP[12].x, CONST[17].xxxx 118: MUL TEMP[10].xy, IN[3].xyyy, TEMP[12].xxxx 119: ELSE :0 120: RCP TEMP[12].x, CONST[16].xxxx 121: MUL TEMP[10].xy, TEMP[10].xyyy, TEMP[12].xxxx 122: ENDIF 123: FRC TEMP[10].xy, TEMP[10].xyyy 124: MUL TEMP[12].x, CONST[18].xxxx, IMM[2].wwww 125: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx 126: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx 127: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].xxxx 128: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[12].xxxx, TEMP[11].xxxx 129: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[4].xxxx, TEMP[7].xyyy 130: MOV TEMP[11].xy, TEMP[10].xyyy 131: MOV TEMP[11].w, TEMP[8].xxxx 132: TXL TEMP[11], TEMP[11], SAMP[8], 2D 133: FSEQ TEMP[12].x, TEMP[7].zzzz, IMM[1].zzzz 134: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz 135: MOV TEMP[13].xy, TEMP[10].xyyy 136: MOV TEMP[13].w, TEMP[8].xxxx 137: TXL TEMP[13], TEMP[13], SAMP[6], 2D 138: FSEQ TEMP[14].x, TEMP[7].zzzz, IMM[3].xxxx 139: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 140: MOV TEMP[15].xy, TEMP[10].xyyy 141: MOV TEMP[15].w, TEMP[8].xxxx 142: TXL TEMP[15], TEMP[15], SAMP[4], 2D 143: FSEQ TEMP[16].x, TEMP[7].zzzz, IMM[2].wwww 144: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 145: MOV TEMP[17].xy, TEMP[10].xyyy 146: MOV TEMP[17].w, TEMP[8].xxxx 147: TXL TEMP[17], TEMP[17], SAMP[2], 2D 148: FSEQ TEMP[18].x, TEMP[7].zzzz, IMM[2].zzzz 149: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 150: MOV TEMP[10].xy, TEMP[10].xyyy 151: MOV TEMP[10].w, TEMP[8].xxxx 152: TXL TEMP[10], TEMP[10], SAMP[0], 2D 153: FSEQ TEMP[19].x, TEMP[7].zzzz, IMM[3].yyyy 154: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 155: MUL TEMP[10], TEMP[10], TEMP[19].xxxx 156: MAD TEMP[10], TEMP[17], TEMP[18].xxxx, TEMP[10] 157: MAD TEMP[10], TEMP[15], TEMP[16].xxxx, TEMP[10] 158: MAD TEMP[10], TEMP[13], TEMP[14].xxxx, TEMP[10] 159: MAD TEMP[10], TEMP[11], TEMP[12].xxxx, TEMP[10] 160: MOV TEMP[11].xy, IN[3].zyzz 161: MOV TEMP[12].x, IMM[2].xxxx 162: FSNE TEMP[13].x, CONST[14].xxxx, TEMP[4].xxxx 163: UIF TEMP[13].xxxx :0 164: MOV TEMP[12].x, IMM[2].yyyy 165: RCP TEMP[13].x, CONST[17].xxxx 166: MUL TEMP[11].xy, IN[3].zyyy, TEMP[13].xxxx 167: ELSE :0 168: RCP TEMP[13].x, CONST[16].xxxx 169: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx 170: ENDIF 171: FRC TEMP[11].xy, TEMP[11].xyyy 172: MUL TEMP[13].x, CONST[18].xxxx, IMM[2].wwww 173: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx 174: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx 175: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].xxxx 176: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx 177: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[4].xxxx, TEMP[7].xyyy 178: MOV TEMP[12].xy, TEMP[11].xyyy 179: MOV TEMP[12].w, TEMP[8].xxxx 180: TXL TEMP[12], TEMP[12], SAMP[8], 2D 181: FSEQ TEMP[13].x, TEMP[7].zzzz, IMM[1].zzzz 182: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 183: MOV TEMP[14].xy, TEMP[11].xyyy 184: MOV TEMP[14].w, TEMP[8].xxxx 185: TXL TEMP[14], TEMP[14], SAMP[6], 2D 186: FSEQ TEMP[15].x, TEMP[7].zzzz, IMM[3].xxxx 187: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 188: MOV TEMP[16].xy, TEMP[11].xyyy 189: MOV TEMP[16].w, TEMP[8].xxxx 190: TXL TEMP[16], TEMP[16], SAMP[4], 2D 191: FSEQ TEMP[17].x, TEMP[7].zzzz, IMM[2].wwww 192: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 193: MOV TEMP[18].xy, TEMP[11].xyyy 194: MOV TEMP[18].w, TEMP[8].xxxx 195: TXL TEMP[18], TEMP[18], SAMP[2], 2D 196: FSEQ TEMP[19].x, TEMP[7].zzzz, IMM[2].zzzz 197: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 198: MOV TEMP[11].xy, TEMP[11].xyyy 199: MOV TEMP[11].w, TEMP[8].xxxx 200: TXL TEMP[11], TEMP[11], SAMP[0], 2D 201: FSEQ TEMP[20].x, TEMP[7].zzzz, IMM[3].yyyy 202: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 203: MUL TEMP[11], TEMP[11], TEMP[20].xxxx 204: MAD TEMP[11], TEMP[18], TEMP[19].xxxx, TEMP[11] 205: MAD TEMP[11], TEMP[16], TEMP[17].xxxx, TEMP[11] 206: MAD TEMP[11], TEMP[14], TEMP[15].xxxx, TEMP[11] 207: MAD TEMP[11], TEMP[12], TEMP[13].xxxx, TEMP[11] 208: MOV TEMP[12].xy, IN[3].zxzz 209: MOV TEMP[13].x, IMM[2].xxxx 210: FSNE TEMP[14].x, CONST[14].xxxx, TEMP[4].xxxx 211: UIF TEMP[14].xxxx :0 212: MOV TEMP[13].x, IMM[2].yyyy 213: RCP TEMP[14].x, CONST[17].xxxx 214: MUL TEMP[12].xy, IN[3].zxxx, TEMP[14].xxxx 215: ELSE :0 216: RCP TEMP[14].x, CONST[16].xxxx 217: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx 218: ENDIF 219: FRC TEMP[12].xy, TEMP[12].xyyy 220: MUL TEMP[14].x, CONST[18].xxxx, IMM[2].wwww 221: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx 222: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx 223: MUL TEMP[13].x, TEMP[13].xxxx, CONST[18].xxxx 224: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx 225: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[4].xxxx, TEMP[7].xyyy 226: MOV TEMP[13].xy, TEMP[12].xyyy 227: MOV TEMP[13].w, TEMP[8].xxxx 228: TXL TEMP[13], TEMP[13], SAMP[8], 2D 229: FSEQ TEMP[14].x, TEMP[7].zzzz, IMM[1].zzzz 230: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 231: MOV TEMP[15].xy, TEMP[12].xyyy 232: MOV TEMP[15].w, TEMP[8].xxxx 233: TXL TEMP[15], TEMP[15], SAMP[6], 2D 234: FSEQ TEMP[16].x, TEMP[7].zzzz, IMM[3].xxxx 235: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 236: MOV TEMP[17].xy, TEMP[12].xyyy 237: MOV TEMP[17].w, TEMP[8].xxxx 238: TXL TEMP[17], TEMP[17], SAMP[4], 2D 239: FSEQ TEMP[18].x, TEMP[7].zzzz, IMM[2].wwww 240: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 241: MOV TEMP[19].xy, TEMP[12].xyyy 242: MOV TEMP[19].w, TEMP[8].xxxx 243: TXL TEMP[19], TEMP[19], SAMP[2], 2D 244: FSEQ TEMP[20].x, TEMP[7].zzzz, IMM[2].zzzz 245: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 246: MOV TEMP[12].xy, TEMP[12].xyyy 247: MOV TEMP[12].w, TEMP[8].xxxx 248: TXL TEMP[12], TEMP[12], SAMP[0], 2D 249: FSEQ TEMP[21].x, TEMP[7].zzzz, IMM[3].yyyy 250: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 251: MUL TEMP[12], TEMP[12], TEMP[21].xxxx 252: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12] 253: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12] 254: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12] 255: MAD TEMP[12], TEMP[13], TEMP[14].xxxx, TEMP[12] 256: MOV TEMP[13].xy, IN[3].xyxx 257: MOV TEMP[14].x, IMM[2].xxxx 258: FSNE TEMP[15].x, CONST[14].xxxx, TEMP[5].xxxx 259: UIF TEMP[15].xxxx :0 260: MOV TEMP[14].x, IMM[2].yyyy 261: RCP TEMP[15].x, CONST[17].xxxx 262: MUL TEMP[13].xy, IN[3].xyyy, TEMP[15].xxxx 263: ELSE :0 264: RCP TEMP[15].x, CONST[16].xxxx 265: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx 266: ENDIF 267: FRC TEMP[13].xy, TEMP[13].xyyy 268: MUL TEMP[15].x, CONST[18].xxxx, IMM[2].wwww 269: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx 270: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx 271: MUL TEMP[14].x, TEMP[14].xxxx, CONST[18].xxxx 272: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx 273: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[5].xxxx, TEMP[9].xyyy 274: MOV TEMP[14].xy, TEMP[13].xyyy 275: MOV TEMP[14].w, TEMP[8].xxxx 276: TXL TEMP[14], TEMP[14], SAMP[8], 2D 277: FSEQ TEMP[15].x, TEMP[9].zzzz, IMM[1].zzzz 278: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 279: MOV TEMP[16].xy, TEMP[13].xyyy 280: MOV TEMP[16].w, TEMP[8].xxxx 281: TXL TEMP[16], TEMP[16], SAMP[6], 2D 282: FSEQ TEMP[17].x, TEMP[9].zzzz, IMM[3].xxxx 283: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 284: MOV TEMP[18].xy, TEMP[13].xyyy 285: MOV TEMP[18].w, TEMP[8].xxxx 286: TXL TEMP[18], TEMP[18], SAMP[4], 2D 287: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[2].wwww 288: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 289: MOV TEMP[20].xy, TEMP[13].xyyy 290: MOV TEMP[20].w, TEMP[8].xxxx 291: TXL TEMP[20], TEMP[20], SAMP[2], 2D 292: FSEQ TEMP[21].x, TEMP[9].zzzz, IMM[2].zzzz 293: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 294: MOV TEMP[13].xy, TEMP[13].xyyy 295: MOV TEMP[13].w, TEMP[8].xxxx 296: TXL TEMP[13], TEMP[13], SAMP[0], 2D 297: FSEQ TEMP[22].x, TEMP[9].zzzz, IMM[3].yyyy 298: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 299: MUL TEMP[13], TEMP[13], TEMP[22].xxxx 300: MAD TEMP[13], TEMP[20], TEMP[21].xxxx, TEMP[13] 301: MAD TEMP[13], TEMP[18], TEMP[19].xxxx, TEMP[13] 302: MAD TEMP[13], TEMP[16], TEMP[17].xxxx, TEMP[13] 303: MAD TEMP[13], TEMP[14], TEMP[15].xxxx, TEMP[13] 304: MOV TEMP[14].xy, IN[3].zyzz 305: MOV TEMP[15].x, IMM[2].xxxx 306: FSNE TEMP[16].x, CONST[14].xxxx, TEMP[5].xxxx 307: UIF TEMP[16].xxxx :0 308: MOV TEMP[15].x, IMM[2].yyyy 309: RCP TEMP[16].x, CONST[17].xxxx 310: MUL TEMP[14].xy, IN[3].zyyy, TEMP[16].xxxx 311: ELSE :0 312: RCP TEMP[16].x, CONST[16].xxxx 313: MUL TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx 314: ENDIF 315: FRC TEMP[14].xy, TEMP[14].xyyy 316: MUL TEMP[16].x, CONST[18].xxxx, IMM[2].wwww 317: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[15].xxxx 318: ADD TEMP[16].x, IMM[2].zzzz, -TEMP[16].xxxx 319: MUL TEMP[15].x, TEMP[15].xxxx, CONST[18].xxxx 320: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx, TEMP[15].xxxx 321: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[5].xxxx, TEMP[9].xyyy 322: MOV TEMP[15].xy, TEMP[14].xyyy 323: MOV TEMP[15].w, TEMP[8].xxxx 324: TXL TEMP[15], TEMP[15], SAMP[8], 2D 325: FSEQ TEMP[16].x, TEMP[9].zzzz, IMM[1].zzzz 326: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 327: MOV TEMP[17].xy, TEMP[14].xyyy 328: MOV TEMP[17].w, TEMP[8].xxxx 329: TXL TEMP[17], TEMP[17], SAMP[6], 2D 330: FSEQ TEMP[18].x, TEMP[9].zzzz, IMM[3].xxxx 331: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 332: MOV TEMP[19].xy, TEMP[14].xyyy 333: MOV TEMP[19].w, TEMP[8].xxxx 334: TXL TEMP[19], TEMP[19], SAMP[4], 2D 335: FSEQ TEMP[20].x, TEMP[9].zzzz, IMM[2].wwww 336: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 337: MOV TEMP[21].xy, TEMP[14].xyyy 338: MOV TEMP[21].w, TEMP[8].xxxx 339: TXL TEMP[21], TEMP[21], SAMP[2], 2D 340: FSEQ TEMP[22].x, TEMP[9].zzzz, IMM[2].zzzz 341: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 342: MOV TEMP[14].xy, TEMP[14].xyyy 343: MOV TEMP[14].w, TEMP[8].xxxx 344: TXL TEMP[14], TEMP[14], SAMP[0], 2D 345: FSEQ TEMP[23].x, TEMP[9].zzzz, IMM[3].yyyy 346: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz 347: MUL TEMP[14], TEMP[14], TEMP[23].xxxx 348: MAD TEMP[14], TEMP[21], TEMP[22].xxxx, TEMP[14] 349: MAD TEMP[14], TEMP[19], TEMP[20].xxxx, TEMP[14] 350: MAD TEMP[14], TEMP[17], TEMP[18].xxxx, TEMP[14] 351: MAD TEMP[14], TEMP[15], TEMP[16].xxxx, TEMP[14] 352: MOV TEMP[15].xy, IN[3].zxzz 353: MOV TEMP[16].x, IMM[2].xxxx 354: FSNE TEMP[17].x, CONST[14].xxxx, TEMP[5].xxxx 355: UIF TEMP[17].xxxx :0 356: MOV TEMP[16].x, IMM[2].yyyy 357: RCP TEMP[17].x, CONST[17].xxxx 358: MUL TEMP[15].xy, IN[3].zxxx, TEMP[17].xxxx 359: ELSE :0 360: RCP TEMP[17].x, CONST[16].xxxx 361: MUL TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx 362: ENDIF 363: FRC TEMP[15].xy, TEMP[15].xyyy 364: MUL TEMP[17].x, CONST[18].xxxx, IMM[2].wwww 365: MUL TEMP[17].x, TEMP[17].xxxx, TEMP[16].xxxx 366: ADD TEMP[17].x, IMM[2].zzzz, -TEMP[17].xxxx 367: MUL TEMP[16].x, TEMP[16].xxxx, CONST[18].xxxx 368: MAD TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx, TEMP[16].xxxx 369: MAD TEMP[15].xy, TEMP[15].xyyy, TEMP[5].xxxx, TEMP[9].xyyy 370: MOV TEMP[16].xy, TEMP[15].xyyy 371: MOV TEMP[16].w, TEMP[8].xxxx 372: TXL TEMP[16], TEMP[16], SAMP[8], 2D 373: FSEQ TEMP[17].x, TEMP[9].zzzz, IMM[1].zzzz 374: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 375: MOV TEMP[18].xy, TEMP[15].xyyy 376: MOV TEMP[18].w, TEMP[8].xxxx 377: TXL TEMP[18], TEMP[18], SAMP[6], 2D 378: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[3].xxxx 379: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 380: MOV TEMP[20].xy, TEMP[15].xyyy 381: MOV TEMP[20].w, TEMP[8].xxxx 382: TXL TEMP[20], TEMP[20], SAMP[4], 2D 383: FSEQ TEMP[21].x, TEMP[9].zzzz, IMM[2].wwww 384: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 385: MOV TEMP[22].xy, TEMP[15].xyyy 386: MOV TEMP[22].w, TEMP[8].xxxx 387: TXL TEMP[22], TEMP[22], SAMP[2], 2D 388: FSEQ TEMP[23].x, TEMP[9].zzzz, IMM[2].zzzz 389: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz 390: MOV TEMP[15].xy, TEMP[15].xyyy 391: MOV TEMP[15].w, TEMP[8].xxxx 392: TXL TEMP[15], TEMP[15], SAMP[0], 2D 393: FSEQ TEMP[24].x, TEMP[9].zzzz, IMM[3].yyyy 394: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz 395: MUL TEMP[15], TEMP[15], TEMP[24].xxxx 396: MAD TEMP[15], TEMP[22], TEMP[23].xxxx, TEMP[15] 397: MAD TEMP[15], TEMP[20], TEMP[21].xxxx, TEMP[15] 398: MAD TEMP[15], TEMP[18], TEMP[19].xxxx, TEMP[15] 399: MAD TEMP[15], TEMP[16], TEMP[17].xxxx, TEMP[15] 400: MOV TEMP[16].xy, IN[3].xyxx 401: MOV TEMP[17].x, IMM[2].xxxx 402: FSNE TEMP[18].x, CONST[14].xxxx, TEMP[6].xxxx 403: UIF TEMP[18].xxxx :0 404: MOV TEMP[17].x, IMM[2].yyyy 405: RCP TEMP[18].x, CONST[17].xxxx 406: MUL TEMP[16].xy, IN[3].xyyy, TEMP[18].xxxx 407: ELSE :0 408: RCP TEMP[18].x, CONST[16].xxxx 409: MUL TEMP[16].xy, TEMP[16].xyyy, TEMP[18].xxxx 410: ENDIF 411: FRC TEMP[16].xy, TEMP[16].xyyy 412: MUL TEMP[18].x, CONST[18].xxxx, IMM[2].wwww 413: MUL TEMP[18].x, TEMP[18].xxxx, TEMP[17].xxxx 414: ADD TEMP[18].x, IMM[2].zzzz, -TEMP[18].xxxx 415: MUL TEMP[17].x, TEMP[17].xxxx, CONST[18].xxxx 416: MAD TEMP[16].xy, TEMP[16].xyyy, TEMP[18].xxxx, TEMP[17].xxxx 417: MAD TEMP[16].xy, TEMP[16].xyyy, TEMP[6].xxxx, TEMP[3].xyyy 418: MOV TEMP[17].xy, TEMP[16].xyyy 419: MOV TEMP[17].w, TEMP[8].xxxx 420: TXL TEMP[17], TEMP[17], SAMP[8], 2D 421: FSEQ TEMP[18].x, TEMP[3].zzzz, IMM[1].zzzz 422: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 423: MOV TEMP[19].xy, TEMP[16].xyyy 424: MOV TEMP[19].w, TEMP[8].xxxx 425: TXL TEMP[19], TEMP[19], SAMP[6], 2D 426: FSEQ TEMP[20].x, TEMP[3].zzzz, IMM[3].xxxx 427: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 428: MOV TEMP[21].xy, TEMP[16].xyyy 429: MOV TEMP[21].w, TEMP[8].xxxx 430: TXL TEMP[21], TEMP[21], SAMP[4], 2D 431: FSEQ TEMP[22].x, TEMP[3].zzzz, IMM[2].wwww 432: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 433: MOV TEMP[23].xy, TEMP[16].xyyy 434: MOV TEMP[23].w, TEMP[8].xxxx 435: TXL TEMP[23], TEMP[23], SAMP[2], 2D 436: FSEQ TEMP[24].x, TEMP[3].zzzz, IMM[2].zzzz 437: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz 438: MOV TEMP[16].xy, TEMP[16].xyyy 439: MOV TEMP[16].w, TEMP[8].xxxx 440: TXL TEMP[16], TEMP[16], SAMP[0], 2D 441: FSEQ TEMP[25].x, TEMP[3].zzzz, IMM[3].yyyy 442: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz 443: MUL TEMP[16], TEMP[16], TEMP[25].xxxx 444: MAD TEMP[16], TEMP[23], TEMP[24].xxxx, TEMP[16] 445: MAD TEMP[16], TEMP[21], TEMP[22].xxxx, TEMP[16] 446: MAD TEMP[16], TEMP[19], TEMP[20].xxxx, TEMP[16] 447: MAD TEMP[16], TEMP[17], TEMP[18].xxxx, TEMP[16] 448: MOV TEMP[17].xy, IN[3].zyzz 449: MOV TEMP[18].x, IMM[2].xxxx 450: FSNE TEMP[19].x, CONST[14].xxxx, TEMP[6].xxxx 451: UIF TEMP[19].xxxx :0 452: MOV TEMP[18].x, IMM[2].yyyy 453: RCP TEMP[19].x, CONST[17].xxxx 454: MUL TEMP[17].xy, IN[3].zyyy, TEMP[19].xxxx 455: ELSE :0 456: RCP TEMP[19].x, CONST[16].xxxx 457: MUL TEMP[17].xy, TEMP[17].xyyy, TEMP[19].xxxx 458: ENDIF 459: FRC TEMP[17].xy, TEMP[17].xyyy 460: MUL TEMP[19].x, CONST[18].xxxx, IMM[2].wwww 461: MUL TEMP[19].x, TEMP[19].xxxx, TEMP[18].xxxx 462: ADD TEMP[19].x, IMM[2].zzzz, -TEMP[19].xxxx 463: MUL TEMP[18].x, TEMP[18].xxxx, CONST[18].xxxx 464: MAD TEMP[17].xy, TEMP[17].xyyy, TEMP[19].xxxx, TEMP[18].xxxx 465: MAD TEMP[17].xy, TEMP[17].xyyy, TEMP[6].xxxx, TEMP[3].xyyy 466: MOV TEMP[18].xy, TEMP[17].xyyy 467: MOV TEMP[18].w, TEMP[8].xxxx 468: TXL TEMP[18], TEMP[18], SAMP[8], 2D 469: FSEQ TEMP[19].x, TEMP[3].zzzz, IMM[1].zzzz 470: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 471: MOV TEMP[20].xy, TEMP[17].xyyy 472: MOV TEMP[20].w, TEMP[8].xxxx 473: TXL TEMP[20], TEMP[20], SAMP[6], 2D 474: FSEQ TEMP[21].x, TEMP[3].zzzz, IMM[3].xxxx 475: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 476: MOV TEMP[22].xy, TEMP[17].xyyy 477: MOV TEMP[22].w, TEMP[8].xxxx 478: TXL TEMP[22], TEMP[22], SAMP[4], 2D 479: FSEQ TEMP[23].x, TEMP[3].zzzz, IMM[2].wwww 480: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz 481: MOV TEMP[24].xy, TEMP[17].xyyy 482: MOV TEMP[24].w, TEMP[8].xxxx 483: TXL TEMP[24], TEMP[24], SAMP[2], 2D 484: FSEQ TEMP[25].x, TEMP[3].zzzz, IMM[2].zzzz 485: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz 486: MOV TEMP[17].xy, TEMP[17].xyyy 487: MOV TEMP[17].w, TEMP[8].xxxx 488: TXL TEMP[17], TEMP[17], SAMP[0], 2D 489: FSEQ TEMP[26].x, TEMP[3].zzzz, IMM[3].yyyy 490: AND TEMP[26].x, TEMP[26].xxxx, IMM[2].zzzz 491: MUL TEMP[17], TEMP[17], TEMP[26].xxxx 492: MAD TEMP[17], TEMP[24], TEMP[25].xxxx, TEMP[17] 493: MAD TEMP[17], TEMP[22], TEMP[23].xxxx, TEMP[17] 494: MAD TEMP[17], TEMP[20], TEMP[21].xxxx, TEMP[17] 495: MAD TEMP[17], TEMP[18], TEMP[19].xxxx, TEMP[17] 496: MOV TEMP[18].xy, IN[3].zxzz 497: MOV TEMP[19].x, IMM[2].xxxx 498: FSNE TEMP[20].x, CONST[14].xxxx, TEMP[6].xxxx 499: UIF TEMP[20].xxxx :0 500: MOV TEMP[19].x, IMM[2].yyyy 501: RCP TEMP[20].x, CONST[17].xxxx 502: MUL TEMP[18].xy, IN[3].zxxx, TEMP[20].xxxx 503: ELSE :0 504: RCP TEMP[20].x, CONST[16].xxxx 505: MUL TEMP[18].xy, TEMP[18].xyyy, TEMP[20].xxxx 506: ENDIF 507: FRC TEMP[18].xy, TEMP[18].xyyy 508: MUL TEMP[20].x, CONST[18].xxxx, IMM[2].wwww 509: MUL TEMP[20].x, TEMP[20].xxxx, TEMP[19].xxxx 510: ADD TEMP[20].x, IMM[2].zzzz, -TEMP[20].xxxx 511: MUL TEMP[19].x, TEMP[19].xxxx, CONST[18].xxxx 512: MAD TEMP[18].xy, TEMP[18].xyyy, TEMP[20].xxxx, TEMP[19].xxxx 513: MAD TEMP[18].xy, TEMP[18].xyyy, TEMP[6].xxxx, TEMP[3].xyyy 514: MOV TEMP[19].xy, TEMP[18].xyyy 515: MOV TEMP[19].w, TEMP[8].xxxx 516: TXL TEMP[19], TEMP[19], SAMP[8], 2D 517: FSEQ TEMP[20].x, TEMP[3].zzzz, IMM[1].zzzz 518: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 519: MOV TEMP[21].xy, TEMP[18].xyyy 520: MOV TEMP[21].w, TEMP[8].xxxx 521: TXL TEMP[21], TEMP[21], SAMP[6], 2D 522: FSEQ TEMP[22].x, TEMP[3].zzzz, IMM[3].xxxx 523: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 524: MOV TEMP[23].xy, TEMP[18].xyyy 525: MOV TEMP[23].w, TEMP[8].xxxx 526: TXL TEMP[23], TEMP[23], SAMP[4], 2D 527: FSEQ TEMP[24].x, TEMP[3].zzzz, IMM[2].wwww 528: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz 529: MOV TEMP[25].xy, TEMP[18].xyyy 530: MOV TEMP[25].w, TEMP[8].xxxx 531: TXL TEMP[25], TEMP[25], SAMP[2], 2D 532: FSEQ TEMP[26].x, TEMP[3].zzzz, IMM[2].zzzz 533: AND TEMP[26].x, TEMP[26].xxxx, IMM[2].zzzz 534: MOV TEMP[18].xy, TEMP[18].xyyy 535: MOV TEMP[18].w, TEMP[8].xxxx 536: TXL TEMP[18], TEMP[18], SAMP[0], 2D 537: FSEQ TEMP[27].x, TEMP[3].zzzz, IMM[3].yyyy 538: AND TEMP[27].x, TEMP[27].xxxx, IMM[2].zzzz 539: MUL TEMP[18], TEMP[18], TEMP[27].xxxx 540: MAD TEMP[18], TEMP[25], TEMP[26].xxxx, TEMP[18] 541: MAD TEMP[18], TEMP[23], TEMP[24].xxxx, TEMP[18] 542: MAD TEMP[18], TEMP[21], TEMP[22].xxxx, TEMP[18] 543: MAD TEMP[18], TEMP[19], TEMP[20].xxxx, TEMP[18] 544: MUL TEMP[16], TEMP[16], TEMP[2].zzzz 545: MAD TEMP[16], TEMP[17], TEMP[2].xxxx, TEMP[16] 546: MAD TEMP[16], TEMP[18], TEMP[2].yyyy, TEMP[16] 547: MUL TEMP[13], TEMP[13], TEMP[2].zzzz 548: MAD TEMP[13], TEMP[14], TEMP[2].xxxx, TEMP[13] 549: MAD TEMP[13], TEMP[15], TEMP[2].yyyy, TEMP[13] 550: MUL TEMP[10], TEMP[10], TEMP[2].zzzz 551: MAD TEMP[10], TEMP[11], TEMP[2].xxxx, TEMP[10] 552: MAD TEMP[10], TEMP[12], TEMP[2].yyyy, TEMP[10] 553: MUL TEMP[10], IN[1].xxxx, TEMP[10] 554: MAD TEMP[10], IN[1].yyyy, TEMP[13], TEMP[10] 555: MAD TEMP[10].xyz, IN[1].zzzz, TEMP[16], TEMP[10] 556: MOV TEMP[11].xy, IN[3].zyzz 557: MOV TEMP[12].x, IMM[2].xxxx 558: FSNE TEMP[13].x, CONST[14].xxxx, TEMP[4].xxxx 559: UIF TEMP[13].xxxx :0 560: MOV TEMP[12].x, IMM[2].yyyy 561: RCP TEMP[13].x, CONST[17].xxxx 562: MUL TEMP[11].xy, IN[3].zyyy, TEMP[13].xxxx 563: ELSE :0 564: RCP TEMP[13].x, CONST[16].xxxx 565: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx 566: ENDIF 567: FRC TEMP[11].xy, TEMP[11].xyyy 568: MUL TEMP[13].x, CONST[18].xxxx, IMM[2].wwww 569: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx 570: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx 571: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].xxxx 572: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx 573: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[4].xxxx, TEMP[7].xyyy 574: MOV TEMP[12].xy, TEMP[11].xyyy 575: MOV TEMP[12].w, TEMP[8].xxxx 576: TXL TEMP[12], TEMP[12], SAMP[9], 2D 577: FSEQ TEMP[13].x, TEMP[7].zzzz, IMM[1].zzzz 578: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 579: MOV TEMP[14].xy, TEMP[11].xyyy 580: MOV TEMP[14].w, TEMP[8].xxxx 581: TXL TEMP[14], TEMP[14], SAMP[7], 2D 582: FSEQ TEMP[15].x, TEMP[7].zzzz, IMM[3].xxxx 583: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 584: MOV TEMP[16].xy, TEMP[11].xyyy 585: MOV TEMP[16].w, TEMP[8].xxxx 586: TXL TEMP[16], TEMP[16], SAMP[5], 2D 587: FSEQ TEMP[17].x, TEMP[7].zzzz, IMM[2].wwww 588: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 589: MOV TEMP[18].xy, TEMP[11].xyyy 590: MOV TEMP[18].w, TEMP[8].xxxx 591: TXL TEMP[18], TEMP[18], SAMP[3], 2D 592: FSEQ TEMP[19].x, TEMP[7].zzzz, IMM[2].zzzz 593: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 594: MOV TEMP[11].xy, TEMP[11].xyyy 595: MOV TEMP[11].w, TEMP[8].xxxx 596: TXL TEMP[11], TEMP[11], SAMP[1], 2D 597: FSEQ TEMP[20].x, TEMP[7].zzzz, IMM[3].yyyy 598: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 599: MUL TEMP[11], TEMP[11], TEMP[20].xxxx 600: MAD TEMP[11], TEMP[18], TEMP[19].xxxx, TEMP[11] 601: MAD TEMP[11], TEMP[16], TEMP[17].xxxx, TEMP[11] 602: MAD TEMP[11], TEMP[14], TEMP[15].xxxx, TEMP[11] 603: MAD TEMP[11].yw, TEMP[12], TEMP[13].xxxx, TEMP[11] 604: MAD TEMP[11].xy, TEMP[11].wyyy, IMM[2].wwww, IMM[3].zzzz 605: DP2 TEMP[12].x, TEMP[11].xyyy, TEMP[11].xyyy 606: MOV_SAT TEMP[28].x, TEMP[12].xxxx 607: MOV TEMP[12].xy, IN[3].zxzz 608: MOV TEMP[13].x, IMM[2].xxxx 609: FSNE TEMP[14].x, CONST[14].xxxx, TEMP[4].xxxx 610: UIF TEMP[14].xxxx :0 611: MOV TEMP[13].x, IMM[2].yyyy 612: RCP TEMP[14].x, CONST[17].xxxx 613: MUL TEMP[12].xy, IN[3].zxxx, TEMP[14].xxxx 614: ELSE :0 615: RCP TEMP[14].x, CONST[16].xxxx 616: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx 617: ENDIF 618: FRC TEMP[12].xy, TEMP[12].xyyy 619: MUL TEMP[14].x, CONST[18].xxxx, IMM[2].wwww 620: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx 621: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx 622: MUL TEMP[13].x, TEMP[13].xxxx, CONST[18].xxxx 623: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx 624: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[4].xxxx, TEMP[7].xyyy 625: MOV TEMP[13].xy, TEMP[12].xyyy 626: MOV TEMP[13].w, TEMP[8].xxxx 627: TXL TEMP[13], TEMP[13], SAMP[9], 2D 628: FSEQ TEMP[14].x, TEMP[7].zzzz, IMM[1].zzzz 629: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 630: MOV TEMP[15].xy, TEMP[12].xyyy 631: MOV TEMP[15].w, TEMP[8].xxxx 632: TXL TEMP[15], TEMP[15], SAMP[7], 2D 633: FSEQ TEMP[16].x, TEMP[7].zzzz, IMM[3].xxxx 634: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 635: MOV TEMP[17].xy, TEMP[12].xyyy 636: MOV TEMP[17].w, TEMP[8].xxxx 637: TXL TEMP[17], TEMP[17], SAMP[5], 2D 638: FSEQ TEMP[18].x, TEMP[7].zzzz, IMM[2].wwww 639: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 640: MOV TEMP[19].xy, TEMP[12].xyyy 641: MOV TEMP[19].w, TEMP[8].xxxx 642: TXL TEMP[19], TEMP[19], SAMP[3], 2D 643: FSEQ TEMP[20].x, TEMP[7].zzzz, IMM[2].zzzz 644: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 645: MOV TEMP[12].xy, TEMP[12].xyyy 646: MOV TEMP[12].w, TEMP[8].xxxx 647: TXL TEMP[12], TEMP[12], SAMP[1], 2D 648: FSEQ TEMP[21].x, TEMP[7].zzzz, IMM[3].yyyy 649: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 650: MUL TEMP[12], TEMP[12], TEMP[21].xxxx 651: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12] 652: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12] 653: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12] 654: MAD TEMP[12].yw, TEMP[13], TEMP[14].xxxx, TEMP[12] 655: MAD TEMP[12].xy, TEMP[12].wyyy, IMM[2].wwww, IMM[3].zzzz 656: DP2 TEMP[13].x, TEMP[12].xyyy, TEMP[12].xyyy 657: MOV_SAT TEMP[29].x, TEMP[13].xxxx 658: MOV TEMP[13].xy, IN[3].xyxx 659: MOV TEMP[14].x, IMM[2].xxxx 660: FSNE TEMP[15].x, CONST[14].xxxx, TEMP[4].xxxx 661: UIF TEMP[15].xxxx :0 662: MOV TEMP[14].x, IMM[2].yyyy 663: RCP TEMP[15].x, CONST[17].xxxx 664: MUL TEMP[13].xy, IN[3].xyyy, TEMP[15].xxxx 665: ELSE :0 666: RCP TEMP[15].x, CONST[16].xxxx 667: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx 668: ENDIF 669: FRC TEMP[13].xy, TEMP[13].xyyy 670: MUL TEMP[15].x, CONST[18].xxxx, IMM[2].wwww 671: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx 672: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx 673: MUL TEMP[14].x, TEMP[14].xxxx, CONST[18].xxxx 674: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx 675: MAD TEMP[4].xy, TEMP[13].xyyy, TEMP[4].xxxx, TEMP[7].xyyy 676: MOV TEMP[13].xy, TEMP[4].xyyy 677: MOV TEMP[13].w, TEMP[8].xxxx 678: TXL TEMP[13], TEMP[13], SAMP[9], 2D 679: FSEQ TEMP[14].x, TEMP[7].zzzz, IMM[1].zzzz 680: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 681: MOV TEMP[15].xy, TEMP[4].xyyy 682: MOV TEMP[15].w, TEMP[8].xxxx 683: TXL TEMP[15], TEMP[15], SAMP[7], 2D 684: FSEQ TEMP[16].x, TEMP[7].zzzz, IMM[3].xxxx 685: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 686: MOV TEMP[17].xy, TEMP[4].xyyy 687: MOV TEMP[17].w, TEMP[8].xxxx 688: TXL TEMP[17], TEMP[17], SAMP[5], 2D 689: FSEQ TEMP[18].x, TEMP[7].zzzz, IMM[2].wwww 690: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 691: MOV TEMP[19].xy, TEMP[4].xyyy 692: MOV TEMP[19].w, TEMP[8].xxxx 693: TXL TEMP[19], TEMP[19], SAMP[3], 2D 694: FSEQ TEMP[20].x, TEMP[7].zzzz, IMM[2].zzzz 695: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 696: MOV TEMP[4].xy, TEMP[4].xyyy 697: MOV TEMP[4].w, TEMP[8].xxxx 698: TXL TEMP[4], TEMP[4], SAMP[1], 2D 699: FSEQ TEMP[7].x, TEMP[7].zzzz, IMM[3].yyyy 700: AND TEMP[7].x, TEMP[7].xxxx, IMM[2].zzzz 701: MUL TEMP[4], TEMP[4], TEMP[7].xxxx 702: MAD TEMP[4], TEMP[19], TEMP[20].xxxx, TEMP[4] 703: MAD TEMP[4], TEMP[17], TEMP[18].xxxx, TEMP[4] 704: MAD TEMP[4], TEMP[15], TEMP[16].xxxx, TEMP[4] 705: MAD TEMP[4].yw, TEMP[13], TEMP[14].xxxx, TEMP[4] 706: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[2].wwww, IMM[3].zzzz 707: DP2 TEMP[7].x, TEMP[4].xyyy, TEMP[4].xyyy 708: MOV_SAT TEMP[30].x, TEMP[7].xxxx 709: MOV TEMP[7].x, IMM[3].yyyy 710: MOV TEMP[7].y, TEMP[11].xxxx 711: MOV TEMP[7].z, TEMP[11].yyyy 712: MOV TEMP[11].y, IMM[3].yyyy 713: MOV TEMP[11].x, TEMP[12].yyyy 714: MOV TEMP[11].z, TEMP[12].xxxx 715: MOV TEMP[12].z, IMM[3].yyyy 716: MOV TEMP[12].xy, TEMP[4].xyxx 717: MUL TEMP[4].xyz, TEMP[7].xyzz, TEMP[2].xxxx 718: MAD TEMP[4].xyz, TEMP[11].xyzz, TEMP[2].yyyy, TEMP[4].xyzz 719: MAD TEMP[4].xyz, TEMP[12].xyzz, TEMP[2].zzzz, TEMP[4].xyzz 720: MOV TEMP[7].xy, IN[3].zyzz 721: MOV TEMP[11].x, IMM[2].xxxx 722: FSNE TEMP[12].x, CONST[14].xxxx, TEMP[5].xxxx 723: UIF TEMP[12].xxxx :0 724: MOV TEMP[11].x, IMM[2].yyyy 725: RCP TEMP[12].x, CONST[17].xxxx 726: MUL TEMP[7].xy, IN[3].zyyy, TEMP[12].xxxx 727: ELSE :0 728: RCP TEMP[12].x, CONST[16].xxxx 729: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[12].xxxx 730: ENDIF 731: FRC TEMP[7].xy, TEMP[7].xyyy 732: MUL TEMP[12].x, CONST[18].xxxx, IMM[2].wwww 733: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx 734: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx 735: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].xxxx 736: MAD TEMP[7].xy, TEMP[7].xyyy, TEMP[12].xxxx, TEMP[11].xxxx 737: MAD TEMP[7].xy, TEMP[7].xyyy, TEMP[5].xxxx, TEMP[9].xyyy 738: MOV TEMP[11].xy, TEMP[7].xyyy 739: MOV TEMP[11].w, TEMP[8].xxxx 740: TXL TEMP[11], TEMP[11], SAMP[9], 2D 741: FSEQ TEMP[12].x, TEMP[9].zzzz, IMM[1].zzzz 742: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz 743: MOV TEMP[13].xy, TEMP[7].xyyy 744: MOV TEMP[13].w, TEMP[8].xxxx 745: TXL TEMP[13], TEMP[13], SAMP[7], 2D 746: FSEQ TEMP[14].x, TEMP[9].zzzz, IMM[3].xxxx 747: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 748: MOV TEMP[15].xy, TEMP[7].xyyy 749: MOV TEMP[15].w, TEMP[8].xxxx 750: TXL TEMP[15], TEMP[15], SAMP[5], 2D 751: FSEQ TEMP[16].x, TEMP[9].zzzz, IMM[2].wwww 752: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 753: MOV TEMP[17].xy, TEMP[7].xyyy 754: MOV TEMP[17].w, TEMP[8].xxxx 755: TXL TEMP[17], TEMP[17], SAMP[3], 2D 756: FSEQ TEMP[18].x, TEMP[9].zzzz, IMM[2].zzzz 757: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 758: MOV TEMP[7].xy, TEMP[7].xyyy 759: MOV TEMP[7].w, TEMP[8].xxxx 760: TXL TEMP[7], TEMP[7], SAMP[1], 2D 761: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[3].yyyy 762: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 763: MUL TEMP[7], TEMP[7], TEMP[19].xxxx 764: MAD TEMP[7], TEMP[17], TEMP[18].xxxx, TEMP[7] 765: MAD TEMP[7], TEMP[15], TEMP[16].xxxx, TEMP[7] 766: MAD TEMP[7], TEMP[13], TEMP[14].xxxx, TEMP[7] 767: MAD TEMP[7].yw, TEMP[11], TEMP[12].xxxx, TEMP[7] 768: MAD TEMP[7].xy, TEMP[7].wyyy, IMM[2].wwww, IMM[3].zzzz 769: DP2 TEMP[11].x, TEMP[7].xyyy, TEMP[7].xyyy 770: MOV_SAT TEMP[31].x, TEMP[11].xxxx 771: MOV TEMP[11].xy, IN[3].zxzz 772: MOV TEMP[12].x, IMM[2].xxxx 773: FSNE TEMP[13].x, CONST[14].xxxx, TEMP[5].xxxx 774: UIF TEMP[13].xxxx :0 775: MOV TEMP[12].x, IMM[2].yyyy 776: RCP TEMP[13].x, CONST[17].xxxx 777: MUL TEMP[11].xy, IN[3].zxxx, TEMP[13].xxxx 778: ELSE :0 779: RCP TEMP[13].x, CONST[16].xxxx 780: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx 781: ENDIF 782: FRC TEMP[11].xy, TEMP[11].xyyy 783: MUL TEMP[13].x, CONST[18].xxxx, IMM[2].wwww 784: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx 785: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx 786: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].xxxx 787: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx 788: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[5].xxxx, TEMP[9].xyyy 789: MOV TEMP[12].xy, TEMP[11].xyyy 790: MOV TEMP[12].w, TEMP[8].xxxx 791: TXL TEMP[12], TEMP[12], SAMP[9], 2D 792: FSEQ TEMP[13].x, TEMP[9].zzzz, IMM[1].zzzz 793: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 794: MOV TEMP[14].xy, TEMP[11].xyyy 795: MOV TEMP[14].w, TEMP[8].xxxx 796: TXL TEMP[14], TEMP[14], SAMP[7], 2D 797: FSEQ TEMP[15].x, TEMP[9].zzzz, IMM[3].xxxx 798: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 799: MOV TEMP[16].xy, TEMP[11].xyyy 800: MOV TEMP[16].w, TEMP[8].xxxx 801: TXL TEMP[16], TEMP[16], SAMP[5], 2D 802: FSEQ TEMP[17].x, TEMP[9].zzzz, IMM[2].wwww 803: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 804: MOV TEMP[18].xy, TEMP[11].xyyy 805: MOV TEMP[18].w, TEMP[8].xxxx 806: TXL TEMP[18], TEMP[18], SAMP[3], 2D 807: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[2].zzzz 808: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 809: MOV TEMP[11].xy, TEMP[11].xyyy 810: MOV TEMP[11].w, TEMP[8].xxxx 811: TXL TEMP[11], TEMP[11], SAMP[1], 2D 812: FSEQ TEMP[20].x, TEMP[9].zzzz, IMM[3].yyyy 813: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 814: MUL TEMP[11], TEMP[11], TEMP[20].xxxx 815: MAD TEMP[11], TEMP[18], TEMP[19].xxxx, TEMP[11] 816: MAD TEMP[11], TEMP[16], TEMP[17].xxxx, TEMP[11] 817: MAD TEMP[11], TEMP[14], TEMP[15].xxxx, TEMP[11] 818: MAD TEMP[11].yw, TEMP[12], TEMP[13].xxxx, TEMP[11] 819: MAD TEMP[11].xy, TEMP[11].wyyy, IMM[2].wwww, IMM[3].zzzz 820: DP2 TEMP[12].x, TEMP[11].xyyy, TEMP[11].xyyy 821: MOV_SAT TEMP[32].x, TEMP[12].xxxx 822: MOV TEMP[12].xy, IN[3].xyxx 823: MOV TEMP[13].x, IMM[2].xxxx 824: FSNE TEMP[14].x, CONST[14].xxxx, TEMP[5].xxxx 825: UIF TEMP[14].xxxx :0 826: MOV TEMP[13].x, IMM[2].yyyy 827: RCP TEMP[14].x, CONST[17].xxxx 828: MUL TEMP[12].xy, IN[3].xyyy, TEMP[14].xxxx 829: ELSE :0 830: RCP TEMP[14].x, CONST[16].xxxx 831: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx 832: ENDIF 833: FRC TEMP[12].xy, TEMP[12].xyyy 834: MUL TEMP[14].x, CONST[18].xxxx, IMM[2].wwww 835: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx 836: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx 837: MUL TEMP[13].x, TEMP[13].xxxx, CONST[18].xxxx 838: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx 839: MAD TEMP[5].xy, TEMP[12].xyyy, TEMP[5].xxxx, TEMP[9].xyyy 840: MOV TEMP[12].xy, TEMP[5].xyyy 841: MOV TEMP[12].w, TEMP[8].xxxx 842: TXL TEMP[12], TEMP[12], SAMP[9], 2D 843: FSEQ TEMP[13].x, TEMP[9].zzzz, IMM[1].zzzz 844: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 845: MOV TEMP[14].xy, TEMP[5].xyyy 846: MOV TEMP[14].w, TEMP[8].xxxx 847: TXL TEMP[14], TEMP[14], SAMP[7], 2D 848: FSEQ TEMP[15].x, TEMP[9].zzzz, IMM[3].xxxx 849: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 850: MOV TEMP[16].xy, TEMP[5].xyyy 851: MOV TEMP[16].w, TEMP[8].xxxx 852: TXL TEMP[16], TEMP[16], SAMP[5], 2D 853: FSEQ TEMP[17].x, TEMP[9].zzzz, IMM[2].wwww 854: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 855: MOV TEMP[18].xy, TEMP[5].xyyy 856: MOV TEMP[18].w, TEMP[8].xxxx 857: TXL TEMP[18], TEMP[18], SAMP[3], 2D 858: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[2].zzzz 859: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 860: MOV TEMP[5].xy, TEMP[5].xyyy 861: MOV TEMP[5].w, TEMP[8].xxxx 862: TXL TEMP[5], TEMP[5], SAMP[1], 2D 863: FSEQ TEMP[9].x, TEMP[9].zzzz, IMM[3].yyyy 864: AND TEMP[9].x, TEMP[9].xxxx, IMM[2].zzzz 865: MUL TEMP[5], TEMP[5], TEMP[9].xxxx 866: MAD TEMP[5], TEMP[18], TEMP[19].xxxx, TEMP[5] 867: MAD TEMP[5], TEMP[16], TEMP[17].xxxx, TEMP[5] 868: MAD TEMP[5], TEMP[14], TEMP[15].xxxx, TEMP[5] 869: MAD TEMP[5].yw, TEMP[12], TEMP[13].xxxx, TEMP[5] 870: MAD TEMP[5].xy, TEMP[5].wyyy, IMM[2].wwww, IMM[3].zzzz 871: DP2 TEMP[9].x, TEMP[5].xyyy, TEMP[5].xyyy 872: MOV_SAT TEMP[33].x, TEMP[9].xxxx 873: MOV TEMP[9].x, IMM[3].yyyy 874: MOV TEMP[9].y, TEMP[7].xxxx 875: MOV TEMP[9].z, TEMP[7].yyyy 876: MOV TEMP[7].y, IMM[3].yyyy 877: MOV TEMP[7].x, TEMP[11].yyyy 878: MOV TEMP[7].z, TEMP[11].xxxx 879: MOV TEMP[11].z, IMM[3].yyyy 880: MOV TEMP[11].xy, TEMP[5].xyxx 881: MUL TEMP[5].xyz, TEMP[9].xyzz, TEMP[2].xxxx 882: MAD TEMP[5].xyz, TEMP[7].xyzz, TEMP[2].yyyy, TEMP[5].xyzz 883: MAD TEMP[5].xyz, TEMP[11].xyzz, TEMP[2].zzzz, TEMP[5].xyzz 884: MOV TEMP[7].xy, IN[3].zyzz 885: MOV TEMP[9].x, IMM[2].xxxx 886: FSNE TEMP[11].x, CONST[14].xxxx, TEMP[6].xxxx 887: UIF TEMP[11].xxxx :0 888: MOV TEMP[9].x, IMM[2].yyyy 889: RCP TEMP[11].x, CONST[17].xxxx 890: MUL TEMP[7].xy, IN[3].zyyy, TEMP[11].xxxx 891: ELSE :0 892: RCP TEMP[11].x, CONST[16].xxxx 893: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[11].xxxx 894: ENDIF 895: FRC TEMP[7].xy, TEMP[7].xyyy 896: MUL TEMP[11].x, CONST[18].xxxx, IMM[2].wwww 897: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[9].xxxx 898: ADD TEMP[11].x, IMM[2].zzzz, -TEMP[11].xxxx 899: MUL TEMP[9].x, TEMP[9].xxxx, CONST[18].xxxx 900: MAD TEMP[7].xy, TEMP[7].xyyy, TEMP[11].xxxx, TEMP[9].xxxx 901: MAD TEMP[7].xy, TEMP[7].xyyy, TEMP[6].xxxx, TEMP[3].xyyy 902: MOV TEMP[9].xy, TEMP[7].xyyy 903: MOV TEMP[9].w, TEMP[8].xxxx 904: TXL TEMP[9], TEMP[9], SAMP[9], 2D 905: FSEQ TEMP[11].x, TEMP[3].zzzz, IMM[1].zzzz 906: AND TEMP[11].x, TEMP[11].xxxx, IMM[2].zzzz 907: MOV TEMP[12].xy, TEMP[7].xyyy 908: MOV TEMP[12].w, TEMP[8].xxxx 909: TXL TEMP[12], TEMP[12], SAMP[7], 2D 910: FSEQ TEMP[13].x, TEMP[3].zzzz, IMM[3].xxxx 911: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 912: MOV TEMP[14].xy, TEMP[7].xyyy 913: MOV TEMP[14].w, TEMP[8].xxxx 914: TXL TEMP[14], TEMP[14], SAMP[5], 2D 915: FSEQ TEMP[15].x, TEMP[3].zzzz, IMM[2].wwww 916: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 917: MOV TEMP[16].xy, TEMP[7].xyyy 918: MOV TEMP[16].w, TEMP[8].xxxx 919: TXL TEMP[16], TEMP[16], SAMP[3], 2D 920: FSEQ TEMP[17].x, TEMP[3].zzzz, IMM[2].zzzz 921: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 922: MOV TEMP[7].xy, TEMP[7].xyyy 923: MOV TEMP[7].w, TEMP[8].xxxx 924: TXL TEMP[7], TEMP[7], SAMP[1], 2D 925: FSEQ TEMP[18].x, TEMP[3].zzzz, IMM[3].yyyy 926: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 927: MUL TEMP[7], TEMP[7], TEMP[18].xxxx 928: MAD TEMP[7], TEMP[16], TEMP[17].xxxx, TEMP[7] 929: MAD TEMP[7], TEMP[14], TEMP[15].xxxx, TEMP[7] 930: MAD TEMP[7], TEMP[12], TEMP[13].xxxx, TEMP[7] 931: MAD TEMP[7].yw, TEMP[9], TEMP[11].xxxx, TEMP[7] 932: MAD TEMP[7].xy, TEMP[7].wyyy, IMM[2].wwww, IMM[3].zzzz 933: DP2 TEMP[9].x, TEMP[7].xyyy, TEMP[7].xyyy 934: MOV_SAT TEMP[34].x, TEMP[9].xxxx 935: MOV TEMP[9].xy, IN[3].zxzz 936: MOV TEMP[11].x, IMM[2].xxxx 937: FSNE TEMP[12].x, CONST[14].xxxx, TEMP[6].xxxx 938: UIF TEMP[12].xxxx :0 939: MOV TEMP[11].x, IMM[2].yyyy 940: RCP TEMP[12].x, CONST[17].xxxx 941: MUL TEMP[9].xy, IN[3].zxxx, TEMP[12].xxxx 942: ELSE :0 943: RCP TEMP[12].x, CONST[16].xxxx 944: MUL TEMP[9].xy, TEMP[9].xyyy, TEMP[12].xxxx 945: ENDIF 946: FRC TEMP[9].xy, TEMP[9].xyyy 947: MUL TEMP[12].x, CONST[18].xxxx, IMM[2].wwww 948: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx 949: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx 950: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].xxxx 951: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[12].xxxx, TEMP[11].xxxx 952: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[6].xxxx, TEMP[3].xyyy 953: MOV TEMP[11].xy, TEMP[9].xyyy 954: MOV TEMP[11].w, TEMP[8].xxxx 955: TXL TEMP[11], TEMP[11], SAMP[9], 2D 956: FSEQ TEMP[12].x, TEMP[3].zzzz, IMM[1].zzzz 957: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz 958: MOV TEMP[13].xy, TEMP[9].xyyy 959: MOV TEMP[13].w, TEMP[8].xxxx 960: TXL TEMP[13], TEMP[13], SAMP[7], 2D 961: FSEQ TEMP[14].x, TEMP[3].zzzz, IMM[3].xxxx 962: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 963: MOV TEMP[15].xy, TEMP[9].xyyy 964: MOV TEMP[15].w, TEMP[8].xxxx 965: TXL TEMP[15], TEMP[15], SAMP[5], 2D 966: FSEQ TEMP[16].x, TEMP[3].zzzz, IMM[2].wwww 967: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 968: MOV TEMP[17].xy, TEMP[9].xyyy 969: MOV TEMP[17].w, TEMP[8].xxxx 970: TXL TEMP[17], TEMP[17], SAMP[3], 2D 971: FSEQ TEMP[18].x, TEMP[3].zzzz, IMM[2].zzzz 972: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 973: MOV TEMP[9].xy, TEMP[9].xyyy 974: MOV TEMP[9].w, TEMP[8].xxxx 975: TXL TEMP[9], TEMP[9], SAMP[1], 2D 976: FSEQ TEMP[19].x, TEMP[3].zzzz, IMM[3].yyyy 977: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 978: MUL TEMP[9], TEMP[9], TEMP[19].xxxx 979: MAD TEMP[9], TEMP[17], TEMP[18].xxxx, TEMP[9] 980: MAD TEMP[9], TEMP[15], TEMP[16].xxxx, TEMP[9] 981: MAD TEMP[9], TEMP[13], TEMP[14].xxxx, TEMP[9] 982: MAD TEMP[9].yw, TEMP[11], TEMP[12].xxxx, TEMP[9] 983: MAD TEMP[9].xy, TEMP[9].wyyy, IMM[2].wwww, IMM[3].zzzz 984: DP2 TEMP[11].x, TEMP[9].xyyy, TEMP[9].xyyy 985: MOV_SAT TEMP[35].x, TEMP[11].xxxx 986: MOV TEMP[11].xy, IN[3].xyxx 987: MOV TEMP[12].x, IMM[2].xxxx 988: FSNE TEMP[13].x, CONST[14].xxxx, TEMP[6].xxxx 989: UIF TEMP[13].xxxx :0 990: MOV TEMP[12].x, IMM[2].yyyy 991: RCP TEMP[13].x, CONST[17].xxxx 992: MUL TEMP[11].xy, IN[3].xyyy, TEMP[13].xxxx 993: ELSE :0 994: RCP TEMP[13].x, CONST[16].xxxx 995: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx 996: ENDIF 997: FRC TEMP[11].xy, TEMP[11].xyyy 998: MUL TEMP[13].x, CONST[18].xxxx, IMM[2].wwww 999: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx 1000: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx 1001: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].xxxx 1002: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx 1003: MAD TEMP[6].xy, TEMP[11].xyyy, TEMP[6].xxxx, TEMP[3].xyyy 1004: MOV TEMP[11].xy, TEMP[6].xyyy 1005: MOV TEMP[11].w, TEMP[8].xxxx 1006: TXL TEMP[11], TEMP[11], SAMP[9], 2D 1007: FSEQ TEMP[12].x, TEMP[3].zzzz, IMM[1].zzzz 1008: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz 1009: MOV TEMP[13].xy, TEMP[6].xyyy 1010: MOV TEMP[13].w, TEMP[8].xxxx 1011: TXL TEMP[13], TEMP[13], SAMP[7], 2D 1012: FSEQ TEMP[14].x, TEMP[3].zzzz, IMM[3].xxxx 1013: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 1014: MOV TEMP[15].xy, TEMP[6].xyyy 1015: MOV TEMP[15].w, TEMP[8].xxxx 1016: TXL TEMP[15], TEMP[15], SAMP[5], 2D 1017: FSEQ TEMP[16].x, TEMP[3].zzzz, IMM[2].wwww 1018: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 1019: MOV TEMP[17].xy, TEMP[6].xyyy 1020: MOV TEMP[17].w, TEMP[8].xxxx 1021: TXL TEMP[17], TEMP[17], SAMP[3], 2D 1022: FSEQ TEMP[18].x, TEMP[3].zzzz, IMM[2].zzzz 1023: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 1024: MOV TEMP[6].xy, TEMP[6].xyyy 1025: MOV TEMP[6].w, TEMP[8].xxxx 1026: TXL TEMP[6], TEMP[6], SAMP[1], 2D 1027: FSEQ TEMP[3].x, TEMP[3].zzzz, IMM[3].yyyy 1028: AND TEMP[3].x, TEMP[3].xxxx, IMM[2].zzzz 1029: MUL TEMP[3], TEMP[6], TEMP[3].xxxx 1030: MAD TEMP[3], TEMP[17], TEMP[18].xxxx, TEMP[3] 1031: MAD TEMP[3], TEMP[15], TEMP[16].xxxx, TEMP[3] 1032: MAD TEMP[3], TEMP[13], TEMP[14].xxxx, TEMP[3] 1033: MAD TEMP[3].yw, TEMP[11], TEMP[12].xxxx, TEMP[3] 1034: MAD TEMP[3].xy, TEMP[3].wyyy, IMM[2].wwww, IMM[3].zzzz 1035: DP2 TEMP[6].x, TEMP[3].xyyy, TEMP[3].xyyy 1036: MOV_SAT TEMP[36].x, TEMP[6].xxxx 1037: MOV TEMP[6].x, IMM[3].yyyy 1038: MOV TEMP[6].y, TEMP[7].xxxx 1039: MOV TEMP[6].z, TEMP[7].yyyy 1040: MOV TEMP[7].y, IMM[3].yyyy 1041: MOV TEMP[7].x, TEMP[9].yyyy 1042: MOV TEMP[7].z, TEMP[9].xxxx 1043: MOV TEMP[8].z, IMM[3].yyyy 1044: MOV TEMP[8].xy, TEMP[3].xyxx 1045: MOV TEMP[3].w, IMM[2].zzzz 1046: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[2].xxxx 1047: MAD TEMP[6].xyz, TEMP[7].xyzz, TEMP[2].yyyy, TEMP[6].xyzz 1048: MAD TEMP[2].xyz, TEMP[8].xyzz, TEMP[2].zzzz, TEMP[6].xyzz 1049: MUL TEMP[4].xyz, IN[1].xxxx, TEMP[4].xyzz 1050: MAD TEMP[4].xyz, IN[1].yyyy, TEMP[5].xyzz, TEMP[4].xyzz 1051: MAD TEMP[3].xyz, IN[1].zzzz, TEMP[2].xyzz, TEMP[4].xyzz 1052: DP4 TEMP[2].x, TEMP[3], TEMP[3] 1053: RSQ TEMP[2].x, TEMP[2].xxxx 1054: MUL TEMP[2].xyz, TEMP[3], TEMP[2].xxxx 1055: MUL TEMP[2].xyz, TEMP[2].xyzz, IN[0].wwww 1056: ADD TEMP[2].xyz, IN[2].yzww, -TEMP[2].xyzz 1057: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 1058: RSQ TEMP[3].x, TEMP[3].xxxx 1059: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 1060: ADD TEMP[1].xyz, TEMP[0].xyzz, TEMP[1].xyzz 1061: DP3 TEMP[3].x, TEMP[1].xyzz, TEMP[1].xyzz 1062: RSQ TEMP[3].x, TEMP[3].xxxx 1063: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx 1064: DP3 TEMP[1].x, TEMP[2].xyzz, TEMP[1].xyzz 1065: MAX TEMP[1].x, IMM[3].wwww, TEMP[1].xxxx 1066: MUL TEMP[3].x, IMM[4].xxxx, IN[1].wwww 1067: POW TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx 1068: MOV_SAT TEMP[1].x, TEMP[1].xxxx 1069: MOV TEMP[3].w, IMM[3].yyyy 1070: MOV TEMP[3].xyz, CONST[21].xyzx 1071: MUL TEMP[4].x, IMM[2].wwww, TEMP[1].xxxx 1072: ADD TEMP[4].x, IMM[3].xxxx, -TEMP[4].xxxx 1073: MUL TEMP[4].x, TEMP[1].xxxx, TEMP[4].xxxx 1074: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx 1075: MUL TEMP[1].x, TEMP[1].xxxx, IN[1].wwww 1076: MUL TEMP[4].xyz, TEMP[10].xyzz, CONST[3].xyzz 1077: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[0].xyzz 1078: MOV_SAT TEMP[2].x, TEMP[2].xxxx 1079: MUL TEMP[5], CONST[22], IMM[2].wwww 1080: MAX TEMP[3], TEMP[5], TEMP[3] 1081: MIN TEMP[3].xyz, TEMP[3], IMM[4].yyyz 1082: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[10].xyzz 1083: MAD TEMP[2].xyz, TEMP[4].xyzz, TEMP[2].xxxx, TEMP[3].xyzz 1084: MAD TEMP[1].xyz, CONST[3].xyzz, TEMP[1].xxxx, TEMP[2].xyzz 1085: MUL TEMP[0].xyz, TEMP[1].xyzz, IMM[0].wwww 1086: MAD TEMP[1].x, IN[2].xxxx, CONST[2].zzzz, CONST[2].wwww 1087: MOV_SAT TEMP[1].x, TEMP[1].xxxx 1088: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 1089: MOV TEMP[0].w, IMM[2].zzzz 1090: MOV OUT[0], TEMP[0] 1091: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 340) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 344) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 356) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 360) %48 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !tbaa !0 %50 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %51 = load <4 x i32>, <4 x i32> addrspace(2)* %50, align 16, !tbaa !0 %52 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 %54 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %55 = load <4 x i32>, <4 x i32> addrspace(2)* %54, align 16, !tbaa !0 %56 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %57 = load <8 x i32>, <8 x i32> addrspace(2)* %56, align 32, !tbaa !0 %58 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %59 = load <4 x i32>, <4 x i32> addrspace(2)* %58, align 16, !tbaa !0 %60 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %61 = load <8 x i32>, <8 x i32> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %63 = load <4 x i32>, <4 x i32> addrspace(2)* %62, align 16, !tbaa !0 %64 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %65 = load <8 x i32>, <8 x i32> addrspace(2)* %64, align 32, !tbaa !0 %66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %67 = load <4 x i32>, <4 x i32> addrspace(2)* %66, align 16, !tbaa !0 %68 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %69 = load <8 x i32>, <8 x i32> addrspace(2)* %68, align 32, !tbaa !0 %70 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %71 = load <4 x i32>, <4 x i32> addrspace(2)* %70, align 16, !tbaa !0 %72 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %73 = load <8 x i32>, <8 x i32> addrspace(2)* %72, align 32, !tbaa !0 %74 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %75 = load <4 x i32>, <4 x i32> addrspace(2)* %74, align 16, !tbaa !0 %76 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 7 %77 = load <8 x i32>, <8 x i32> addrspace(2)* %76, align 32, !tbaa !0 %78 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 7 %79 = load <4 x i32>, <4 x i32> addrspace(2)* %78, align 16, !tbaa !0 %80 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 8 %81 = load <8 x i32>, <8 x i32> addrspace(2)* %80, align 32, !tbaa !0 %82 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 8 %83 = load <4 x i32>, <4 x i32> addrspace(2)* %82, align 16, !tbaa !0 %84 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 9 %85 = load <8 x i32>, <8 x i32> addrspace(2)* %84, align 32, !tbaa !0 %86 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 9 %87 = load <4 x i32>, <4 x i32> addrspace(2)* %86, align 16, !tbaa !0 %88 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %91 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %92 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %93 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %94 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %95 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %103 = fmul float %27, %27 %104 = fmul float %28, %28 %105 = fadd float %104, %103 %106 = fmul float %29, %29 %107 = fadd float %105, %106 %108 = call float @llvm.AMDGPU.rsq.clamped.f32(float %107) %109 = fmul float %27, %108 %110 = fmul float %28, %108 %111 = fmul float %29, %108 %112 = fsub float %24, %100 %113 = fsub float %25, %101 %114 = fsub float %26, %102 %115 = fmul float %112, %112 %116 = fmul float %113, %113 %117 = fadd float %116, %115 %118 = fmul float %114, %114 %119 = fadd float %117, %118 %120 = call float @llvm.AMDGPU.rsq.clamped.f32(float %119) %121 = fmul float %112, %120 %122 = fmul float %113, %120 %123 = fmul float %114, %120 %124 = call float @llvm.fabs.f32(float %97) %125 = call float @llvm.fabs.f32(float %98) %126 = call float @llvm.fabs.f32(float %99) %127 = fmul float %124, %124 %128 = fmul float %125, %125 %129 = fadd float %128, %127 %130 = fmul float %126, %126 %131 = fadd float %129, %130 %132 = call float @llvm.AMDGPU.rsq.clamped.f32(float %131) %133 = fmul float %124, %132 %134 = fadd float %133, 0xBFC99999A0000000 %135 = fmul float %125, %132 %136 = fadd float %135, 0xBFC99999A0000000 %137 = fmul float %126, %132 %138 = fadd float %137, 0xBFC99999A0000000 %139 = fmul float %134, 7.000000e+00 %140 = fmul float %136, 7.000000e+00 %141 = fmul float %138, 7.000000e+00 %142 = call float @llvm.maxnum.f32(float %139, float 0x3F847AE140000000) %143 = call float @llvm.maxnum.f32(float %140, float 0x3F847AE140000000) %144 = call float @llvm.maxnum.f32(float %141, float 0x3F847AE140000000) %145 = fadd float %142, %143 %146 = fadd float %145, %144 %147 = fdiv float 1.000000e+00, %146 %148 = fmul float %142, %147 %149 = fmul float %143, %147 %150 = fmul float %144, %147 %151 = fadd float %88, 5.000000e-01 %152 = fadd float %89, 5.000000e-01 %153 = fadd float %90, 5.000000e-01 %154 = call float @llvm.floor.f32(float %151) %155 = call float @llvm.floor.f32(float %152) %156 = call float @llvm.floor.f32(float %153) %157 = fmul float %154, %35 %158 = call float @llvm.floor.f32(float %157) %159 = fmul float %158, %35 %160 = fcmp ult float %154, 6.400000e+01 br i1 %160, label %ELSE, label %IF IF: ; preds = %main_body %161 = fadd float %154, -6.400000e+01 %162 = fmul float %161, %36 %163 = call float @llvm.floor.f32(float %162) %164 = fmul float %163, %36 %165 = call float @llvm.floor.f32(float %162) %166 = fsub float %162, %165 %167 = call float @llvm.floor.f32(float %164) %168 = fsub float %164, %167 %169 = call float @llvm.floor.f32(float %164) %170 = fadd float %169, 4.000000e+00 br label %ENDIF ELSE: ; preds = %main_body %171 = call float @llvm.floor.f32(float %157) %172 = fsub float %157, %171 %173 = call float @llvm.floor.f32(float %159) %174 = fsub float %159, %173 %175 = call float @llvm.floor.f32(float %159) br label %ENDIF ENDIF: ; preds = %ELSE, %IF %temp30.0 = phi float [ %170, %IF ], [ %175, %ELSE ] %temp29.0 = phi float [ %168, %IF ], [ %174, %ELSE ] %temp28.0 = phi float [ %166, %IF ], [ %172, %ELSE ] %temp16.0 = phi float [ %36, %IF ], [ %35, %ELSE ] %176 = fmul float %155, %35 %177 = call float @llvm.floor.f32(float %176) %178 = fmul float %177, %35 %179 = fcmp ult float %155, 6.400000e+01 br i1 %179, label %ELSE150, label %IF149 IF149: ; preds = %ENDIF %180 = fadd float %155, -6.400000e+01 %181 = fmul float %180, %36 %182 = call float @llvm.floor.f32(float %181) %183 = fmul float %182, %36 %184 = call float @llvm.floor.f32(float %181) %185 = fsub float %181, %184 %186 = call float @llvm.floor.f32(float %183) %187 = fsub float %183, %186 %188 = call float @llvm.floor.f32(float %183) %189 = fadd float %188, 4.000000e+00 br label %ENDIF148 ELSE150: ; preds = %ENDIF %190 = call float @llvm.floor.f32(float %176) %191 = fsub float %176, %190 %192 = call float @llvm.floor.f32(float %178) %193 = fsub float %178, %192 %194 = call float @llvm.floor.f32(float %178) br label %ENDIF148 ENDIF148: ; preds = %ELSE150, %IF149 %temp36.0 = phi float [ %185, %IF149 ], [ %191, %ELSE150 ] %temp37.0 = phi float [ %187, %IF149 ], [ %193, %ELSE150 ] %temp38.0 = phi float [ %189, %IF149 ], [ %194, %ELSE150 ] %temp20.0 = phi float [ %36, %IF149 ], [ %35, %ELSE150 ] %195 = fmul float %156, %35 %196 = call float @llvm.floor.f32(float %195) %197 = fmul float %196, %35 %198 = fcmp ult float %156, 6.400000e+01 br i1 %198, label %ELSE153, label %IF152 IF152: ; preds = %ENDIF148 %199 = fadd float %156, -6.400000e+01 %200 = fmul float %199, %36 %201 = call float @llvm.floor.f32(float %200) %202 = fmul float %201, %36 %203 = call float @llvm.floor.f32(float %200) %204 = fsub float %200, %203 %205 = call float @llvm.floor.f32(float %202) %206 = fsub float %202, %205 %207 = call float @llvm.floor.f32(float %202) %208 = fadd float %207, 4.000000e+00 br label %ENDIF151 ELSE153: ; preds = %ENDIF148 %209 = call float @llvm.floor.f32(float %195) %210 = fsub float %195, %209 %211 = call float @llvm.floor.f32(float %197) %212 = fsub float %197, %211 %213 = call float @llvm.floor.f32(float %197) br label %ENDIF151 ENDIF151: ; preds = %ELSE153, %IF152 %temp24.0 = phi float [ %36, %IF152 ], [ %35, %ELSE153 ] %temp14.0 = phi float [ %208, %IF152 ], [ %213, %ELSE153 ] %temp13.0 = phi float [ %206, %IF152 ], [ %212, %ELSE153 ] %temp12.0 = phi float [ %204, %IF152 ], [ %210, %ELSE153 ] %214 = fsub float %100, %24 %215 = fsub float %101, %25 %216 = fsub float %102, %26 %217 = fmul float %214, %214 %218 = fmul float %215, %215 %219 = fadd float %218, %217 %220 = fmul float %216, %216 %221 = fadd float %219, %220 %222 = fmul float %41, %221 %223 = call float @llvm.log2.f32(float %222) %224 = fmul float %223, 0x3FE62E4300000000 %225 = fmul float %224, %40 %226 = fcmp une float %35, %temp16.0 %.sink208 = select i1 %226, float %38, float %37 %temp44.0 = select i1 %226, float 1.953125e-03, float 3.906250e-03 %227 = fdiv float 1.000000e+00, %.sink208 %228 = fmul float %100, %227 %229 = fmul float %101, %227 %230 = call float @llvm.floor.f32(float %228) %231 = fsub float %228, %230 %232 = call float @llvm.floor.f32(float %229) %233 = fsub float %229, %232 %234 = fmul float %39, 2.000000e+00 %235 = fmul float %234, %temp44.0 %236 = fsub float 1.000000e+00, %235 %237 = fmul float %temp44.0, %39 %238 = fmul float %231, %236 %239 = fadd float %238, %237 %240 = fmul float %233, %236 %241 = fadd float %240, %237 %242 = fmul float %239, %temp16.0 %243 = fadd float %242, %temp28.0 %244 = fmul float %241, %temp16.0 %245 = fadd float %244, %temp29.0 %246 = bitcast float %243 to i32 %247 = bitcast float %245 to i32 %248 = bitcast float %225 to i32 %249 = insertelement <4 x i32> undef, i32 %246, i32 0 %250 = insertelement <4 x i32> %249, i32 %247, i32 1 %251 = insertelement <4 x i32> %250, i32 %248, i32 2 %252 = bitcast <8 x i32> %81 to <32 x i8> %253 = bitcast <4 x i32> %83 to <16 x i8> %254 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %251, <32 x i8> %252, <16 x i8> %253, i32 2) %255 = extractelement <4 x float> %254, i32 0 %256 = extractelement <4 x float> %254, i32 1 %257 = extractelement <4 x float> %254, i32 2 %258 = fcmp oeq float %temp30.0, 4.000000e+00 %259 = select i1 %258, float 1.000000e+00, float 0.000000e+00 %260 = bitcast float %243 to i32 %261 = bitcast float %245 to i32 %262 = bitcast float %225 to i32 %263 = insertelement <4 x i32> undef, i32 %260, i32 0 %264 = insertelement <4 x i32> %263, i32 %261, i32 1 %265 = insertelement <4 x i32> %264, i32 %262, i32 2 %266 = bitcast <8 x i32> %73 to <32 x i8> %267 = bitcast <4 x i32> %75 to <16 x i8> %268 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %265, <32 x i8> %266, <16 x i8> %267, i32 2) %269 = extractelement <4 x float> %268, i32 0 %270 = extractelement <4 x float> %268, i32 1 %271 = extractelement <4 x float> %268, i32 2 %272 = fcmp oeq float %temp30.0, 3.000000e+00 %273 = select i1 %272, float 1.000000e+00, float 0.000000e+00 %274 = bitcast float %243 to i32 %275 = bitcast float %245 to i32 %276 = bitcast float %225 to i32 %277 = insertelement <4 x i32> undef, i32 %274, i32 0 %278 = insertelement <4 x i32> %277, i32 %275, i32 1 %279 = insertelement <4 x i32> %278, i32 %276, i32 2 %280 = bitcast <8 x i32> %65 to <32 x i8> %281 = bitcast <4 x i32> %67 to <16 x i8> %282 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %279, <32 x i8> %280, <16 x i8> %281, i32 2) %283 = extractelement <4 x float> %282, i32 0 %284 = extractelement <4 x float> %282, i32 1 %285 = extractelement <4 x float> %282, i32 2 %286 = fcmp oeq float %temp30.0, 2.000000e+00 %287 = select i1 %286, float 1.000000e+00, float 0.000000e+00 %288 = bitcast float %243 to i32 %289 = bitcast float %245 to i32 %290 = bitcast float %225 to i32 %291 = insertelement <4 x i32> undef, i32 %288, i32 0 %292 = insertelement <4 x i32> %291, i32 %289, i32 1 %293 = insertelement <4 x i32> %292, i32 %290, i32 2 %294 = bitcast <8 x i32> %57 to <32 x i8> %295 = bitcast <4 x i32> %59 to <16 x i8> %296 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %293, <32 x i8> %294, <16 x i8> %295, i32 2) %297 = extractelement <4 x float> %296, i32 0 %298 = extractelement <4 x float> %296, i32 1 %299 = extractelement <4 x float> %296, i32 2 %300 = fcmp oeq float %temp30.0, 1.000000e+00 %301 = select i1 %300, float 1.000000e+00, float 0.000000e+00 %302 = bitcast float %243 to i32 %303 = bitcast float %245 to i32 %304 = bitcast float %225 to i32 %305 = insertelement <4 x i32> undef, i32 %302, i32 0 %306 = insertelement <4 x i32> %305, i32 %303, i32 1 %307 = insertelement <4 x i32> %306, i32 %304, i32 2 %308 = bitcast <8 x i32> %49 to <32 x i8> %309 = bitcast <4 x i32> %51 to <16 x i8> %310 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %307, <32 x i8> %308, <16 x i8> %309, i32 2) %311 = extractelement <4 x float> %310, i32 0 %312 = extractelement <4 x float> %310, i32 1 %313 = extractelement <4 x float> %310, i32 2 %314 = fcmp oeq float %temp30.0, 0.000000e+00 %315 = select i1 %314, float 1.000000e+00, float 0.000000e+00 %316 = fmul float %311, %315 %317 = fmul float %312, %315 %318 = fmul float %313, %315 %319 = fmul float %297, %301 %320 = fadd float %319, %316 %321 = fmul float %298, %301 %322 = fadd float %321, %317 %323 = fmul float %299, %301 %324 = fadd float %323, %318 %325 = fmul float %283, %287 %326 = fadd float %325, %320 %327 = fmul float %284, %287 %328 = fadd float %327, %322 %329 = fmul float %285, %287 %330 = fadd float %329, %324 %331 = fmul float %269, %273 %332 = fadd float %331, %326 %333 = fmul float %270, %273 %334 = fadd float %333, %328 %335 = fmul float %271, %273 %336 = fadd float %335, %330 %337 = fmul float %255, %259 %338 = fadd float %337, %332 %339 = fmul float %256, %259 %340 = fadd float %339, %334 %341 = fmul float %257, %259 %342 = fadd float %341, %336 %343 = fcmp une float %35, %temp16.0 %.sink209 = select i1 %343, float %38, float %37 %temp48.0 = select i1 %343, float 1.953125e-03, float 3.906250e-03 %344 = fdiv float 1.000000e+00, %.sink209 %345 = fmul float %102, %344 %346 = fmul float %101, %344 %347 = call float @llvm.floor.f32(float %345) %348 = fsub float %345, %347 %349 = call float @llvm.floor.f32(float %346) %350 = fsub float %346, %349 %351 = fmul float %39, 2.000000e+00 %352 = fmul float %351, %temp48.0 %353 = fsub float 1.000000e+00, %352 %354 = fmul float %temp48.0, %39 %355 = fmul float %348, %353 %356 = fadd float %355, %354 %357 = fmul float %350, %353 %358 = fadd float %357, %354 %359 = fmul float %356, %temp16.0 %360 = fadd float %359, %temp28.0 %361 = fmul float %358, %temp16.0 %362 = fadd float %361, %temp29.0 %363 = bitcast float %360 to i32 %364 = bitcast float %362 to i32 %365 = bitcast float %225 to i32 %366 = insertelement <4 x i32> undef, i32 %363, i32 0 %367 = insertelement <4 x i32> %366, i32 %364, i32 1 %368 = insertelement <4 x i32> %367, i32 %365, i32 2 %369 = bitcast <8 x i32> %81 to <32 x i8> %370 = bitcast <4 x i32> %83 to <16 x i8> %371 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %368, <32 x i8> %369, <16 x i8> %370, i32 2) %372 = extractelement <4 x float> %371, i32 0 %373 = extractelement <4 x float> %371, i32 1 %374 = extractelement <4 x float> %371, i32 2 %375 = fcmp oeq float %temp30.0, 4.000000e+00 %376 = select i1 %375, float 1.000000e+00, float 0.000000e+00 %377 = bitcast float %360 to i32 %378 = bitcast float %362 to i32 %379 = bitcast float %225 to i32 %380 = insertelement <4 x i32> undef, i32 %377, i32 0 %381 = insertelement <4 x i32> %380, i32 %378, i32 1 %382 = insertelement <4 x i32> %381, i32 %379, i32 2 %383 = bitcast <8 x i32> %73 to <32 x i8> %384 = bitcast <4 x i32> %75 to <16 x i8> %385 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %382, <32 x i8> %383, <16 x i8> %384, i32 2) %386 = extractelement <4 x float> %385, i32 0 %387 = extractelement <4 x float> %385, i32 1 %388 = extractelement <4 x float> %385, i32 2 %389 = fcmp oeq float %temp30.0, 3.000000e+00 %390 = select i1 %389, float 1.000000e+00, float 0.000000e+00 %391 = bitcast float %360 to i32 %392 = bitcast float %362 to i32 %393 = bitcast float %225 to i32 %394 = insertelement <4 x i32> undef, i32 %391, i32 0 %395 = insertelement <4 x i32> %394, i32 %392, i32 1 %396 = insertelement <4 x i32> %395, i32 %393, i32 2 %397 = bitcast <8 x i32> %65 to <32 x i8> %398 = bitcast <4 x i32> %67 to <16 x i8> %399 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %396, <32 x i8> %397, <16 x i8> %398, i32 2) %400 = extractelement <4 x float> %399, i32 0 %401 = extractelement <4 x float> %399, i32 1 %402 = extractelement <4 x float> %399, i32 2 %403 = fcmp oeq float %temp30.0, 2.000000e+00 %404 = select i1 %403, float 1.000000e+00, float 0.000000e+00 %405 = bitcast float %360 to i32 %406 = bitcast float %362 to i32 %407 = bitcast float %225 to i32 %408 = insertelement <4 x i32> undef, i32 %405, i32 0 %409 = insertelement <4 x i32> %408, i32 %406, i32 1 %410 = insertelement <4 x i32> %409, i32 %407, i32 2 %411 = bitcast <8 x i32> %57 to <32 x i8> %412 = bitcast <4 x i32> %59 to <16 x i8> %413 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %410, <32 x i8> %411, <16 x i8> %412, i32 2) %414 = extractelement <4 x float> %413, i32 0 %415 = extractelement <4 x float> %413, i32 1 %416 = extractelement <4 x float> %413, i32 2 %417 = fcmp oeq float %temp30.0, 1.000000e+00 %418 = select i1 %417, float 1.000000e+00, float 0.000000e+00 %419 = bitcast float %360 to i32 %420 = bitcast float %362 to i32 %421 = bitcast float %225 to i32 %422 = insertelement <4 x i32> undef, i32 %419, i32 0 %423 = insertelement <4 x i32> %422, i32 %420, i32 1 %424 = insertelement <4 x i32> %423, i32 %421, i32 2 %425 = bitcast <8 x i32> %49 to <32 x i8> %426 = bitcast <4 x i32> %51 to <16 x i8> %427 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %424, <32 x i8> %425, <16 x i8> %426, i32 2) %428 = extractelement <4 x float> %427, i32 0 %429 = extractelement <4 x float> %427, i32 1 %430 = extractelement <4 x float> %427, i32 2 %431 = fcmp oeq float %temp30.0, 0.000000e+00 %432 = select i1 %431, float 1.000000e+00, float 0.000000e+00 %433 = fmul float %428, %432 %434 = fmul float %429, %432 %435 = fmul float %430, %432 %436 = fmul float %414, %418 %437 = fadd float %436, %433 %438 = fmul float %415, %418 %439 = fadd float %438, %434 %440 = fmul float %416, %418 %441 = fadd float %440, %435 %442 = fmul float %400, %404 %443 = fadd float %442, %437 %444 = fmul float %401, %404 %445 = fadd float %444, %439 %446 = fmul float %402, %404 %447 = fadd float %446, %441 %448 = fmul float %386, %390 %449 = fadd float %448, %443 %450 = fmul float %387, %390 %451 = fadd float %450, %445 %452 = fmul float %388, %390 %453 = fadd float %452, %447 %454 = fmul float %372, %376 %455 = fadd float %454, %449 %456 = fmul float %373, %376 %457 = fadd float %456, %451 %458 = fmul float %374, %376 %459 = fadd float %458, %453 %460 = fcmp une float %35, %temp16.0 %.sink210 = select i1 %460, float %38, float %37 %temp52.0 = select i1 %460, float 1.953125e-03, float 3.906250e-03 %461 = fdiv float 1.000000e+00, %.sink210 %462 = fmul float %102, %461 %463 = fmul float %100, %461 %464 = call float @llvm.floor.f32(float %462) %465 = fsub float %462, %464 %466 = call float @llvm.floor.f32(float %463) %467 = fsub float %463, %466 %468 = fmul float %39, 2.000000e+00 %469 = fmul float %468, %temp52.0 %470 = fsub float 1.000000e+00, %469 %471 = fmul float %temp52.0, %39 %472 = fmul float %465, %470 %473 = fadd float %472, %471 %474 = fmul float %467, %470 %475 = fadd float %474, %471 %476 = fmul float %473, %temp16.0 %477 = fadd float %476, %temp28.0 %478 = fmul float %475, %temp16.0 %479 = fadd float %478, %temp29.0 %480 = bitcast float %477 to i32 %481 = bitcast float %479 to i32 %482 = bitcast float %225 to i32 %483 = insertelement <4 x i32> undef, i32 %480, i32 0 %484 = insertelement <4 x i32> %483, i32 %481, i32 1 %485 = insertelement <4 x i32> %484, i32 %482, i32 2 %486 = bitcast <8 x i32> %81 to <32 x i8> %487 = bitcast <4 x i32> %83 to <16 x i8> %488 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %485, <32 x i8> %486, <16 x i8> %487, i32 2) %489 = extractelement <4 x float> %488, i32 0 %490 = extractelement <4 x float> %488, i32 1 %491 = extractelement <4 x float> %488, i32 2 %492 = fcmp oeq float %temp30.0, 4.000000e+00 %493 = select i1 %492, float 1.000000e+00, float 0.000000e+00 %494 = bitcast float %477 to i32 %495 = bitcast float %479 to i32 %496 = bitcast float %225 to i32 %497 = insertelement <4 x i32> undef, i32 %494, i32 0 %498 = insertelement <4 x i32> %497, i32 %495, i32 1 %499 = insertelement <4 x i32> %498, i32 %496, i32 2 %500 = bitcast <8 x i32> %73 to <32 x i8> %501 = bitcast <4 x i32> %75 to <16 x i8> %502 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %499, <32 x i8> %500, <16 x i8> %501, i32 2) %503 = extractelement <4 x float> %502, i32 0 %504 = extractelement <4 x float> %502, i32 1 %505 = extractelement <4 x float> %502, i32 2 %506 = fcmp oeq float %temp30.0, 3.000000e+00 %507 = select i1 %506, float 1.000000e+00, float 0.000000e+00 %508 = bitcast float %477 to i32 %509 = bitcast float %479 to i32 %510 = bitcast float %225 to i32 %511 = insertelement <4 x i32> undef, i32 %508, i32 0 %512 = insertelement <4 x i32> %511, i32 %509, i32 1 %513 = insertelement <4 x i32> %512, i32 %510, i32 2 %514 = bitcast <8 x i32> %65 to <32 x i8> %515 = bitcast <4 x i32> %67 to <16 x i8> %516 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %513, <32 x i8> %514, <16 x i8> %515, i32 2) %517 = extractelement <4 x float> %516, i32 0 %518 = extractelement <4 x float> %516, i32 1 %519 = extractelement <4 x float> %516, i32 2 %520 = fcmp oeq float %temp30.0, 2.000000e+00 %521 = select i1 %520, float 1.000000e+00, float 0.000000e+00 %522 = bitcast float %477 to i32 %523 = bitcast float %479 to i32 %524 = bitcast float %225 to i32 %525 = insertelement <4 x i32> undef, i32 %522, i32 0 %526 = insertelement <4 x i32> %525, i32 %523, i32 1 %527 = insertelement <4 x i32> %526, i32 %524, i32 2 %528 = bitcast <8 x i32> %57 to <32 x i8> %529 = bitcast <4 x i32> %59 to <16 x i8> %530 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %527, <32 x i8> %528, <16 x i8> %529, i32 2) %531 = extractelement <4 x float> %530, i32 0 %532 = extractelement <4 x float> %530, i32 1 %533 = extractelement <4 x float> %530, i32 2 %534 = fcmp oeq float %temp30.0, 1.000000e+00 %535 = select i1 %534, float 1.000000e+00, float 0.000000e+00 %536 = bitcast float %477 to i32 %537 = bitcast float %479 to i32 %538 = bitcast float %225 to i32 %539 = insertelement <4 x i32> undef, i32 %536, i32 0 %540 = insertelement <4 x i32> %539, i32 %537, i32 1 %541 = insertelement <4 x i32> %540, i32 %538, i32 2 %542 = bitcast <8 x i32> %49 to <32 x i8> %543 = bitcast <4 x i32> %51 to <16 x i8> %544 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %541, <32 x i8> %542, <16 x i8> %543, i32 2) %545 = extractelement <4 x float> %544, i32 0 %546 = extractelement <4 x float> %544, i32 1 %547 = extractelement <4 x float> %544, i32 2 %548 = fcmp oeq float %temp30.0, 0.000000e+00 %549 = select i1 %548, float 1.000000e+00, float 0.000000e+00 %550 = fmul float %545, %549 %551 = fmul float %546, %549 %552 = fmul float %547, %549 %553 = fmul float %531, %535 %554 = fadd float %553, %550 %555 = fmul float %532, %535 %556 = fadd float %555, %551 %557 = fmul float %533, %535 %558 = fadd float %557, %552 %559 = fmul float %517, %521 %560 = fadd float %559, %554 %561 = fmul float %518, %521 %562 = fadd float %561, %556 %563 = fmul float %519, %521 %564 = fadd float %563, %558 %565 = fmul float %503, %507 %566 = fadd float %565, %560 %567 = fmul float %504, %507 %568 = fadd float %567, %562 %569 = fmul float %505, %507 %570 = fadd float %569, %564 %571 = fmul float %489, %493 %572 = fadd float %571, %566 %573 = fmul float %490, %493 %574 = fadd float %573, %568 %575 = fmul float %491, %493 %576 = fadd float %575, %570 %577 = fcmp une float %35, %temp20.0 %.sink211 = select i1 %577, float %38, float %37 %temp56.0 = select i1 %577, float 1.953125e-03, float 3.906250e-03 %578 = fdiv float 1.000000e+00, %.sink211 %579 = fmul float %100, %578 %580 = fmul float %101, %578 %581 = call float @llvm.floor.f32(float %579) %582 = fsub float %579, %581 %583 = call float @llvm.floor.f32(float %580) %584 = fsub float %580, %583 %585 = fmul float %39, 2.000000e+00 %586 = fmul float %585, %temp56.0 %587 = fsub float 1.000000e+00, %586 %588 = fmul float %temp56.0, %39 %589 = fmul float %582, %587 %590 = fadd float %589, %588 %591 = fmul float %584, %587 %592 = fadd float %591, %588 %593 = fmul float %590, %temp20.0 %594 = fadd float %593, %temp36.0 %595 = fmul float %592, %temp20.0 %596 = fadd float %595, %temp37.0 %597 = bitcast float %594 to i32 %598 = bitcast float %596 to i32 %599 = bitcast float %225 to i32 %600 = insertelement <4 x i32> undef, i32 %597, i32 0 %601 = insertelement <4 x i32> %600, i32 %598, i32 1 %602 = insertelement <4 x i32> %601, i32 %599, i32 2 %603 = bitcast <8 x i32> %81 to <32 x i8> %604 = bitcast <4 x i32> %83 to <16 x i8> %605 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %602, <32 x i8> %603, <16 x i8> %604, i32 2) %606 = extractelement <4 x float> %605, i32 0 %607 = extractelement <4 x float> %605, i32 1 %608 = extractelement <4 x float> %605, i32 2 %609 = fcmp oeq float %temp38.0, 4.000000e+00 %610 = select i1 %609, float 1.000000e+00, float 0.000000e+00 %611 = bitcast float %594 to i32 %612 = bitcast float %596 to i32 %613 = bitcast float %225 to i32 %614 = insertelement <4 x i32> undef, i32 %611, i32 0 %615 = insertelement <4 x i32> %614, i32 %612, i32 1 %616 = insertelement <4 x i32> %615, i32 %613, i32 2 %617 = bitcast <8 x i32> %73 to <32 x i8> %618 = bitcast <4 x i32> %75 to <16 x i8> %619 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %616, <32 x i8> %617, <16 x i8> %618, i32 2) %620 = extractelement <4 x float> %619, i32 0 %621 = extractelement <4 x float> %619, i32 1 %622 = extractelement <4 x float> %619, i32 2 %623 = fcmp oeq float %temp38.0, 3.000000e+00 %624 = select i1 %623, float 1.000000e+00, float 0.000000e+00 %625 = bitcast float %594 to i32 %626 = bitcast float %596 to i32 %627 = bitcast float %225 to i32 %628 = insertelement <4 x i32> undef, i32 %625, i32 0 %629 = insertelement <4 x i32> %628, i32 %626, i32 1 %630 = insertelement <4 x i32> %629, i32 %627, i32 2 %631 = bitcast <8 x i32> %65 to <32 x i8> %632 = bitcast <4 x i32> %67 to <16 x i8> %633 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %630, <32 x i8> %631, <16 x i8> %632, i32 2) %634 = extractelement <4 x float> %633, i32 0 %635 = extractelement <4 x float> %633, i32 1 %636 = extractelement <4 x float> %633, i32 2 %637 = fcmp oeq float %temp38.0, 2.000000e+00 %638 = select i1 %637, float 1.000000e+00, float 0.000000e+00 %639 = bitcast float %594 to i32 %640 = bitcast float %596 to i32 %641 = bitcast float %225 to i32 %642 = insertelement <4 x i32> undef, i32 %639, i32 0 %643 = insertelement <4 x i32> %642, i32 %640, i32 1 %644 = insertelement <4 x i32> %643, i32 %641, i32 2 %645 = bitcast <8 x i32> %57 to <32 x i8> %646 = bitcast <4 x i32> %59 to <16 x i8> %647 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %644, <32 x i8> %645, <16 x i8> %646, i32 2) %648 = extractelement <4 x float> %647, i32 0 %649 = extractelement <4 x float> %647, i32 1 %650 = extractelement <4 x float> %647, i32 2 %651 = fcmp oeq float %temp38.0, 1.000000e+00 %652 = select i1 %651, float 1.000000e+00, float 0.000000e+00 %653 = bitcast float %594 to i32 %654 = bitcast float %596 to i32 %655 = bitcast float %225 to i32 %656 = insertelement <4 x i32> undef, i32 %653, i32 0 %657 = insertelement <4 x i32> %656, i32 %654, i32 1 %658 = insertelement <4 x i32> %657, i32 %655, i32 2 %659 = bitcast <8 x i32> %49 to <32 x i8> %660 = bitcast <4 x i32> %51 to <16 x i8> %661 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %658, <32 x i8> %659, <16 x i8> %660, i32 2) %662 = extractelement <4 x float> %661, i32 0 %663 = extractelement <4 x float> %661, i32 1 %664 = extractelement <4 x float> %661, i32 2 %665 = fcmp oeq float %temp38.0, 0.000000e+00 %666 = select i1 %665, float 1.000000e+00, float 0.000000e+00 %667 = fmul float %662, %666 %668 = fmul float %663, %666 %669 = fmul float %664, %666 %670 = fmul float %648, %652 %671 = fadd float %670, %667 %672 = fmul float %649, %652 %673 = fadd float %672, %668 %674 = fmul float %650, %652 %675 = fadd float %674, %669 %676 = fmul float %634, %638 %677 = fadd float %676, %671 %678 = fmul float %635, %638 %679 = fadd float %678, %673 %680 = fmul float %636, %638 %681 = fadd float %680, %675 %682 = fmul float %620, %624 %683 = fadd float %682, %677 %684 = fmul float %621, %624 %685 = fadd float %684, %679 %686 = fmul float %622, %624 %687 = fadd float %686, %681 %688 = fmul float %606, %610 %689 = fadd float %688, %683 %690 = fmul float %607, %610 %691 = fadd float %690, %685 %692 = fmul float %608, %610 %693 = fadd float %692, %687 %694 = fcmp une float %35, %temp20.0 %.sink212 = select i1 %694, float %38, float %37 %temp60.0 = select i1 %694, float 1.953125e-03, float 3.906250e-03 %695 = fdiv float 1.000000e+00, %.sink212 %696 = fmul float %102, %695 %697 = fmul float %101, %695 %698 = call float @llvm.floor.f32(float %696) %699 = fsub float %696, %698 %700 = call float @llvm.floor.f32(float %697) %701 = fsub float %697, %700 %702 = fmul float %39, 2.000000e+00 %703 = fmul float %702, %temp60.0 %704 = fsub float 1.000000e+00, %703 %705 = fmul float %temp60.0, %39 %706 = fmul float %699, %704 %707 = fadd float %706, %705 %708 = fmul float %701, %704 %709 = fadd float %708, %705 %710 = fmul float %707, %temp20.0 %711 = fadd float %710, %temp36.0 %712 = fmul float %709, %temp20.0 %713 = fadd float %712, %temp37.0 %714 = bitcast float %711 to i32 %715 = bitcast float %713 to i32 %716 = bitcast float %225 to i32 %717 = insertelement <4 x i32> undef, i32 %714, i32 0 %718 = insertelement <4 x i32> %717, i32 %715, i32 1 %719 = insertelement <4 x i32> %718, i32 %716, i32 2 %720 = bitcast <8 x i32> %81 to <32 x i8> %721 = bitcast <4 x i32> %83 to <16 x i8> %722 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %719, <32 x i8> %720, <16 x i8> %721, i32 2) %723 = extractelement <4 x float> %722, i32 0 %724 = extractelement <4 x float> %722, i32 1 %725 = extractelement <4 x float> %722, i32 2 %726 = fcmp oeq float %temp38.0, 4.000000e+00 %727 = select i1 %726, float 1.000000e+00, float 0.000000e+00 %728 = bitcast float %711 to i32 %729 = bitcast float %713 to i32 %730 = bitcast float %225 to i32 %731 = insertelement <4 x i32> undef, i32 %728, i32 0 %732 = insertelement <4 x i32> %731, i32 %729, i32 1 %733 = insertelement <4 x i32> %732, i32 %730, i32 2 %734 = bitcast <8 x i32> %73 to <32 x i8> %735 = bitcast <4 x i32> %75 to <16 x i8> %736 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %733, <32 x i8> %734, <16 x i8> %735, i32 2) %737 = extractelement <4 x float> %736, i32 0 %738 = extractelement <4 x float> %736, i32 1 %739 = extractelement <4 x float> %736, i32 2 %740 = fcmp oeq float %temp38.0, 3.000000e+00 %741 = select i1 %740, float 1.000000e+00, float 0.000000e+00 %742 = bitcast float %711 to i32 %743 = bitcast float %713 to i32 %744 = bitcast float %225 to i32 %745 = insertelement <4 x i32> undef, i32 %742, i32 0 %746 = insertelement <4 x i32> %745, i32 %743, i32 1 %747 = insertelement <4 x i32> %746, i32 %744, i32 2 %748 = bitcast <8 x i32> %65 to <32 x i8> %749 = bitcast <4 x i32> %67 to <16 x i8> %750 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %747, <32 x i8> %748, <16 x i8> %749, i32 2) %751 = extractelement <4 x float> %750, i32 0 %752 = extractelement <4 x float> %750, i32 1 %753 = extractelement <4 x float> %750, i32 2 %754 = fcmp oeq float %temp38.0, 2.000000e+00 %755 = select i1 %754, float 1.000000e+00, float 0.000000e+00 %756 = bitcast float %711 to i32 %757 = bitcast float %713 to i32 %758 = bitcast float %225 to i32 %759 = insertelement <4 x i32> undef, i32 %756, i32 0 %760 = insertelement <4 x i32> %759, i32 %757, i32 1 %761 = insertelement <4 x i32> %760, i32 %758, i32 2 %762 = bitcast <8 x i32> %57 to <32 x i8> %763 = bitcast <4 x i32> %59 to <16 x i8> %764 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %761, <32 x i8> %762, <16 x i8> %763, i32 2) %765 = extractelement <4 x float> %764, i32 0 %766 = extractelement <4 x float> %764, i32 1 %767 = extractelement <4 x float> %764, i32 2 %768 = fcmp oeq float %temp38.0, 1.000000e+00 %769 = select i1 %768, float 1.000000e+00, float 0.000000e+00 %770 = bitcast float %711 to i32 %771 = bitcast float %713 to i32 %772 = bitcast float %225 to i32 %773 = insertelement <4 x i32> undef, i32 %770, i32 0 %774 = insertelement <4 x i32> %773, i32 %771, i32 1 %775 = insertelement <4 x i32> %774, i32 %772, i32 2 %776 = bitcast <8 x i32> %49 to <32 x i8> %777 = bitcast <4 x i32> %51 to <16 x i8> %778 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %775, <32 x i8> %776, <16 x i8> %777, i32 2) %779 = extractelement <4 x float> %778, i32 0 %780 = extractelement <4 x float> %778, i32 1 %781 = extractelement <4 x float> %778, i32 2 %782 = fcmp oeq float %temp38.0, 0.000000e+00 %783 = select i1 %782, float 1.000000e+00, float 0.000000e+00 %784 = fmul float %779, %783 %785 = fmul float %780, %783 %786 = fmul float %781, %783 %787 = fmul float %765, %769 %788 = fadd float %787, %784 %789 = fmul float %766, %769 %790 = fadd float %789, %785 %791 = fmul float %767, %769 %792 = fadd float %791, %786 %793 = fmul float %751, %755 %794 = fadd float %793, %788 %795 = fmul float %752, %755 %796 = fadd float %795, %790 %797 = fmul float %753, %755 %798 = fadd float %797, %792 %799 = fmul float %737, %741 %800 = fadd float %799, %794 %801 = fmul float %738, %741 %802 = fadd float %801, %796 %803 = fmul float %739, %741 %804 = fadd float %803, %798 %805 = fmul float %723, %727 %806 = fadd float %805, %800 %807 = fmul float %724, %727 %808 = fadd float %807, %802 %809 = fmul float %725, %727 %810 = fadd float %809, %804 %811 = fcmp une float %35, %temp20.0 %.sink213 = select i1 %811, float %38, float %37 %temp64.0 = select i1 %811, float 1.953125e-03, float 3.906250e-03 %812 = fdiv float 1.000000e+00, %.sink213 %813 = fmul float %102, %812 %814 = fmul float %100, %812 %815 = call float @llvm.floor.f32(float %813) %816 = fsub float %813, %815 %817 = call float @llvm.floor.f32(float %814) %818 = fsub float %814, %817 %819 = fmul float %39, 2.000000e+00 %820 = fmul float %819, %temp64.0 %821 = fsub float 1.000000e+00, %820 %822 = fmul float %temp64.0, %39 %823 = fmul float %816, %821 %824 = fadd float %823, %822 %825 = fmul float %818, %821 %826 = fadd float %825, %822 %827 = fmul float %824, %temp20.0 %828 = fadd float %827, %temp36.0 %829 = fmul float %826, %temp20.0 %830 = fadd float %829, %temp37.0 %831 = bitcast float %828 to i32 %832 = bitcast float %830 to i32 %833 = bitcast float %225 to i32 %834 = insertelement <4 x i32> undef, i32 %831, i32 0 %835 = insertelement <4 x i32> %834, i32 %832, i32 1 %836 = insertelement <4 x i32> %835, i32 %833, i32 2 %837 = bitcast <8 x i32> %81 to <32 x i8> %838 = bitcast <4 x i32> %83 to <16 x i8> %839 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %836, <32 x i8> %837, <16 x i8> %838, i32 2) %840 = extractelement <4 x float> %839, i32 0 %841 = extractelement <4 x float> %839, i32 1 %842 = extractelement <4 x float> %839, i32 2 %843 = fcmp oeq float %temp38.0, 4.000000e+00 %844 = select i1 %843, float 1.000000e+00, float 0.000000e+00 %845 = bitcast float %828 to i32 %846 = bitcast float %830 to i32 %847 = bitcast float %225 to i32 %848 = insertelement <4 x i32> undef, i32 %845, i32 0 %849 = insertelement <4 x i32> %848, i32 %846, i32 1 %850 = insertelement <4 x i32> %849, i32 %847, i32 2 %851 = bitcast <8 x i32> %73 to <32 x i8> %852 = bitcast <4 x i32> %75 to <16 x i8> %853 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %850, <32 x i8> %851, <16 x i8> %852, i32 2) %854 = extractelement <4 x float> %853, i32 0 %855 = extractelement <4 x float> %853, i32 1 %856 = extractelement <4 x float> %853, i32 2 %857 = fcmp oeq float %temp38.0, 3.000000e+00 %858 = select i1 %857, float 1.000000e+00, float 0.000000e+00 %859 = bitcast float %828 to i32 %860 = bitcast float %830 to i32 %861 = bitcast float %225 to i32 %862 = insertelement <4 x i32> undef, i32 %859, i32 0 %863 = insertelement <4 x i32> %862, i32 %860, i32 1 %864 = insertelement <4 x i32> %863, i32 %861, i32 2 %865 = bitcast <8 x i32> %65 to <32 x i8> %866 = bitcast <4 x i32> %67 to <16 x i8> %867 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %864, <32 x i8> %865, <16 x i8> %866, i32 2) %868 = extractelement <4 x float> %867, i32 0 %869 = extractelement <4 x float> %867, i32 1 %870 = extractelement <4 x float> %867, i32 2 %871 = fcmp oeq float %temp38.0, 2.000000e+00 %872 = select i1 %871, float 1.000000e+00, float 0.000000e+00 %873 = bitcast float %828 to i32 %874 = bitcast float %830 to i32 %875 = bitcast float %225 to i32 %876 = insertelement <4 x i32> undef, i32 %873, i32 0 %877 = insertelement <4 x i32> %876, i32 %874, i32 1 %878 = insertelement <4 x i32> %877, i32 %875, i32 2 %879 = bitcast <8 x i32> %57 to <32 x i8> %880 = bitcast <4 x i32> %59 to <16 x i8> %881 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %878, <32 x i8> %879, <16 x i8> %880, i32 2) %882 = extractelement <4 x float> %881, i32 0 %883 = extractelement <4 x float> %881, i32 1 %884 = extractelement <4 x float> %881, i32 2 %885 = fcmp oeq float %temp38.0, 1.000000e+00 %886 = select i1 %885, float 1.000000e+00, float 0.000000e+00 %887 = bitcast float %828 to i32 %888 = bitcast float %830 to i32 %889 = bitcast float %225 to i32 %890 = insertelement <4 x i32> undef, i32 %887, i32 0 %891 = insertelement <4 x i32> %890, i32 %888, i32 1 %892 = insertelement <4 x i32> %891, i32 %889, i32 2 %893 = bitcast <8 x i32> %49 to <32 x i8> %894 = bitcast <4 x i32> %51 to <16 x i8> %895 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %892, <32 x i8> %893, <16 x i8> %894, i32 2) %896 = extractelement <4 x float> %895, i32 0 %897 = extractelement <4 x float> %895, i32 1 %898 = extractelement <4 x float> %895, i32 2 %899 = fcmp oeq float %temp38.0, 0.000000e+00 %900 = select i1 %899, float 1.000000e+00, float 0.000000e+00 %901 = fmul float %896, %900 %902 = fmul float %897, %900 %903 = fmul float %898, %900 %904 = fmul float %882, %886 %905 = fadd float %904, %901 %906 = fmul float %883, %886 %907 = fadd float %906, %902 %908 = fmul float %884, %886 %909 = fadd float %908, %903 %910 = fmul float %868, %872 %911 = fadd float %910, %905 %912 = fmul float %869, %872 %913 = fadd float %912, %907 %914 = fmul float %870, %872 %915 = fadd float %914, %909 %916 = fmul float %854, %858 %917 = fadd float %916, %911 %918 = fmul float %855, %858 %919 = fadd float %918, %913 %920 = fmul float %856, %858 %921 = fadd float %920, %915 %922 = fmul float %840, %844 %923 = fadd float %922, %917 %924 = fmul float %841, %844 %925 = fadd float %924, %919 %926 = fmul float %842, %844 %927 = fadd float %926, %921 %928 = fcmp une float %35, %temp24.0 %.sink214 = select i1 %928, float %38, float %37 %temp68.0 = select i1 %928, float 1.953125e-03, float 3.906250e-03 %929 = fdiv float 1.000000e+00, %.sink214 %930 = fmul float %100, %929 %931 = fmul float %101, %929 %932 = call float @llvm.floor.f32(float %930) %933 = fsub float %930, %932 %934 = call float @llvm.floor.f32(float %931) %935 = fsub float %931, %934 %936 = fmul float %39, 2.000000e+00 %937 = fmul float %936, %temp68.0 %938 = fsub float 1.000000e+00, %937 %939 = fmul float %temp68.0, %39 %940 = fmul float %933, %938 %941 = fadd float %940, %939 %942 = fmul float %935, %938 %943 = fadd float %942, %939 %944 = fmul float %941, %temp24.0 %945 = fadd float %944, %temp12.0 %946 = fmul float %943, %temp24.0 %947 = fadd float %946, %temp13.0 %948 = bitcast float %945 to i32 %949 = bitcast float %947 to i32 %950 = bitcast float %225 to i32 %951 = insertelement <4 x i32> undef, i32 %948, i32 0 %952 = insertelement <4 x i32> %951, i32 %949, i32 1 %953 = insertelement <4 x i32> %952, i32 %950, i32 2 %954 = bitcast <8 x i32> %81 to <32 x i8> %955 = bitcast <4 x i32> %83 to <16 x i8> %956 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %953, <32 x i8> %954, <16 x i8> %955, i32 2) %957 = extractelement <4 x float> %956, i32 0 %958 = extractelement <4 x float> %956, i32 1 %959 = extractelement <4 x float> %956, i32 2 %960 = fcmp oeq float %temp14.0, 4.000000e+00 %961 = select i1 %960, float 1.000000e+00, float 0.000000e+00 %962 = bitcast float %945 to i32 %963 = bitcast float %947 to i32 %964 = bitcast float %225 to i32 %965 = insertelement <4 x i32> undef, i32 %962, i32 0 %966 = insertelement <4 x i32> %965, i32 %963, i32 1 %967 = insertelement <4 x i32> %966, i32 %964, i32 2 %968 = bitcast <8 x i32> %73 to <32 x i8> %969 = bitcast <4 x i32> %75 to <16 x i8> %970 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %967, <32 x i8> %968, <16 x i8> %969, i32 2) %971 = extractelement <4 x float> %970, i32 0 %972 = extractelement <4 x float> %970, i32 1 %973 = extractelement <4 x float> %970, i32 2 %974 = fcmp oeq float %temp14.0, 3.000000e+00 %975 = select i1 %974, float 1.000000e+00, float 0.000000e+00 %976 = bitcast float %945 to i32 %977 = bitcast float %947 to i32 %978 = bitcast float %225 to i32 %979 = insertelement <4 x i32> undef, i32 %976, i32 0 %980 = insertelement <4 x i32> %979, i32 %977, i32 1 %981 = insertelement <4 x i32> %980, i32 %978, i32 2 %982 = bitcast <8 x i32> %65 to <32 x i8> %983 = bitcast <4 x i32> %67 to <16 x i8> %984 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %981, <32 x i8> %982, <16 x i8> %983, i32 2) %985 = extractelement <4 x float> %984, i32 0 %986 = extractelement <4 x float> %984, i32 1 %987 = extractelement <4 x float> %984, i32 2 %988 = fcmp oeq float %temp14.0, 2.000000e+00 %989 = select i1 %988, float 1.000000e+00, float 0.000000e+00 %990 = bitcast float %945 to i32 %991 = bitcast float %947 to i32 %992 = bitcast float %225 to i32 %993 = insertelement <4 x i32> undef, i32 %990, i32 0 %994 = insertelement <4 x i32> %993, i32 %991, i32 1 %995 = insertelement <4 x i32> %994, i32 %992, i32 2 %996 = bitcast <8 x i32> %57 to <32 x i8> %997 = bitcast <4 x i32> %59 to <16 x i8> %998 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %995, <32 x i8> %996, <16 x i8> %997, i32 2) %999 = extractelement <4 x float> %998, i32 0 %1000 = extractelement <4 x float> %998, i32 1 %1001 = extractelement <4 x float> %998, i32 2 %1002 = fcmp oeq float %temp14.0, 1.000000e+00 %1003 = select i1 %1002, float 1.000000e+00, float 0.000000e+00 %1004 = bitcast float %945 to i32 %1005 = bitcast float %947 to i32 %1006 = bitcast float %225 to i32 %1007 = insertelement <4 x i32> undef, i32 %1004, i32 0 %1008 = insertelement <4 x i32> %1007, i32 %1005, i32 1 %1009 = insertelement <4 x i32> %1008, i32 %1006, i32 2 %1010 = bitcast <8 x i32> %49 to <32 x i8> %1011 = bitcast <4 x i32> %51 to <16 x i8> %1012 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1009, <32 x i8> %1010, <16 x i8> %1011, i32 2) %1013 = extractelement <4 x float> %1012, i32 0 %1014 = extractelement <4 x float> %1012, i32 1 %1015 = extractelement <4 x float> %1012, i32 2 %1016 = fcmp oeq float %temp14.0, 0.000000e+00 %1017 = select i1 %1016, float 1.000000e+00, float 0.000000e+00 %1018 = fmul float %1013, %1017 %1019 = fmul float %1014, %1017 %1020 = fmul float %1015, %1017 %1021 = fmul float %999, %1003 %1022 = fadd float %1021, %1018 %1023 = fmul float %1000, %1003 %1024 = fadd float %1023, %1019 %1025 = fmul float %1001, %1003 %1026 = fadd float %1025, %1020 %1027 = fmul float %985, %989 %1028 = fadd float %1027, %1022 %1029 = fmul float %986, %989 %1030 = fadd float %1029, %1024 %1031 = fmul float %987, %989 %1032 = fadd float %1031, %1026 %1033 = fmul float %971, %975 %1034 = fadd float %1033, %1028 %1035 = fmul float %972, %975 %1036 = fadd float %1035, %1030 %1037 = fmul float %973, %975 %1038 = fadd float %1037, %1032 %1039 = fmul float %957, %961 %1040 = fadd float %1039, %1034 %1041 = fmul float %958, %961 %1042 = fadd float %1041, %1036 %1043 = fmul float %959, %961 %1044 = fadd float %1043, %1038 %1045 = fcmp une float %35, %temp24.0 %.sink215 = select i1 %1045, float %38, float %37 %temp72.0 = select i1 %1045, float 1.953125e-03, float 3.906250e-03 %1046 = fdiv float 1.000000e+00, %.sink215 %1047 = fmul float %102, %1046 %1048 = fmul float %101, %1046 %1049 = call float @llvm.floor.f32(float %1047) %1050 = fsub float %1047, %1049 %1051 = call float @llvm.floor.f32(float %1048) %1052 = fsub float %1048, %1051 %1053 = fmul float %39, 2.000000e+00 %1054 = fmul float %1053, %temp72.0 %1055 = fsub float 1.000000e+00, %1054 %1056 = fmul float %temp72.0, %39 %1057 = fmul float %1050, %1055 %1058 = fadd float %1057, %1056 %1059 = fmul float %1052, %1055 %1060 = fadd float %1059, %1056 %1061 = fmul float %1058, %temp24.0 %1062 = fadd float %1061, %temp12.0 %1063 = fmul float %1060, %temp24.0 %1064 = fadd float %1063, %temp13.0 %1065 = bitcast float %1062 to i32 %1066 = bitcast float %1064 to i32 %1067 = bitcast float %225 to i32 %1068 = insertelement <4 x i32> undef, i32 %1065, i32 0 %1069 = insertelement <4 x i32> %1068, i32 %1066, i32 1 %1070 = insertelement <4 x i32> %1069, i32 %1067, i32 2 %1071 = bitcast <8 x i32> %81 to <32 x i8> %1072 = bitcast <4 x i32> %83 to <16 x i8> %1073 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1070, <32 x i8> %1071, <16 x i8> %1072, i32 2) %1074 = extractelement <4 x float> %1073, i32 0 %1075 = extractelement <4 x float> %1073, i32 1 %1076 = extractelement <4 x float> %1073, i32 2 %1077 = fcmp oeq float %temp14.0, 4.000000e+00 %1078 = select i1 %1077, float 1.000000e+00, float 0.000000e+00 %1079 = bitcast float %1062 to i32 %1080 = bitcast float %1064 to i32 %1081 = bitcast float %225 to i32 %1082 = insertelement <4 x i32> undef, i32 %1079, i32 0 %1083 = insertelement <4 x i32> %1082, i32 %1080, i32 1 %1084 = insertelement <4 x i32> %1083, i32 %1081, i32 2 %1085 = bitcast <8 x i32> %73 to <32 x i8> %1086 = bitcast <4 x i32> %75 to <16 x i8> %1087 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1084, <32 x i8> %1085, <16 x i8> %1086, i32 2) %1088 = extractelement <4 x float> %1087, i32 0 %1089 = extractelement <4 x float> %1087, i32 1 %1090 = extractelement <4 x float> %1087, i32 2 %1091 = fcmp oeq float %temp14.0, 3.000000e+00 %1092 = select i1 %1091, float 1.000000e+00, float 0.000000e+00 %1093 = bitcast float %1062 to i32 %1094 = bitcast float %1064 to i32 %1095 = bitcast float %225 to i32 %1096 = insertelement <4 x i32> undef, i32 %1093, i32 0 %1097 = insertelement <4 x i32> %1096, i32 %1094, i32 1 %1098 = insertelement <4 x i32> %1097, i32 %1095, i32 2 %1099 = bitcast <8 x i32> %65 to <32 x i8> %1100 = bitcast <4 x i32> %67 to <16 x i8> %1101 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1098, <32 x i8> %1099, <16 x i8> %1100, i32 2) %1102 = extractelement <4 x float> %1101, i32 0 %1103 = extractelement <4 x float> %1101, i32 1 %1104 = extractelement <4 x float> %1101, i32 2 %1105 = fcmp oeq float %temp14.0, 2.000000e+00 %1106 = select i1 %1105, float 1.000000e+00, float 0.000000e+00 %1107 = bitcast float %1062 to i32 %1108 = bitcast float %1064 to i32 %1109 = bitcast float %225 to i32 %1110 = insertelement <4 x i32> undef, i32 %1107, i32 0 %1111 = insertelement <4 x i32> %1110, i32 %1108, i32 1 %1112 = insertelement <4 x i32> %1111, i32 %1109, i32 2 %1113 = bitcast <8 x i32> %57 to <32 x i8> %1114 = bitcast <4 x i32> %59 to <16 x i8> %1115 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1112, <32 x i8> %1113, <16 x i8> %1114, i32 2) %1116 = extractelement <4 x float> %1115, i32 0 %1117 = extractelement <4 x float> %1115, i32 1 %1118 = extractelement <4 x float> %1115, i32 2 %1119 = fcmp oeq float %temp14.0, 1.000000e+00 %1120 = select i1 %1119, float 1.000000e+00, float 0.000000e+00 %1121 = bitcast float %1062 to i32 %1122 = bitcast float %1064 to i32 %1123 = bitcast float %225 to i32 %1124 = insertelement <4 x i32> undef, i32 %1121, i32 0 %1125 = insertelement <4 x i32> %1124, i32 %1122, i32 1 %1126 = insertelement <4 x i32> %1125, i32 %1123, i32 2 %1127 = bitcast <8 x i32> %49 to <32 x i8> %1128 = bitcast <4 x i32> %51 to <16 x i8> %1129 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1126, <32 x i8> %1127, <16 x i8> %1128, i32 2) %1130 = extractelement <4 x float> %1129, i32 0 %1131 = extractelement <4 x float> %1129, i32 1 %1132 = extractelement <4 x float> %1129, i32 2 %1133 = fcmp oeq float %temp14.0, 0.000000e+00 %1134 = select i1 %1133, float 1.000000e+00, float 0.000000e+00 %1135 = fmul float %1130, %1134 %1136 = fmul float %1131, %1134 %1137 = fmul float %1132, %1134 %1138 = fmul float %1116, %1120 %1139 = fadd float %1138, %1135 %1140 = fmul float %1117, %1120 %1141 = fadd float %1140, %1136 %1142 = fmul float %1118, %1120 %1143 = fadd float %1142, %1137 %1144 = fmul float %1102, %1106 %1145 = fadd float %1144, %1139 %1146 = fmul float %1103, %1106 %1147 = fadd float %1146, %1141 %1148 = fmul float %1104, %1106 %1149 = fadd float %1148, %1143 %1150 = fmul float %1088, %1092 %1151 = fadd float %1150, %1145 %1152 = fmul float %1089, %1092 %1153 = fadd float %1152, %1147 %1154 = fmul float %1090, %1092 %1155 = fadd float %1154, %1149 %1156 = fmul float %1074, %1078 %1157 = fadd float %1156, %1151 %1158 = fmul float %1075, %1078 %1159 = fadd float %1158, %1153 %1160 = fmul float %1076, %1078 %1161 = fadd float %1160, %1155 %1162 = fcmp une float %35, %temp24.0 %.sink216 = select i1 %1162, float %38, float %37 %temp76.0 = select i1 %1162, float 1.953125e-03, float 3.906250e-03 %1163 = fdiv float 1.000000e+00, %.sink216 %1164 = fmul float %102, %1163 %1165 = fmul float %100, %1163 %1166 = call float @llvm.floor.f32(float %1164) %1167 = fsub float %1164, %1166 %1168 = call float @llvm.floor.f32(float %1165) %1169 = fsub float %1165, %1168 %1170 = fmul float %39, 2.000000e+00 %1171 = fmul float %1170, %temp76.0 %1172 = fsub float 1.000000e+00, %1171 %1173 = fmul float %temp76.0, %39 %1174 = fmul float %1167, %1172 %1175 = fadd float %1174, %1173 %1176 = fmul float %1169, %1172 %1177 = fadd float %1176, %1173 %1178 = fmul float %1175, %temp24.0 %1179 = fadd float %1178, %temp12.0 %1180 = fmul float %1177, %temp24.0 %1181 = fadd float %1180, %temp13.0 %1182 = bitcast float %1179 to i32 %1183 = bitcast float %1181 to i32 %1184 = bitcast float %225 to i32 %1185 = insertelement <4 x i32> undef, i32 %1182, i32 0 %1186 = insertelement <4 x i32> %1185, i32 %1183, i32 1 %1187 = insertelement <4 x i32> %1186, i32 %1184, i32 2 %1188 = bitcast <8 x i32> %81 to <32 x i8> %1189 = bitcast <4 x i32> %83 to <16 x i8> %1190 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1187, <32 x i8> %1188, <16 x i8> %1189, i32 2) %1191 = extractelement <4 x float> %1190, i32 0 %1192 = extractelement <4 x float> %1190, i32 1 %1193 = extractelement <4 x float> %1190, i32 2 %1194 = fcmp oeq float %temp14.0, 4.000000e+00 %1195 = select i1 %1194, float 1.000000e+00, float 0.000000e+00 %1196 = bitcast float %1179 to i32 %1197 = bitcast float %1181 to i32 %1198 = bitcast float %225 to i32 %1199 = insertelement <4 x i32> undef, i32 %1196, i32 0 %1200 = insertelement <4 x i32> %1199, i32 %1197, i32 1 %1201 = insertelement <4 x i32> %1200, i32 %1198, i32 2 %1202 = bitcast <8 x i32> %73 to <32 x i8> %1203 = bitcast <4 x i32> %75 to <16 x i8> %1204 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1201, <32 x i8> %1202, <16 x i8> %1203, i32 2) %1205 = extractelement <4 x float> %1204, i32 0 %1206 = extractelement <4 x float> %1204, i32 1 %1207 = extractelement <4 x float> %1204, i32 2 %1208 = fcmp oeq float %temp14.0, 3.000000e+00 %1209 = select i1 %1208, float 1.000000e+00, float 0.000000e+00 %1210 = bitcast float %1179 to i32 %1211 = bitcast float %1181 to i32 %1212 = bitcast float %225 to i32 %1213 = insertelement <4 x i32> undef, i32 %1210, i32 0 %1214 = insertelement <4 x i32> %1213, i32 %1211, i32 1 %1215 = insertelement <4 x i32> %1214, i32 %1212, i32 2 %1216 = bitcast <8 x i32> %65 to <32 x i8> %1217 = bitcast <4 x i32> %67 to <16 x i8> %1218 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1215, <32 x i8> %1216, <16 x i8> %1217, i32 2) %1219 = extractelement <4 x float> %1218, i32 0 %1220 = extractelement <4 x float> %1218, i32 1 %1221 = extractelement <4 x float> %1218, i32 2 %1222 = fcmp oeq float %temp14.0, 2.000000e+00 %1223 = select i1 %1222, float 1.000000e+00, float 0.000000e+00 %1224 = bitcast float %1179 to i32 %1225 = bitcast float %1181 to i32 %1226 = bitcast float %225 to i32 %1227 = insertelement <4 x i32> undef, i32 %1224, i32 0 %1228 = insertelement <4 x i32> %1227, i32 %1225, i32 1 %1229 = insertelement <4 x i32> %1228, i32 %1226, i32 2 %1230 = bitcast <8 x i32> %57 to <32 x i8> %1231 = bitcast <4 x i32> %59 to <16 x i8> %1232 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1229, <32 x i8> %1230, <16 x i8> %1231, i32 2) %1233 = extractelement <4 x float> %1232, i32 0 %1234 = extractelement <4 x float> %1232, i32 1 %1235 = extractelement <4 x float> %1232, i32 2 %1236 = fcmp oeq float %temp14.0, 1.000000e+00 %1237 = select i1 %1236, float 1.000000e+00, float 0.000000e+00 %1238 = bitcast float %1179 to i32 %1239 = bitcast float %1181 to i32 %1240 = bitcast float %225 to i32 %1241 = insertelement <4 x i32> undef, i32 %1238, i32 0 %1242 = insertelement <4 x i32> %1241, i32 %1239, i32 1 %1243 = insertelement <4 x i32> %1242, i32 %1240, i32 2 %1244 = bitcast <8 x i32> %49 to <32 x i8> %1245 = bitcast <4 x i32> %51 to <16 x i8> %1246 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1243, <32 x i8> %1244, <16 x i8> %1245, i32 2) %1247 = extractelement <4 x float> %1246, i32 0 %1248 = extractelement <4 x float> %1246, i32 1 %1249 = extractelement <4 x float> %1246, i32 2 %1250 = fcmp oeq float %temp14.0, 0.000000e+00 %1251 = select i1 %1250, float 1.000000e+00, float 0.000000e+00 %1252 = fmul float %1247, %1251 %1253 = fmul float %1248, %1251 %1254 = fmul float %1249, %1251 %1255 = fmul float %1233, %1237 %1256 = fadd float %1255, %1252 %1257 = fmul float %1234, %1237 %1258 = fadd float %1257, %1253 %1259 = fmul float %1235, %1237 %1260 = fadd float %1259, %1254 %1261 = fmul float %1219, %1223 %1262 = fadd float %1261, %1256 %1263 = fmul float %1220, %1223 %1264 = fadd float %1263, %1258 %1265 = fmul float %1221, %1223 %1266 = fadd float %1265, %1260 %1267 = fmul float %1205, %1209 %1268 = fadd float %1267, %1262 %1269 = fmul float %1206, %1209 %1270 = fadd float %1269, %1264 %1271 = fmul float %1207, %1209 %1272 = fadd float %1271, %1266 %1273 = fmul float %1191, %1195 %1274 = fadd float %1273, %1268 %1275 = fmul float %1192, %1195 %1276 = fadd float %1275, %1270 %1277 = fmul float %1193, %1195 %1278 = fadd float %1277, %1272 %1279 = fmul float %1040, %150 %1280 = fmul float %1042, %150 %1281 = fmul float %1044, %150 %1282 = fmul float %1157, %148 %1283 = fadd float %1282, %1279 %1284 = fmul float %1159, %148 %1285 = fadd float %1284, %1280 %1286 = fmul float %1161, %148 %1287 = fadd float %1286, %1281 %1288 = fmul float %1274, %149 %1289 = fadd float %1288, %1283 %1290 = fmul float %1276, %149 %1291 = fadd float %1290, %1285 %1292 = fmul float %1278, %149 %1293 = fadd float %1292, %1287 %1294 = fmul float %689, %150 %1295 = fmul float %691, %150 %1296 = fmul float %693, %150 %1297 = fmul float %806, %148 %1298 = fadd float %1297, %1294 %1299 = fmul float %808, %148 %1300 = fadd float %1299, %1295 %1301 = fmul float %810, %148 %1302 = fadd float %1301, %1296 %1303 = fmul float %923, %149 %1304 = fadd float %1303, %1298 %1305 = fmul float %925, %149 %1306 = fadd float %1305, %1300 %1307 = fmul float %927, %149 %1308 = fadd float %1307, %1302 %1309 = fmul float %338, %150 %1310 = fmul float %340, %150 %1311 = fmul float %342, %150 %1312 = fmul float %455, %148 %1313 = fadd float %1312, %1309 %1314 = fmul float %457, %148 %1315 = fadd float %1314, %1310 %1316 = fmul float %459, %148 %1317 = fadd float %1316, %1311 %1318 = fmul float %572, %149 %1319 = fadd float %1318, %1313 %1320 = fmul float %574, %149 %1321 = fadd float %1320, %1315 %1322 = fmul float %576, %149 %1323 = fadd float %1322, %1317 %1324 = fmul float %92, %1319 %1325 = fmul float %92, %1321 %1326 = fmul float %92, %1323 %1327 = fmul float %93, %1304 %1328 = fadd float %1327, %1324 %1329 = fmul float %93, %1306 %1330 = fadd float %1329, %1325 %1331 = fmul float %93, %1308 %1332 = fadd float %1331, %1326 %1333 = fmul float %94, %1289 %1334 = fadd float %1333, %1328 %1335 = fmul float %94, %1291 %1336 = fadd float %1335, %1330 %1337 = fmul float %94, %1293 %1338 = fadd float %1337, %1332 %1339 = fcmp une float %35, %temp16.0 %.sink217 = select i1 %1339, float %38, float %37 %temp48.2 = select i1 %1339, float 1.953125e-03, float 3.906250e-03 %1340 = fdiv float 1.000000e+00, %.sink217 %1341 = fmul float %102, %1340 %1342 = fmul float %101, %1340 %1343 = call float @llvm.floor.f32(float %1341) %1344 = fsub float %1341, %1343 %1345 = call float @llvm.floor.f32(float %1342) %1346 = fsub float %1342, %1345 %1347 = fmul float %39, 2.000000e+00 %1348 = fmul float %1347, %temp48.2 %1349 = fsub float 1.000000e+00, %1348 %1350 = fmul float %temp48.2, %39 %1351 = fmul float %1344, %1349 %1352 = fadd float %1351, %1350 %1353 = fmul float %1346, %1349 %1354 = fadd float %1353, %1350 %1355 = fmul float %1352, %temp16.0 %1356 = fadd float %1355, %temp28.0 %1357 = fmul float %1354, %temp16.0 %1358 = fadd float %1357, %temp29.0 %1359 = bitcast float %1356 to i32 %1360 = bitcast float %1358 to i32 %1361 = bitcast float %225 to i32 %1362 = insertelement <4 x i32> undef, i32 %1359, i32 0 %1363 = insertelement <4 x i32> %1362, i32 %1360, i32 1 %1364 = insertelement <4 x i32> %1363, i32 %1361, i32 2 %1365 = bitcast <8 x i32> %85 to <32 x i8> %1366 = bitcast <4 x i32> %87 to <16 x i8> %1367 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1364, <32 x i8> %1365, <16 x i8> %1366, i32 2) %1368 = extractelement <4 x float> %1367, i32 1 %1369 = extractelement <4 x float> %1367, i32 3 %1370 = fcmp oeq float %temp30.0, 4.000000e+00 %1371 = select i1 %1370, float 1.000000e+00, float 0.000000e+00 %1372 = bitcast float %1356 to i32 %1373 = bitcast float %1358 to i32 %1374 = bitcast float %225 to i32 %1375 = insertelement <4 x i32> undef, i32 %1372, i32 0 %1376 = insertelement <4 x i32> %1375, i32 %1373, i32 1 %1377 = insertelement <4 x i32> %1376, i32 %1374, i32 2 %1378 = bitcast <8 x i32> %77 to <32 x i8> %1379 = bitcast <4 x i32> %79 to <16 x i8> %1380 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1377, <32 x i8> %1378, <16 x i8> %1379, i32 2) %1381 = extractelement <4 x float> %1380, i32 1 %1382 = extractelement <4 x float> %1380, i32 3 %1383 = fcmp oeq float %temp30.0, 3.000000e+00 %1384 = select i1 %1383, float 1.000000e+00, float 0.000000e+00 %1385 = bitcast float %1356 to i32 %1386 = bitcast float %1358 to i32 %1387 = bitcast float %225 to i32 %1388 = insertelement <4 x i32> undef, i32 %1385, i32 0 %1389 = insertelement <4 x i32> %1388, i32 %1386, i32 1 %1390 = insertelement <4 x i32> %1389, i32 %1387, i32 2 %1391 = bitcast <8 x i32> %69 to <32 x i8> %1392 = bitcast <4 x i32> %71 to <16 x i8> %1393 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1390, <32 x i8> %1391, <16 x i8> %1392, i32 2) %1394 = extractelement <4 x float> %1393, i32 1 %1395 = extractelement <4 x float> %1393, i32 3 %1396 = fcmp oeq float %temp30.0, 2.000000e+00 %1397 = select i1 %1396, float 1.000000e+00, float 0.000000e+00 %1398 = bitcast float %1356 to i32 %1399 = bitcast float %1358 to i32 %1400 = bitcast float %225 to i32 %1401 = insertelement <4 x i32> undef, i32 %1398, i32 0 %1402 = insertelement <4 x i32> %1401, i32 %1399, i32 1 %1403 = insertelement <4 x i32> %1402, i32 %1400, i32 2 %1404 = bitcast <8 x i32> %61 to <32 x i8> %1405 = bitcast <4 x i32> %63 to <16 x i8> %1406 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1403, <32 x i8> %1404, <16 x i8> %1405, i32 2) %1407 = extractelement <4 x float> %1406, i32 1 %1408 = extractelement <4 x float> %1406, i32 3 %1409 = fcmp oeq float %temp30.0, 1.000000e+00 %1410 = select i1 %1409, float 1.000000e+00, float 0.000000e+00 %1411 = bitcast float %1356 to i32 %1412 = bitcast float %1358 to i32 %1413 = bitcast float %225 to i32 %1414 = insertelement <4 x i32> undef, i32 %1411, i32 0 %1415 = insertelement <4 x i32> %1414, i32 %1412, i32 1 %1416 = insertelement <4 x i32> %1415, i32 %1413, i32 2 %1417 = bitcast <8 x i32> %53 to <32 x i8> %1418 = bitcast <4 x i32> %55 to <16 x i8> %1419 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1416, <32 x i8> %1417, <16 x i8> %1418, i32 2) %1420 = extractelement <4 x float> %1419, i32 1 %1421 = extractelement <4 x float> %1419, i32 3 %1422 = fcmp oeq float %temp30.0, 0.000000e+00 %1423 = select i1 %1422, float 1.000000e+00, float 0.000000e+00 %1424 = fmul float %1420, %1423 %1425 = fmul float %1421, %1423 %1426 = fmul float %1407, %1410 %1427 = fadd float %1426, %1424 %1428 = fmul float %1408, %1410 %1429 = fadd float %1428, %1425 %1430 = fmul float %1394, %1397 %1431 = fadd float %1430, %1427 %1432 = fmul float %1395, %1397 %1433 = fadd float %1432, %1429 %1434 = fmul float %1381, %1384 %1435 = fadd float %1434, %1431 %1436 = fmul float %1382, %1384 %1437 = fadd float %1436, %1433 %1438 = fmul float %1368, %1371 %1439 = fadd float %1438, %1435 %1440 = fmul float %1369, %1371 %1441 = fadd float %1440, %1437 %1442 = fmul float %1441, 2.000000e+00 %1443 = fadd float %1442, -1.000000e+00 %1444 = fmul float %1439, 2.000000e+00 %1445 = fadd float %1444, -1.000000e+00 %1446 = fmul float %1443, %1443 %1447 = fmul float %1445, %1445 %1448 = fadd float %1446, %1447 %1449 = call float @llvm.AMDIL.clamp.(float %1448, float 0.000000e+00, float 1.000000e+00) %1450 = fcmp une float %35, %temp16.0 %.sink218 = select i1 %1450, float %38, float %37 %temp52.2 = select i1 %1450, float 1.953125e-03, float 3.906250e-03 %1451 = fdiv float 1.000000e+00, %.sink218 %1452 = fmul float %102, %1451 %1453 = fmul float %100, %1451 %1454 = call float @llvm.floor.f32(float %1452) %1455 = fsub float %1452, %1454 %1456 = call float @llvm.floor.f32(float %1453) %1457 = fsub float %1453, %1456 %1458 = fmul float %39, 2.000000e+00 %1459 = fmul float %1458, %temp52.2 %1460 = fsub float 1.000000e+00, %1459 %1461 = fmul float %temp52.2, %39 %1462 = fmul float %1455, %1460 %1463 = fadd float %1462, %1461 %1464 = fmul float %1457, %1460 %1465 = fadd float %1464, %1461 %1466 = fmul float %1463, %temp16.0 %1467 = fadd float %1466, %temp28.0 %1468 = fmul float %1465, %temp16.0 %1469 = fadd float %1468, %temp29.0 %1470 = bitcast float %1467 to i32 %1471 = bitcast float %1469 to i32 %1472 = bitcast float %225 to i32 %1473 = insertelement <4 x i32> undef, i32 %1470, i32 0 %1474 = insertelement <4 x i32> %1473, i32 %1471, i32 1 %1475 = insertelement <4 x i32> %1474, i32 %1472, i32 2 %1476 = bitcast <8 x i32> %85 to <32 x i8> %1477 = bitcast <4 x i32> %87 to <16 x i8> %1478 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1475, <32 x i8> %1476, <16 x i8> %1477, i32 2) %1479 = extractelement <4 x float> %1478, i32 1 %1480 = extractelement <4 x float> %1478, i32 3 %1481 = fcmp oeq float %temp30.0, 4.000000e+00 %1482 = select i1 %1481, float 1.000000e+00, float 0.000000e+00 %1483 = bitcast float %1467 to i32 %1484 = bitcast float %1469 to i32 %1485 = bitcast float %225 to i32 %1486 = insertelement <4 x i32> undef, i32 %1483, i32 0 %1487 = insertelement <4 x i32> %1486, i32 %1484, i32 1 %1488 = insertelement <4 x i32> %1487, i32 %1485, i32 2 %1489 = bitcast <8 x i32> %77 to <32 x i8> %1490 = bitcast <4 x i32> %79 to <16 x i8> %1491 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1488, <32 x i8> %1489, <16 x i8> %1490, i32 2) %1492 = extractelement <4 x float> %1491, i32 1 %1493 = extractelement <4 x float> %1491, i32 3 %1494 = fcmp oeq float %temp30.0, 3.000000e+00 %1495 = select i1 %1494, float 1.000000e+00, float 0.000000e+00 %1496 = bitcast float %1467 to i32 %1497 = bitcast float %1469 to i32 %1498 = bitcast float %225 to i32 %1499 = insertelement <4 x i32> undef, i32 %1496, i32 0 %1500 = insertelement <4 x i32> %1499, i32 %1497, i32 1 %1501 = insertelement <4 x i32> %1500, i32 %1498, i32 2 %1502 = bitcast <8 x i32> %69 to <32 x i8> %1503 = bitcast <4 x i32> %71 to <16 x i8> %1504 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1501, <32 x i8> %1502, <16 x i8> %1503, i32 2) %1505 = extractelement <4 x float> %1504, i32 1 %1506 = extractelement <4 x float> %1504, i32 3 %1507 = fcmp oeq float %temp30.0, 2.000000e+00 %1508 = select i1 %1507, float 1.000000e+00, float 0.000000e+00 %1509 = bitcast float %1467 to i32 %1510 = bitcast float %1469 to i32 %1511 = bitcast float %225 to i32 %1512 = insertelement <4 x i32> undef, i32 %1509, i32 0 %1513 = insertelement <4 x i32> %1512, i32 %1510, i32 1 %1514 = insertelement <4 x i32> %1513, i32 %1511, i32 2 %1515 = bitcast <8 x i32> %61 to <32 x i8> %1516 = bitcast <4 x i32> %63 to <16 x i8> %1517 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1514, <32 x i8> %1515, <16 x i8> %1516, i32 2) %1518 = extractelement <4 x float> %1517, i32 1 %1519 = extractelement <4 x float> %1517, i32 3 %1520 = fcmp oeq float %temp30.0, 1.000000e+00 %1521 = select i1 %1520, float 1.000000e+00, float 0.000000e+00 %1522 = bitcast float %1467 to i32 %1523 = bitcast float %1469 to i32 %1524 = bitcast float %225 to i32 %1525 = insertelement <4 x i32> undef, i32 %1522, i32 0 %1526 = insertelement <4 x i32> %1525, i32 %1523, i32 1 %1527 = insertelement <4 x i32> %1526, i32 %1524, i32 2 %1528 = bitcast <8 x i32> %53 to <32 x i8> %1529 = bitcast <4 x i32> %55 to <16 x i8> %1530 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1527, <32 x i8> %1528, <16 x i8> %1529, i32 2) %1531 = extractelement <4 x float> %1530, i32 1 %1532 = extractelement <4 x float> %1530, i32 3 %1533 = fcmp oeq float %temp30.0, 0.000000e+00 %1534 = select i1 %1533, float 1.000000e+00, float 0.000000e+00 %1535 = fmul float %1531, %1534 %1536 = fmul float %1532, %1534 %1537 = fmul float %1518, %1521 %1538 = fadd float %1537, %1535 %1539 = fmul float %1519, %1521 %1540 = fadd float %1539, %1536 %1541 = fmul float %1505, %1508 %1542 = fadd float %1541, %1538 %1543 = fmul float %1506, %1508 %1544 = fadd float %1543, %1540 %1545 = fmul float %1492, %1495 %1546 = fadd float %1545, %1542 %1547 = fmul float %1493, %1495 %1548 = fadd float %1547, %1544 %1549 = fmul float %1479, %1482 %1550 = fadd float %1549, %1546 %1551 = fmul float %1480, %1482 %1552 = fadd float %1551, %1548 %1553 = fmul float %1552, 2.000000e+00 %1554 = fadd float %1553, -1.000000e+00 %1555 = fmul float %1550, 2.000000e+00 %1556 = fadd float %1555, -1.000000e+00 %1557 = fmul float %1554, %1554 %1558 = fmul float %1556, %1556 %1559 = fadd float %1557, %1558 %1560 = call float @llvm.AMDIL.clamp.(float %1559, float 0.000000e+00, float 1.000000e+00) %1561 = fcmp une float %35, %temp16.0 %.sink219 = select i1 %1561, float %38, float %37 %temp56.2 = select i1 %1561, float 1.953125e-03, float 3.906250e-03 %1562 = fdiv float 1.000000e+00, %.sink219 %1563 = fmul float %100, %1562 %1564 = fmul float %101, %1562 %1565 = call float @llvm.floor.f32(float %1563) %1566 = fsub float %1563, %1565 %1567 = call float @llvm.floor.f32(float %1564) %1568 = fsub float %1564, %1567 %1569 = fmul float %39, 2.000000e+00 %1570 = fmul float %1569, %temp56.2 %1571 = fsub float 1.000000e+00, %1570 %1572 = fmul float %temp56.2, %39 %1573 = fmul float %1566, %1571 %1574 = fadd float %1573, %1572 %1575 = fmul float %1568, %1571 %1576 = fadd float %1575, %1572 %1577 = fmul float %1574, %temp16.0 %1578 = fadd float %1577, %temp28.0 %1579 = fmul float %1576, %temp16.0 %1580 = fadd float %1579, %temp29.0 %1581 = bitcast float %1578 to i32 %1582 = bitcast float %1580 to i32 %1583 = bitcast float %225 to i32 %1584 = insertelement <4 x i32> undef, i32 %1581, i32 0 %1585 = insertelement <4 x i32> %1584, i32 %1582, i32 1 %1586 = insertelement <4 x i32> %1585, i32 %1583, i32 2 %1587 = bitcast <8 x i32> %85 to <32 x i8> %1588 = bitcast <4 x i32> %87 to <16 x i8> %1589 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1586, <32 x i8> %1587, <16 x i8> %1588, i32 2) %1590 = extractelement <4 x float> %1589, i32 1 %1591 = extractelement <4 x float> %1589, i32 3 %1592 = fcmp oeq float %temp30.0, 4.000000e+00 %1593 = select i1 %1592, float 1.000000e+00, float 0.000000e+00 %1594 = bitcast float %1578 to i32 %1595 = bitcast float %1580 to i32 %1596 = bitcast float %225 to i32 %1597 = insertelement <4 x i32> undef, i32 %1594, i32 0 %1598 = insertelement <4 x i32> %1597, i32 %1595, i32 1 %1599 = insertelement <4 x i32> %1598, i32 %1596, i32 2 %1600 = bitcast <8 x i32> %77 to <32 x i8> %1601 = bitcast <4 x i32> %79 to <16 x i8> %1602 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1599, <32 x i8> %1600, <16 x i8> %1601, i32 2) %1603 = extractelement <4 x float> %1602, i32 1 %1604 = extractelement <4 x float> %1602, i32 3 %1605 = fcmp oeq float %temp30.0, 3.000000e+00 %1606 = select i1 %1605, float 1.000000e+00, float 0.000000e+00 %1607 = bitcast float %1578 to i32 %1608 = bitcast float %1580 to i32 %1609 = bitcast float %225 to i32 %1610 = insertelement <4 x i32> undef, i32 %1607, i32 0 %1611 = insertelement <4 x i32> %1610, i32 %1608, i32 1 %1612 = insertelement <4 x i32> %1611, i32 %1609, i32 2 %1613 = bitcast <8 x i32> %69 to <32 x i8> %1614 = bitcast <4 x i32> %71 to <16 x i8> %1615 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1612, <32 x i8> %1613, <16 x i8> %1614, i32 2) %1616 = extractelement <4 x float> %1615, i32 1 %1617 = extractelement <4 x float> %1615, i32 3 %1618 = fcmp oeq float %temp30.0, 2.000000e+00 %1619 = select i1 %1618, float 1.000000e+00, float 0.000000e+00 %1620 = bitcast float %1578 to i32 %1621 = bitcast float %1580 to i32 %1622 = bitcast float %225 to i32 %1623 = insertelement <4 x i32> undef, i32 %1620, i32 0 %1624 = insertelement <4 x i32> %1623, i32 %1621, i32 1 %1625 = insertelement <4 x i32> %1624, i32 %1622, i32 2 %1626 = bitcast <8 x i32> %61 to <32 x i8> %1627 = bitcast <4 x i32> %63 to <16 x i8> %1628 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1625, <32 x i8> %1626, <16 x i8> %1627, i32 2) %1629 = extractelement <4 x float> %1628, i32 1 %1630 = extractelement <4 x float> %1628, i32 3 %1631 = fcmp oeq float %temp30.0, 1.000000e+00 %1632 = select i1 %1631, float 1.000000e+00, float 0.000000e+00 %1633 = bitcast float %1578 to i32 %1634 = bitcast float %1580 to i32 %1635 = bitcast float %225 to i32 %1636 = insertelement <4 x i32> undef, i32 %1633, i32 0 %1637 = insertelement <4 x i32> %1636, i32 %1634, i32 1 %1638 = insertelement <4 x i32> %1637, i32 %1635, i32 2 %1639 = bitcast <8 x i32> %53 to <32 x i8> %1640 = bitcast <4 x i32> %55 to <16 x i8> %1641 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1638, <32 x i8> %1639, <16 x i8> %1640, i32 2) %1642 = extractelement <4 x float> %1641, i32 1 %1643 = extractelement <4 x float> %1641, i32 3 %1644 = fcmp oeq float %temp30.0, 0.000000e+00 %1645 = select i1 %1644, float 1.000000e+00, float 0.000000e+00 %1646 = fmul float %1642, %1645 %1647 = fmul float %1643, %1645 %1648 = fmul float %1629, %1632 %1649 = fadd float %1648, %1646 %1650 = fmul float %1630, %1632 %1651 = fadd float %1650, %1647 %1652 = fmul float %1616, %1619 %1653 = fadd float %1652, %1649 %1654 = fmul float %1617, %1619 %1655 = fadd float %1654, %1651 %1656 = fmul float %1603, %1606 %1657 = fadd float %1656, %1653 %1658 = fmul float %1604, %1606 %1659 = fadd float %1658, %1655 %1660 = fmul float %1590, %1593 %1661 = fadd float %1660, %1657 %1662 = fmul float %1591, %1593 %1663 = fadd float %1662, %1659 %1664 = fmul float %1663, 2.000000e+00 %1665 = fadd float %1664, -1.000000e+00 %1666 = fmul float %1661, 2.000000e+00 %1667 = fadd float %1666, -1.000000e+00 %1668 = fmul float %1665, %1665 %1669 = fmul float %1667, %1667 %1670 = fadd float %1668, %1669 %1671 = call float @llvm.AMDIL.clamp.(float %1670, float 0.000000e+00, float 1.000000e+00) %1672 = fmul float %148, 0.000000e+00 %1673 = fmul float %1443, %148 %1674 = fmul float %1445, %148 %1675 = fmul float %1556, %149 %1676 = fadd float %1675, %1672 %1677 = fmul float %149, 0.000000e+00 %1678 = fadd float %1677, %1673 %1679 = fmul float %1554, %149 %1680 = fadd float %1679, %1674 %1681 = fmul float %1665, %150 %1682 = fadd float %1681, %1676 %1683 = fmul float %1667, %150 %1684 = fadd float %1683, %1678 %1685 = fmul float %150, 0.000000e+00 %1686 = fadd float %1685, %1680 %1687 = fcmp une float %35, %temp20.0 %.sink220 = select i1 %1687, float %38, float %37 %temp44.3 = select i1 %1687, float 1.953125e-03, float 3.906250e-03 %1688 = fdiv float 1.000000e+00, %.sink220 %1689 = fmul float %102, %1688 %1690 = fmul float %101, %1688 %1691 = call float @llvm.floor.f32(float %1689) %1692 = fsub float %1689, %1691 %1693 = call float @llvm.floor.f32(float %1690) %1694 = fsub float %1690, %1693 %1695 = fmul float %39, 2.000000e+00 %1696 = fmul float %1695, %temp44.3 %1697 = fsub float 1.000000e+00, %1696 %1698 = fmul float %temp44.3, %39 %1699 = fmul float %1692, %1697 %1700 = fadd float %1699, %1698 %1701 = fmul float %1694, %1697 %1702 = fadd float %1701, %1698 %1703 = fmul float %1700, %temp20.0 %1704 = fadd float %1703, %temp36.0 %1705 = fmul float %1702, %temp20.0 %1706 = fadd float %1705, %temp37.0 %1707 = bitcast float %1704 to i32 %1708 = bitcast float %1706 to i32 %1709 = bitcast float %225 to i32 %1710 = insertelement <4 x i32> undef, i32 %1707, i32 0 %1711 = insertelement <4 x i32> %1710, i32 %1708, i32 1 %1712 = insertelement <4 x i32> %1711, i32 %1709, i32 2 %1713 = bitcast <8 x i32> %85 to <32 x i8> %1714 = bitcast <4 x i32> %87 to <16 x i8> %1715 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1712, <32 x i8> %1713, <16 x i8> %1714, i32 2) %1716 = extractelement <4 x float> %1715, i32 1 %1717 = extractelement <4 x float> %1715, i32 3 %1718 = fcmp oeq float %temp38.0, 4.000000e+00 %1719 = select i1 %1718, float 1.000000e+00, float 0.000000e+00 %1720 = bitcast float %1704 to i32 %1721 = bitcast float %1706 to i32 %1722 = bitcast float %225 to i32 %1723 = insertelement <4 x i32> undef, i32 %1720, i32 0 %1724 = insertelement <4 x i32> %1723, i32 %1721, i32 1 %1725 = insertelement <4 x i32> %1724, i32 %1722, i32 2 %1726 = bitcast <8 x i32> %77 to <32 x i8> %1727 = bitcast <4 x i32> %79 to <16 x i8> %1728 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1725, <32 x i8> %1726, <16 x i8> %1727, i32 2) %1729 = extractelement <4 x float> %1728, i32 1 %1730 = extractelement <4 x float> %1728, i32 3 %1731 = fcmp oeq float %temp38.0, 3.000000e+00 %1732 = select i1 %1731, float 1.000000e+00, float 0.000000e+00 %1733 = bitcast float %1704 to i32 %1734 = bitcast float %1706 to i32 %1735 = bitcast float %225 to i32 %1736 = insertelement <4 x i32> undef, i32 %1733, i32 0 %1737 = insertelement <4 x i32> %1736, i32 %1734, i32 1 %1738 = insertelement <4 x i32> %1737, i32 %1735, i32 2 %1739 = bitcast <8 x i32> %69 to <32 x i8> %1740 = bitcast <4 x i32> %71 to <16 x i8> %1741 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1738, <32 x i8> %1739, <16 x i8> %1740, i32 2) %1742 = extractelement <4 x float> %1741, i32 1 %1743 = extractelement <4 x float> %1741, i32 3 %1744 = fcmp oeq float %temp38.0, 2.000000e+00 %1745 = select i1 %1744, float 1.000000e+00, float 0.000000e+00 %1746 = bitcast float %1704 to i32 %1747 = bitcast float %1706 to i32 %1748 = bitcast float %225 to i32 %1749 = insertelement <4 x i32> undef, i32 %1746, i32 0 %1750 = insertelement <4 x i32> %1749, i32 %1747, i32 1 %1751 = insertelement <4 x i32> %1750, i32 %1748, i32 2 %1752 = bitcast <8 x i32> %61 to <32 x i8> %1753 = bitcast <4 x i32> %63 to <16 x i8> %1754 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1751, <32 x i8> %1752, <16 x i8> %1753, i32 2) %1755 = extractelement <4 x float> %1754, i32 1 %1756 = extractelement <4 x float> %1754, i32 3 %1757 = fcmp oeq float %temp38.0, 1.000000e+00 %1758 = select i1 %1757, float 1.000000e+00, float 0.000000e+00 %1759 = bitcast float %1704 to i32 %1760 = bitcast float %1706 to i32 %1761 = bitcast float %225 to i32 %1762 = insertelement <4 x i32> undef, i32 %1759, i32 0 %1763 = insertelement <4 x i32> %1762, i32 %1760, i32 1 %1764 = insertelement <4 x i32> %1763, i32 %1761, i32 2 %1765 = bitcast <8 x i32> %53 to <32 x i8> %1766 = bitcast <4 x i32> %55 to <16 x i8> %1767 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1764, <32 x i8> %1765, <16 x i8> %1766, i32 2) %1768 = extractelement <4 x float> %1767, i32 1 %1769 = extractelement <4 x float> %1767, i32 3 %1770 = fcmp oeq float %temp38.0, 0.000000e+00 %1771 = select i1 %1770, float 1.000000e+00, float 0.000000e+00 %1772 = fmul float %1768, %1771 %1773 = fmul float %1769, %1771 %1774 = fmul float %1755, %1758 %1775 = fadd float %1774, %1772 %1776 = fmul float %1756, %1758 %1777 = fadd float %1776, %1773 %1778 = fmul float %1742, %1745 %1779 = fadd float %1778, %1775 %1780 = fmul float %1743, %1745 %1781 = fadd float %1780, %1777 %1782 = fmul float %1729, %1732 %1783 = fadd float %1782, %1779 %1784 = fmul float %1730, %1732 %1785 = fadd float %1784, %1781 %1786 = fmul float %1716, %1719 %1787 = fadd float %1786, %1783 %1788 = fmul float %1717, %1719 %1789 = fadd float %1788, %1785 %1790 = fmul float %1789, 2.000000e+00 %1791 = fadd float %1790, -1.000000e+00 %1792 = fmul float %1787, 2.000000e+00 %1793 = fadd float %1792, -1.000000e+00 %1794 = fmul float %1791, %1791 %1795 = fmul float %1793, %1793 %1796 = fadd float %1794, %1795 %1797 = call float @llvm.AMDIL.clamp.(float %1796, float 0.000000e+00, float 1.000000e+00) %1798 = fcmp une float %35, %temp20.0 %.sink221 = select i1 %1798, float %38, float %37 %temp48.4 = select i1 %1798, float 1.953125e-03, float 3.906250e-03 %1799 = fdiv float 1.000000e+00, %.sink221 %1800 = fmul float %102, %1799 %1801 = fmul float %100, %1799 %1802 = call float @llvm.floor.f32(float %1800) %1803 = fsub float %1800, %1802 %1804 = call float @llvm.floor.f32(float %1801) %1805 = fsub float %1801, %1804 %1806 = fmul float %39, 2.000000e+00 %1807 = fmul float %1806, %temp48.4 %1808 = fsub float 1.000000e+00, %1807 %1809 = fmul float %temp48.4, %39 %1810 = fmul float %1803, %1808 %1811 = fadd float %1810, %1809 %1812 = fmul float %1805, %1808 %1813 = fadd float %1812, %1809 %1814 = fmul float %1811, %temp20.0 %1815 = fadd float %1814, %temp36.0 %1816 = fmul float %1813, %temp20.0 %1817 = fadd float %1816, %temp37.0 %1818 = bitcast float %1815 to i32 %1819 = bitcast float %1817 to i32 %1820 = bitcast float %225 to i32 %1821 = insertelement <4 x i32> undef, i32 %1818, i32 0 %1822 = insertelement <4 x i32> %1821, i32 %1819, i32 1 %1823 = insertelement <4 x i32> %1822, i32 %1820, i32 2 %1824 = bitcast <8 x i32> %85 to <32 x i8> %1825 = bitcast <4 x i32> %87 to <16 x i8> %1826 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1823, <32 x i8> %1824, <16 x i8> %1825, i32 2) %1827 = extractelement <4 x float> %1826, i32 1 %1828 = extractelement <4 x float> %1826, i32 3 %1829 = fcmp oeq float %temp38.0, 4.000000e+00 %1830 = select i1 %1829, float 1.000000e+00, float 0.000000e+00 %1831 = bitcast float %1815 to i32 %1832 = bitcast float %1817 to i32 %1833 = bitcast float %225 to i32 %1834 = insertelement <4 x i32> undef, i32 %1831, i32 0 %1835 = insertelement <4 x i32> %1834, i32 %1832, i32 1 %1836 = insertelement <4 x i32> %1835, i32 %1833, i32 2 %1837 = bitcast <8 x i32> %77 to <32 x i8> %1838 = bitcast <4 x i32> %79 to <16 x i8> %1839 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1836, <32 x i8> %1837, <16 x i8> %1838, i32 2) %1840 = extractelement <4 x float> %1839, i32 1 %1841 = extractelement <4 x float> %1839, i32 3 %1842 = fcmp oeq float %temp38.0, 3.000000e+00 %1843 = select i1 %1842, float 1.000000e+00, float 0.000000e+00 %1844 = bitcast float %1815 to i32 %1845 = bitcast float %1817 to i32 %1846 = bitcast float %225 to i32 %1847 = insertelement <4 x i32> undef, i32 %1844, i32 0 %1848 = insertelement <4 x i32> %1847, i32 %1845, i32 1 %1849 = insertelement <4 x i32> %1848, i32 %1846, i32 2 %1850 = bitcast <8 x i32> %69 to <32 x i8> %1851 = bitcast <4 x i32> %71 to <16 x i8> %1852 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1849, <32 x i8> %1850, <16 x i8> %1851, i32 2) %1853 = extractelement <4 x float> %1852, i32 1 %1854 = extractelement <4 x float> %1852, i32 3 %1855 = fcmp oeq float %temp38.0, 2.000000e+00 %1856 = select i1 %1855, float 1.000000e+00, float 0.000000e+00 %1857 = bitcast float %1815 to i32 %1858 = bitcast float %1817 to i32 %1859 = bitcast float %225 to i32 %1860 = insertelement <4 x i32> undef, i32 %1857, i32 0 %1861 = insertelement <4 x i32> %1860, i32 %1858, i32 1 %1862 = insertelement <4 x i32> %1861, i32 %1859, i32 2 %1863 = bitcast <8 x i32> %61 to <32 x i8> %1864 = bitcast <4 x i32> %63 to <16 x i8> %1865 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1862, <32 x i8> %1863, <16 x i8> %1864, i32 2) %1866 = extractelement <4 x float> %1865, i32 1 %1867 = extractelement <4 x float> %1865, i32 3 %1868 = fcmp oeq float %temp38.0, 1.000000e+00 %1869 = select i1 %1868, float 1.000000e+00, float 0.000000e+00 %1870 = bitcast float %1815 to i32 %1871 = bitcast float %1817 to i32 %1872 = bitcast float %225 to i32 %1873 = insertelement <4 x i32> undef, i32 %1870, i32 0 %1874 = insertelement <4 x i32> %1873, i32 %1871, i32 1 %1875 = insertelement <4 x i32> %1874, i32 %1872, i32 2 %1876 = bitcast <8 x i32> %53 to <32 x i8> %1877 = bitcast <4 x i32> %55 to <16 x i8> %1878 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1875, <32 x i8> %1876, <16 x i8> %1877, i32 2) %1879 = extractelement <4 x float> %1878, i32 1 %1880 = extractelement <4 x float> %1878, i32 3 %1881 = fcmp oeq float %temp38.0, 0.000000e+00 %1882 = select i1 %1881, float 1.000000e+00, float 0.000000e+00 %1883 = fmul float %1879, %1882 %1884 = fmul float %1880, %1882 %1885 = fmul float %1866, %1869 %1886 = fadd float %1885, %1883 %1887 = fmul float %1867, %1869 %1888 = fadd float %1887, %1884 %1889 = fmul float %1853, %1856 %1890 = fadd float %1889, %1886 %1891 = fmul float %1854, %1856 %1892 = fadd float %1891, %1888 %1893 = fmul float %1840, %1843 %1894 = fadd float %1893, %1890 %1895 = fmul float %1841, %1843 %1896 = fadd float %1895, %1892 %1897 = fmul float %1827, %1830 %1898 = fadd float %1897, %1894 %1899 = fmul float %1828, %1830 %1900 = fadd float %1899, %1896 %1901 = fmul float %1900, 2.000000e+00 %1902 = fadd float %1901, -1.000000e+00 %1903 = fmul float %1898, 2.000000e+00 %1904 = fadd float %1903, -1.000000e+00 %1905 = fmul float %1902, %1902 %1906 = fmul float %1904, %1904 %1907 = fadd float %1905, %1906 %1908 = call float @llvm.AMDIL.clamp.(float %1907, float 0.000000e+00, float 1.000000e+00) %1909 = fcmp une float %35, %temp20.0 %.sink222 = select i1 %1909, float %38, float %37 %temp52.4 = select i1 %1909, float 1.953125e-03, float 3.906250e-03 %1910 = fdiv float 1.000000e+00, %.sink222 %1911 = fmul float %100, %1910 %1912 = fmul float %101, %1910 %1913 = call float @llvm.floor.f32(float %1911) %1914 = fsub float %1911, %1913 %1915 = call float @llvm.floor.f32(float %1912) %1916 = fsub float %1912, %1915 %1917 = fmul float %39, 2.000000e+00 %1918 = fmul float %1917, %temp52.4 %1919 = fsub float 1.000000e+00, %1918 %1920 = fmul float %temp52.4, %39 %1921 = fmul float %1914, %1919 %1922 = fadd float %1921, %1920 %1923 = fmul float %1916, %1919 %1924 = fadd float %1923, %1920 %1925 = fmul float %1922, %temp20.0 %1926 = fadd float %1925, %temp36.0 %1927 = fmul float %1924, %temp20.0 %1928 = fadd float %1927, %temp37.0 %1929 = bitcast float %1926 to i32 %1930 = bitcast float %1928 to i32 %1931 = bitcast float %225 to i32 %1932 = insertelement <4 x i32> undef, i32 %1929, i32 0 %1933 = insertelement <4 x i32> %1932, i32 %1930, i32 1 %1934 = insertelement <4 x i32> %1933, i32 %1931, i32 2 %1935 = bitcast <8 x i32> %85 to <32 x i8> %1936 = bitcast <4 x i32> %87 to <16 x i8> %1937 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1934, <32 x i8> %1935, <16 x i8> %1936, i32 2) %1938 = extractelement <4 x float> %1937, i32 1 %1939 = extractelement <4 x float> %1937, i32 3 %1940 = fcmp oeq float %temp38.0, 4.000000e+00 %1941 = select i1 %1940, float 1.000000e+00, float 0.000000e+00 %1942 = bitcast float %1926 to i32 %1943 = bitcast float %1928 to i32 %1944 = bitcast float %225 to i32 %1945 = insertelement <4 x i32> undef, i32 %1942, i32 0 %1946 = insertelement <4 x i32> %1945, i32 %1943, i32 1 %1947 = insertelement <4 x i32> %1946, i32 %1944, i32 2 %1948 = bitcast <8 x i32> %77 to <32 x i8> %1949 = bitcast <4 x i32> %79 to <16 x i8> %1950 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1947, <32 x i8> %1948, <16 x i8> %1949, i32 2) %1951 = extractelement <4 x float> %1950, i32 1 %1952 = extractelement <4 x float> %1950, i32 3 %1953 = fcmp oeq float %temp38.0, 3.000000e+00 %1954 = select i1 %1953, float 1.000000e+00, float 0.000000e+00 %1955 = bitcast float %1926 to i32 %1956 = bitcast float %1928 to i32 %1957 = bitcast float %225 to i32 %1958 = insertelement <4 x i32> undef, i32 %1955, i32 0 %1959 = insertelement <4 x i32> %1958, i32 %1956, i32 1 %1960 = insertelement <4 x i32> %1959, i32 %1957, i32 2 %1961 = bitcast <8 x i32> %69 to <32 x i8> %1962 = bitcast <4 x i32> %71 to <16 x i8> %1963 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1960, <32 x i8> %1961, <16 x i8> %1962, i32 2) %1964 = extractelement <4 x float> %1963, i32 1 %1965 = extractelement <4 x float> %1963, i32 3 %1966 = fcmp oeq float %temp38.0, 2.000000e+00 %1967 = select i1 %1966, float 1.000000e+00, float 0.000000e+00 %1968 = bitcast float %1926 to i32 %1969 = bitcast float %1928 to i32 %1970 = bitcast float %225 to i32 %1971 = insertelement <4 x i32> undef, i32 %1968, i32 0 %1972 = insertelement <4 x i32> %1971, i32 %1969, i32 1 %1973 = insertelement <4 x i32> %1972, i32 %1970, i32 2 %1974 = bitcast <8 x i32> %61 to <32 x i8> %1975 = bitcast <4 x i32> %63 to <16 x i8> %1976 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1973, <32 x i8> %1974, <16 x i8> %1975, i32 2) %1977 = extractelement <4 x float> %1976, i32 1 %1978 = extractelement <4 x float> %1976, i32 3 %1979 = fcmp oeq float %temp38.0, 1.000000e+00 %1980 = select i1 %1979, float 1.000000e+00, float 0.000000e+00 %1981 = bitcast float %1926 to i32 %1982 = bitcast float %1928 to i32 %1983 = bitcast float %225 to i32 %1984 = insertelement <4 x i32> undef, i32 %1981, i32 0 %1985 = insertelement <4 x i32> %1984, i32 %1982, i32 1 %1986 = insertelement <4 x i32> %1985, i32 %1983, i32 2 %1987 = bitcast <8 x i32> %53 to <32 x i8> %1988 = bitcast <4 x i32> %55 to <16 x i8> %1989 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1986, <32 x i8> %1987, <16 x i8> %1988, i32 2) %1990 = extractelement <4 x float> %1989, i32 1 %1991 = extractelement <4 x float> %1989, i32 3 %1992 = fcmp oeq float %temp38.0, 0.000000e+00 %1993 = select i1 %1992, float 1.000000e+00, float 0.000000e+00 %1994 = fmul float %1990, %1993 %1995 = fmul float %1991, %1993 %1996 = fmul float %1977, %1980 %1997 = fadd float %1996, %1994 %1998 = fmul float %1978, %1980 %1999 = fadd float %1998, %1995 %2000 = fmul float %1964, %1967 %2001 = fadd float %2000, %1997 %2002 = fmul float %1965, %1967 %2003 = fadd float %2002, %1999 %2004 = fmul float %1951, %1954 %2005 = fadd float %2004, %2001 %2006 = fmul float %1952, %1954 %2007 = fadd float %2006, %2003 %2008 = fmul float %1938, %1941 %2009 = fadd float %2008, %2005 %2010 = fmul float %1939, %1941 %2011 = fadd float %2010, %2007 %2012 = fmul float %2011, 2.000000e+00 %2013 = fadd float %2012, -1.000000e+00 %2014 = fmul float %2009, 2.000000e+00 %2015 = fadd float %2014, -1.000000e+00 %2016 = fmul float %2013, %2013 %2017 = fmul float %2015, %2015 %2018 = fadd float %2016, %2017 %2019 = call float @llvm.AMDIL.clamp.(float %2018, float 0.000000e+00, float 1.000000e+00) %2020 = fmul float %148, 0.000000e+00 %2021 = fmul float %1791, %148 %2022 = fmul float %1793, %148 %2023 = fmul float %1904, %149 %2024 = fadd float %2023, %2020 %2025 = fmul float %149, 0.000000e+00 %2026 = fadd float %2025, %2021 %2027 = fmul float %1902, %149 %2028 = fadd float %2027, %2022 %2029 = fmul float %2013, %150 %2030 = fadd float %2029, %2024 %2031 = fmul float %2015, %150 %2032 = fadd float %2031, %2026 %2033 = fmul float %150, 0.000000e+00 %2034 = fadd float %2033, %2028 %2035 = fcmp une float %35, %temp24.0 %.sink223 = select i1 %2035, float %38, float %37 %temp36.1 = select i1 %2035, float 1.953125e-03, float 3.906250e-03 %2036 = fdiv float 1.000000e+00, %.sink223 %2037 = fmul float %102, %2036 %2038 = fmul float %101, %2036 %2039 = call float @llvm.floor.f32(float %2037) %2040 = fsub float %2037, %2039 %2041 = call float @llvm.floor.f32(float %2038) %2042 = fsub float %2038, %2041 %2043 = fmul float %39, 2.000000e+00 %2044 = fmul float %2043, %temp36.1 %2045 = fsub float 1.000000e+00, %2044 %2046 = fmul float %temp36.1, %39 %2047 = fmul float %2040, %2045 %2048 = fadd float %2047, %2046 %2049 = fmul float %2042, %2045 %2050 = fadd float %2049, %2046 %2051 = fmul float %2048, %temp24.0 %2052 = fadd float %2051, %temp12.0 %2053 = fmul float %2050, %temp24.0 %2054 = fadd float %2053, %temp13.0 %2055 = bitcast float %2052 to i32 %2056 = bitcast float %2054 to i32 %2057 = bitcast float %225 to i32 %2058 = insertelement <4 x i32> undef, i32 %2055, i32 0 %2059 = insertelement <4 x i32> %2058, i32 %2056, i32 1 %2060 = insertelement <4 x i32> %2059, i32 %2057, i32 2 %2061 = bitcast <8 x i32> %85 to <32 x i8> %2062 = bitcast <4 x i32> %87 to <16 x i8> %2063 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2060, <32 x i8> %2061, <16 x i8> %2062, i32 2) %2064 = extractelement <4 x float> %2063, i32 1 %2065 = extractelement <4 x float> %2063, i32 3 %2066 = fcmp oeq float %temp14.0, 4.000000e+00 %2067 = select i1 %2066, float 1.000000e+00, float 0.000000e+00 %2068 = bitcast float %2052 to i32 %2069 = bitcast float %2054 to i32 %2070 = bitcast float %225 to i32 %2071 = insertelement <4 x i32> undef, i32 %2068, i32 0 %2072 = insertelement <4 x i32> %2071, i32 %2069, i32 1 %2073 = insertelement <4 x i32> %2072, i32 %2070, i32 2 %2074 = bitcast <8 x i32> %77 to <32 x i8> %2075 = bitcast <4 x i32> %79 to <16 x i8> %2076 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2073, <32 x i8> %2074, <16 x i8> %2075, i32 2) %2077 = extractelement <4 x float> %2076, i32 1 %2078 = extractelement <4 x float> %2076, i32 3 %2079 = fcmp oeq float %temp14.0, 3.000000e+00 %2080 = select i1 %2079, float 1.000000e+00, float 0.000000e+00 %2081 = bitcast float %2052 to i32 %2082 = bitcast float %2054 to i32 %2083 = bitcast float %225 to i32 %2084 = insertelement <4 x i32> undef, i32 %2081, i32 0 %2085 = insertelement <4 x i32> %2084, i32 %2082, i32 1 %2086 = insertelement <4 x i32> %2085, i32 %2083, i32 2 %2087 = bitcast <8 x i32> %69 to <32 x i8> %2088 = bitcast <4 x i32> %71 to <16 x i8> %2089 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2086, <32 x i8> %2087, <16 x i8> %2088, i32 2) %2090 = extractelement <4 x float> %2089, i32 1 %2091 = extractelement <4 x float> %2089, i32 3 %2092 = fcmp oeq float %temp14.0, 2.000000e+00 %2093 = select i1 %2092, float 1.000000e+00, float 0.000000e+00 %2094 = bitcast float %2052 to i32 %2095 = bitcast float %2054 to i32 %2096 = bitcast float %225 to i32 %2097 = insertelement <4 x i32> undef, i32 %2094, i32 0 %2098 = insertelement <4 x i32> %2097, i32 %2095, i32 1 %2099 = insertelement <4 x i32> %2098, i32 %2096, i32 2 %2100 = bitcast <8 x i32> %61 to <32 x i8> %2101 = bitcast <4 x i32> %63 to <16 x i8> %2102 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2099, <32 x i8> %2100, <16 x i8> %2101, i32 2) %2103 = extractelement <4 x float> %2102, i32 1 %2104 = extractelement <4 x float> %2102, i32 3 %2105 = fcmp oeq float %temp14.0, 1.000000e+00 %2106 = select i1 %2105, float 1.000000e+00, float 0.000000e+00 %2107 = bitcast float %2052 to i32 %2108 = bitcast float %2054 to i32 %2109 = bitcast float %225 to i32 %2110 = insertelement <4 x i32> undef, i32 %2107, i32 0 %2111 = insertelement <4 x i32> %2110, i32 %2108, i32 1 %2112 = insertelement <4 x i32> %2111, i32 %2109, i32 2 %2113 = bitcast <8 x i32> %53 to <32 x i8> %2114 = bitcast <4 x i32> %55 to <16 x i8> %2115 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2112, <32 x i8> %2113, <16 x i8> %2114, i32 2) %2116 = extractelement <4 x float> %2115, i32 1 %2117 = extractelement <4 x float> %2115, i32 3 %2118 = fcmp oeq float %temp14.0, 0.000000e+00 %2119 = select i1 %2118, float 1.000000e+00, float 0.000000e+00 %2120 = fmul float %2116, %2119 %2121 = fmul float %2117, %2119 %2122 = fmul float %2103, %2106 %2123 = fadd float %2122, %2120 %2124 = fmul float %2104, %2106 %2125 = fadd float %2124, %2121 %2126 = fmul float %2090, %2093 %2127 = fadd float %2126, %2123 %2128 = fmul float %2091, %2093 %2129 = fadd float %2128, %2125 %2130 = fmul float %2077, %2080 %2131 = fadd float %2130, %2127 %2132 = fmul float %2078, %2080 %2133 = fadd float %2132, %2129 %2134 = fmul float %2064, %2067 %2135 = fadd float %2134, %2131 %2136 = fmul float %2065, %2067 %2137 = fadd float %2136, %2133 %2138 = fmul float %2137, 2.000000e+00 %2139 = fadd float %2138, -1.000000e+00 %2140 = fmul float %2135, 2.000000e+00 %2141 = fadd float %2140, -1.000000e+00 %2142 = fmul float %2139, %2139 %2143 = fmul float %2141, %2141 %2144 = fadd float %2142, %2143 %2145 = call float @llvm.AMDIL.clamp.(float %2144, float 0.000000e+00, float 1.000000e+00) %2146 = fcmp une float %35, %temp24.0 %.sink224 = select i1 %2146, float %38, float %37 %temp44.5 = select i1 %2146, float 1.953125e-03, float 3.906250e-03 %2147 = fdiv float 1.000000e+00, %.sink224 %2148 = fmul float %102, %2147 %2149 = fmul float %100, %2147 %2150 = call float @llvm.floor.f32(float %2148) %2151 = fsub float %2148, %2150 %2152 = call float @llvm.floor.f32(float %2149) %2153 = fsub float %2149, %2152 %2154 = fmul float %39, 2.000000e+00 %2155 = fmul float %2154, %temp44.5 %2156 = fsub float 1.000000e+00, %2155 %2157 = fmul float %temp44.5, %39 %2158 = fmul float %2151, %2156 %2159 = fadd float %2158, %2157 %2160 = fmul float %2153, %2156 %2161 = fadd float %2160, %2157 %2162 = fmul float %2159, %temp24.0 %2163 = fadd float %2162, %temp12.0 %2164 = fmul float %2161, %temp24.0 %2165 = fadd float %2164, %temp13.0 %2166 = bitcast float %2163 to i32 %2167 = bitcast float %2165 to i32 %2168 = bitcast float %225 to i32 %2169 = insertelement <4 x i32> undef, i32 %2166, i32 0 %2170 = insertelement <4 x i32> %2169, i32 %2167, i32 1 %2171 = insertelement <4 x i32> %2170, i32 %2168, i32 2 %2172 = bitcast <8 x i32> %85 to <32 x i8> %2173 = bitcast <4 x i32> %87 to <16 x i8> %2174 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2171, <32 x i8> %2172, <16 x i8> %2173, i32 2) %2175 = extractelement <4 x float> %2174, i32 1 %2176 = extractelement <4 x float> %2174, i32 3 %2177 = fcmp oeq float %temp14.0, 4.000000e+00 %2178 = select i1 %2177, float 1.000000e+00, float 0.000000e+00 %2179 = bitcast float %2163 to i32 %2180 = bitcast float %2165 to i32 %2181 = bitcast float %225 to i32 %2182 = insertelement <4 x i32> undef, i32 %2179, i32 0 %2183 = insertelement <4 x i32> %2182, i32 %2180, i32 1 %2184 = insertelement <4 x i32> %2183, i32 %2181, i32 2 %2185 = bitcast <8 x i32> %77 to <32 x i8> %2186 = bitcast <4 x i32> %79 to <16 x i8> %2187 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2184, <32 x i8> %2185, <16 x i8> %2186, i32 2) %2188 = extractelement <4 x float> %2187, i32 1 %2189 = extractelement <4 x float> %2187, i32 3 %2190 = fcmp oeq float %temp14.0, 3.000000e+00 %2191 = select i1 %2190, float 1.000000e+00, float 0.000000e+00 %2192 = bitcast float %2163 to i32 %2193 = bitcast float %2165 to i32 %2194 = bitcast float %225 to i32 %2195 = insertelement <4 x i32> undef, i32 %2192, i32 0 %2196 = insertelement <4 x i32> %2195, i32 %2193, i32 1 %2197 = insertelement <4 x i32> %2196, i32 %2194, i32 2 %2198 = bitcast <8 x i32> %69 to <32 x i8> %2199 = bitcast <4 x i32> %71 to <16 x i8> %2200 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2197, <32 x i8> %2198, <16 x i8> %2199, i32 2) %2201 = extractelement <4 x float> %2200, i32 1 %2202 = extractelement <4 x float> %2200, i32 3 %2203 = fcmp oeq float %temp14.0, 2.000000e+00 %2204 = select i1 %2203, float 1.000000e+00, float 0.000000e+00 %2205 = bitcast float %2163 to i32 %2206 = bitcast float %2165 to i32 %2207 = bitcast float %225 to i32 %2208 = insertelement <4 x i32> undef, i32 %2205, i32 0 %2209 = insertelement <4 x i32> %2208, i32 %2206, i32 1 %2210 = insertelement <4 x i32> %2209, i32 %2207, i32 2 %2211 = bitcast <8 x i32> %61 to <32 x i8> %2212 = bitcast <4 x i32> %63 to <16 x i8> %2213 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2210, <32 x i8> %2211, <16 x i8> %2212, i32 2) %2214 = extractelement <4 x float> %2213, i32 1 %2215 = extractelement <4 x float> %2213, i32 3 %2216 = fcmp oeq float %temp14.0, 1.000000e+00 %2217 = select i1 %2216, float 1.000000e+00, float 0.000000e+00 %2218 = bitcast float %2163 to i32 %2219 = bitcast float %2165 to i32 %2220 = bitcast float %225 to i32 %2221 = insertelement <4 x i32> undef, i32 %2218, i32 0 %2222 = insertelement <4 x i32> %2221, i32 %2219, i32 1 %2223 = insertelement <4 x i32> %2222, i32 %2220, i32 2 %2224 = bitcast <8 x i32> %53 to <32 x i8> %2225 = bitcast <4 x i32> %55 to <16 x i8> %2226 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2223, <32 x i8> %2224, <16 x i8> %2225, i32 2) %2227 = extractelement <4 x float> %2226, i32 1 %2228 = extractelement <4 x float> %2226, i32 3 %2229 = fcmp oeq float %temp14.0, 0.000000e+00 %2230 = select i1 %2229, float 1.000000e+00, float 0.000000e+00 %2231 = fmul float %2227, %2230 %2232 = fmul float %2228, %2230 %2233 = fmul float %2214, %2217 %2234 = fadd float %2233, %2231 %2235 = fmul float %2215, %2217 %2236 = fadd float %2235, %2232 %2237 = fmul float %2201, %2204 %2238 = fadd float %2237, %2234 %2239 = fmul float %2202, %2204 %2240 = fadd float %2239, %2236 %2241 = fmul float %2188, %2191 %2242 = fadd float %2241, %2238 %2243 = fmul float %2189, %2191 %2244 = fadd float %2243, %2240 %2245 = fmul float %2175, %2178 %2246 = fadd float %2245, %2242 %2247 = fmul float %2176, %2178 %2248 = fadd float %2247, %2244 %2249 = fmul float %2248, 2.000000e+00 %2250 = fadd float %2249, -1.000000e+00 %2251 = fmul float %2246, 2.000000e+00 %2252 = fadd float %2251, -1.000000e+00 %2253 = fmul float %2250, %2250 %2254 = fmul float %2252, %2252 %2255 = fadd float %2253, %2254 %2256 = call float @llvm.AMDIL.clamp.(float %2255, float 0.000000e+00, float 1.000000e+00) %2257 = fcmp une float %35, %temp24.0 %.sink225 = select i1 %2257, float %38, float %37 %temp48.6 = select i1 %2257, float 1.953125e-03, float 3.906250e-03 %2258 = fdiv float 1.000000e+00, %.sink225 %2259 = fmul float %100, %2258 %2260 = fmul float %101, %2258 %2261 = call float @llvm.floor.f32(float %2259) %2262 = fsub float %2259, %2261 %2263 = call float @llvm.floor.f32(float %2260) %2264 = fsub float %2260, %2263 %2265 = fmul float %39, 2.000000e+00 %2266 = fmul float %2265, %temp48.6 %2267 = fsub float 1.000000e+00, %2266 %2268 = fmul float %temp48.6, %39 %2269 = fmul float %2262, %2267 %2270 = fadd float %2269, %2268 %2271 = fmul float %2264, %2267 %2272 = fadd float %2271, %2268 %2273 = fmul float %2270, %temp24.0 %2274 = fadd float %2273, %temp12.0 %2275 = fmul float %2272, %temp24.0 %2276 = fadd float %2275, %temp13.0 %2277 = bitcast float %2274 to i32 %2278 = bitcast float %2276 to i32 %2279 = bitcast float %225 to i32 %2280 = insertelement <4 x i32> undef, i32 %2277, i32 0 %2281 = insertelement <4 x i32> %2280, i32 %2278, i32 1 %2282 = insertelement <4 x i32> %2281, i32 %2279, i32 2 %2283 = bitcast <8 x i32> %85 to <32 x i8> %2284 = bitcast <4 x i32> %87 to <16 x i8> %2285 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2282, <32 x i8> %2283, <16 x i8> %2284, i32 2) %2286 = extractelement <4 x float> %2285, i32 1 %2287 = extractelement <4 x float> %2285, i32 3 %2288 = fcmp oeq float %temp14.0, 4.000000e+00 %2289 = select i1 %2288, float 1.000000e+00, float 0.000000e+00 %2290 = bitcast float %2274 to i32 %2291 = bitcast float %2276 to i32 %2292 = bitcast float %225 to i32 %2293 = insertelement <4 x i32> undef, i32 %2290, i32 0 %2294 = insertelement <4 x i32> %2293, i32 %2291, i32 1 %2295 = insertelement <4 x i32> %2294, i32 %2292, i32 2 %2296 = bitcast <8 x i32> %77 to <32 x i8> %2297 = bitcast <4 x i32> %79 to <16 x i8> %2298 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2295, <32 x i8> %2296, <16 x i8> %2297, i32 2) %2299 = extractelement <4 x float> %2298, i32 1 %2300 = extractelement <4 x float> %2298, i32 3 %2301 = fcmp oeq float %temp14.0, 3.000000e+00 %2302 = select i1 %2301, float 1.000000e+00, float 0.000000e+00 %2303 = bitcast float %2274 to i32 %2304 = bitcast float %2276 to i32 %2305 = bitcast float %225 to i32 %2306 = insertelement <4 x i32> undef, i32 %2303, i32 0 %2307 = insertelement <4 x i32> %2306, i32 %2304, i32 1 %2308 = insertelement <4 x i32> %2307, i32 %2305, i32 2 %2309 = bitcast <8 x i32> %69 to <32 x i8> %2310 = bitcast <4 x i32> %71 to <16 x i8> %2311 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2308, <32 x i8> %2309, <16 x i8> %2310, i32 2) %2312 = extractelement <4 x float> %2311, i32 1 %2313 = extractelement <4 x float> %2311, i32 3 %2314 = fcmp oeq float %temp14.0, 2.000000e+00 %2315 = select i1 %2314, float 1.000000e+00, float 0.000000e+00 %2316 = bitcast float %2274 to i32 %2317 = bitcast float %2276 to i32 %2318 = bitcast float %225 to i32 %2319 = insertelement <4 x i32> undef, i32 %2316, i32 0 %2320 = insertelement <4 x i32> %2319, i32 %2317, i32 1 %2321 = insertelement <4 x i32> %2320, i32 %2318, i32 2 %2322 = bitcast <8 x i32> %61 to <32 x i8> %2323 = bitcast <4 x i32> %63 to <16 x i8> %2324 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2321, <32 x i8> %2322, <16 x i8> %2323, i32 2) %2325 = extractelement <4 x float> %2324, i32 1 %2326 = extractelement <4 x float> %2324, i32 3 %2327 = fcmp oeq float %temp14.0, 1.000000e+00 %2328 = select i1 %2327, float 1.000000e+00, float 0.000000e+00 %2329 = bitcast float %2274 to i32 %2330 = bitcast float %2276 to i32 %2331 = bitcast float %225 to i32 %2332 = insertelement <4 x i32> undef, i32 %2329, i32 0 %2333 = insertelement <4 x i32> %2332, i32 %2330, i32 1 %2334 = insertelement <4 x i32> %2333, i32 %2331, i32 2 %2335 = bitcast <8 x i32> %53 to <32 x i8> %2336 = bitcast <4 x i32> %55 to <16 x i8> %2337 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2334, <32 x i8> %2335, <16 x i8> %2336, i32 2) %2338 = extractelement <4 x float> %2337, i32 1 %2339 = extractelement <4 x float> %2337, i32 3 %2340 = fcmp oeq float %temp14.0, 0.000000e+00 %2341 = select i1 %2340, float 1.000000e+00, float 0.000000e+00 %2342 = fmul float %2338, %2341 %2343 = fmul float %2339, %2341 %2344 = fmul float %2325, %2328 %2345 = fadd float %2344, %2342 %2346 = fmul float %2326, %2328 %2347 = fadd float %2346, %2343 %2348 = fmul float %2312, %2315 %2349 = fadd float %2348, %2345 %2350 = fmul float %2313, %2315 %2351 = fadd float %2350, %2347 %2352 = fmul float %2299, %2302 %2353 = fadd float %2352, %2349 %2354 = fmul float %2300, %2302 %2355 = fadd float %2354, %2351 %2356 = fmul float %2286, %2289 %2357 = fadd float %2356, %2353 %2358 = fmul float %2287, %2289 %2359 = fadd float %2358, %2355 %2360 = fmul float %2359, 2.000000e+00 %2361 = fadd float %2360, -1.000000e+00 %2362 = fmul float %2357, 2.000000e+00 %2363 = fadd float %2362, -1.000000e+00 %2364 = fmul float %2361, %2361 %2365 = fmul float %2363, %2363 %2366 = fadd float %2364, %2365 %2367 = call float @llvm.AMDIL.clamp.(float %2366, float 0.000000e+00, float 1.000000e+00) %2368 = fmul float %148, 0.000000e+00 %2369 = fmul float %2139, %148 %2370 = fmul float %2141, %148 %2371 = fmul float %2252, %149 %2372 = fadd float %2371, %2368 %2373 = fmul float %149, 0.000000e+00 %2374 = fadd float %2373, %2369 %2375 = fmul float %2250, %149 %2376 = fadd float %2375, %2370 %2377 = fmul float %2361, %150 %2378 = fadd float %2377, %2372 %2379 = fmul float %2363, %150 %2380 = fadd float %2379, %2374 %2381 = fmul float %150, 0.000000e+00 %2382 = fadd float %2381, %2376 %2383 = fmul float %92, %1682 %2384 = fmul float %92, %1684 %2385 = fmul float %92, %1686 %2386 = fmul float %93, %2030 %2387 = fadd float %2386, %2383 %2388 = fmul float %93, %2032 %2389 = fadd float %2388, %2384 %2390 = fmul float %93, %2034 %2391 = fadd float %2390, %2385 %2392 = fmul float %94, %2378 %2393 = fadd float %2392, %2387 %2394 = fmul float %94, %2380 %2395 = fadd float %2394, %2389 %2396 = fmul float %94, %2382 %2397 = fadd float %2396, %2391 %2398 = fmul float %2393, %2393 %2399 = fmul float %2395, %2395 %2400 = fadd float %2398, %2399 %2401 = fmul float %2397, %2397 %2402 = fadd float %2400, %2401 %2403 = fadd float %2402, 1.000000e+00 %2404 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2403) %2405 = fmul float %2393, %2404 %2406 = fmul float %2395, %2404 %2407 = fmul float %2397, %2404 %2408 = fmul float %2405, %91 %2409 = fmul float %2406, %91 %2410 = fmul float %2407, %91 %2411 = fsub float %97, %2408 %2412 = fsub float %98, %2409 %2413 = fsub float %99, %2410 %2414 = fmul float %2411, %2411 %2415 = fmul float %2412, %2412 %2416 = fadd float %2415, %2414 %2417 = fmul float %2413, %2413 %2418 = fadd float %2416, %2417 %2419 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2418) %2420 = fmul float %2411, %2419 %2421 = fmul float %2412, %2419 %2422 = fmul float %2413, %2419 %2423 = fadd float %109, %121 %2424 = fadd float %110, %122 %2425 = fadd float %111, %123 %2426 = fmul float %2423, %2423 %2427 = fmul float %2424, %2424 %2428 = fadd float %2427, %2426 %2429 = fmul float %2425, %2425 %2430 = fadd float %2428, %2429 %2431 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2430) %2432 = fmul float %2423, %2431 %2433 = fmul float %2424, %2431 %2434 = fmul float %2425, %2431 %2435 = fmul float %2420, %2432 %2436 = fmul float %2421, %2433 %2437 = fadd float %2436, %2435 %2438 = fmul float %2422, %2434 %2439 = fadd float %2437, %2438 %2440 = call float @llvm.maxnum.f32(float %2439, float 0x3F1A36E2E0000000) %2441 = fmul float %95, 3.200000e+01 %2442 = call float @llvm.pow.f32(float %2440, float %2441) %2443 = call float @llvm.AMDIL.clamp.(float %2442, float 0.000000e+00, float 1.000000e+00) %2444 = fmul float %2443, 2.000000e+00 %2445 = fsub float 3.000000e+00, %2444 %2446 = fmul float %2443, %2445 %2447 = fmul float %2443, %2446 %2448 = fmul float %2447, %95 %2449 = fmul float %1334, %32 %2450 = fmul float %1336, %33 %2451 = fmul float %1338, %34 %2452 = fmul float %2420, %109 %2453 = fmul float %2421, %110 %2454 = fadd float %2453, %2452 %2455 = fmul float %2422, %111 %2456 = fadd float %2454, %2455 %2457 = call float @llvm.AMDIL.clamp.(float %2456, float 0.000000e+00, float 1.000000e+00) %2458 = fmul float %45, 2.000000e+00 %2459 = fmul float %46, 2.000000e+00 %2460 = fmul float %47, 2.000000e+00 %2461 = call float @llvm.maxnum.f32(float %2458, float %42) %2462 = call float @llvm.maxnum.f32(float %2459, float %43) %2463 = call float @llvm.maxnum.f32(float %2460, float %44) %2464 = call float @llvm.minnum.f32(float %2461, float 1.000000e+00) %2465 = call float @llvm.minnum.f32(float %2462, float 1.000000e+00) %2466 = call float @llvm.minnum.f32(float %2463, float 1.000000e+00) %2467 = fmul float %2464, %1334 %2468 = fmul float %2465, %1336 %2469 = fmul float %2466, %1338 %2470 = fmul float %2449, %2457 %2471 = fadd float %2470, %2467 %2472 = fmul float %2450, %2457 %2473 = fadd float %2472, %2468 %2474 = fmul float %2451, %2457 %2475 = fadd float %2474, %2469 %2476 = fmul float %32, %2448 %2477 = fadd float %2476, %2471 %2478 = fmul float %33, %2448 %2479 = fadd float %2478, %2473 %2480 = fmul float %34, %2448 %2481 = fadd float %2480, %2475 %2482 = fmul float %2477, 5.000000e-01 %2483 = fmul float %2479, 5.000000e-01 %2484 = fmul float %2481, 5.000000e-01 %2485 = fmul float %96, %30 %2486 = fadd float %2485, %31 %2487 = call float @llvm.AMDIL.clamp.(float %2486, float 0.000000e+00, float 1.000000e+00) %2488 = fmul float %2482, %2487 %2489 = fmul float %2483, %2487 %2490 = fmul float %2484, %2487 %2491 = call i32 @llvm.SI.packf16(float %2488, float %2489) %2492 = bitcast i32 %2491 to float %2493 = call i32 @llvm.SI.packf16(float %2490, float 1.000000e+00) %2494 = bitcast i32 %2493 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %2492, float %2494, float %2492, float %2494) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_mov_b64 s[84:85], s[4:5] ; BED40404 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v13, v0, 0, 0, [m0] ; C8340000 v_interp_p2_f32 v13, [v13], v1, 0, 0, [m0] ; C8350001 v_interp_p1_f32 v14, v0, 1, 0, [m0] ; C8380100 v_interp_p2_f32 v14, [v14], v1, 1, 0, [m0] ; C8390101 v_interp_p1_f32 v15, v0, 2, 0, [m0] ; C83C0200 v_interp_p2_f32 v15, [v15], v1, 2, 0, [m0] ; C83D0201 v_interp_p1_f32 v3, v0, 3, 0, [m0] ; C80C0300 v_interp_p2_f32 v3, [v3], v1, 3, 0, [m0] ; C80D0301 v_interp_p1_f32 v7, v0, 0, 1, [m0] ; C81C0400 v_interp_p2_f32 v7, [v7], v1, 0, 1, [m0] ; C81D0401 v_interp_p1_f32 v8, v0, 1, 1, [m0] ; C8200500 v_interp_p2_f32 v8, [v8], v1, 1, 1, [m0] ; C8210501 v_interp_p1_f32 v4, v0, 2, 1, [m0] ; C8100600 v_interp_p2_f32 v4, [v4], v1, 2, 1, [m0] ; C8110601 v_interp_p1_f32 v2, v0, 3, 1, [m0] ; C8080700 v_interp_p2_f32 v2, [v2], v1, 3, 1, [m0] ; C8090701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v5, v0, 1, 2, [m0] ; C8140900 v_interp_p2_f32 v5, [v5], v1, 1, 2, [m0] ; C8150901 v_interp_p1_f32 v6, v0, 2, 2, [m0] ; C8180A00 v_interp_p2_f32 v6, [v6], v1, 2, 2, [m0] ; C8190A01 v_interp_p1_f32 v9, v0, 3, 2, [m0] ; C8240B00 v_interp_p2_f32 v9, [v9], v1, 3, 2, [m0] ; C8250B01 v_interp_p1_f32 v30, v0, 0, 3, [m0] ; C8780C00 v_interp_p2_f32 v30, [v30], v1, 0, 3, [m0] ; C8790C01 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s37, s[8:11], 0x4 ; C2128904 s_buffer_load_dword s36, s[8:11], 0x5 ; C2120905 v_interp_p1_f32 v31, v0, 1, 3, [m0] ; C87C0D00 v_interp_p2_f32 v31, [v31], v1, 1, 3, [m0] ; C87D0D01 s_buffer_load_dword s42, s[8:11], 0x6 ; C2150906 v_interp_p1_f32 v27, v0, 2, 3, [m0] ; C86C0E00 s_buffer_load_dword s39, s[8:11], 0x0 ; C2138900 s_buffer_load_dword s38, s[8:11], 0x1 ; C2130901 v_interp_p2_f32 v27, [v27], v1, 2, 3, [m0] ; C86D0E01 s_buffer_load_dword s41, s[8:11], 0x2 ; C2148902 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e64 v0, s37, s37 ; D2100000 00004A25 v_mac_f32_e64 v0, s36, s36 ; D23E0000 00004824 s_buffer_load_dword s0, s[8:11], 0x38 ; C2000938 s_buffer_load_dword s1, s[8:11], 0x3c ; C200893C v_mac_f32_e64 v0, s42, s42 ; D23E0000 0000542A v_rsq_clamp_f32_e32 v1, v0 ; 7E025900 v_sub_f32_e32 v0, s39, v30 ; 08003C27 v_sub_f32_e32 v11, s38, v31 ; 08163E26 v_mul_f32_e32 v16, v0, v0 ; 10200100 v_mac_f32_e32 v16, v11, v11 ; 3E20170B v_sub_f32_e32 v12, s41, v27 ; 08183629 v_mac_f32_e32 v16, v12, v12 ; 3E20190C v_rsq_clamp_f32_e32 v17, v16 ; 7E225910 v_add_f32_e32 v13, 0.5, v13 ; 061A1AF0 v_floor_f32_e32 v13, v13 ; 7E1A490D v_mov_b32_e32 v16, 0x7fffffff ; 7E2002FF 7FFFFFFF v_mov_b32_e32 v18, 0x42800000 ; 7E2402FF 42800000 v_cmp_le_f32_e32 vcc, v18, v13 ; 7C061B12 v_and_b32_e32 v18, v5, v16 ; 36242105 v_and_b32_e32 v19, v6, v16 ; 36262106 v_and_b32_e32 v20, v9, v16 ; 36282109 v_mul_f32_e64 v16, |v5|, |v5| ; D2100310 00020B05 v_mad_f32 v16, |v6|, |v6|, v16 ; D2820310 04420D06 v_mad_f32 v16, |v9|, |v9|, v16 ; D2820310 04421309 v_rsq_clamp_f32_e32 v34, v16 ; 7E445910 v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_add_f32_e32 v15, 0.5, v15 ; 061E1EF0 v_floor_f32_e32 v21, v14 ; 7E2A490E v_floor_f32_e32 v14, v15 ; 7E1C490F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v24, s1 ; 7E300201 v_mul_f32_e32 v22, s0, v13 ; 102C1A00 v_floor_f32_e32 v16, v22 ; 7E204916 s_and_saveexec_b64 s[2:3], vcc ; BE82246A s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E v_mov_b32_e32 v15, 0xc2800000 ; 7E1E02FF C2800000 v_add_f32_e32 v13, v13, v15 ; 061A1F0D v_mul_f32_e32 v15, s1, v13 ; 101E1A01 v_floor_f32_e32 v15, v15 ; 7E1E490F v_mul_f32_e32 v23, s1, v15 ; 102E1E01 v_mad_f32 v28, v13, s1, -v15 ; D282001C 843C030D v_floor_f32_e32 v13, v23 ; 7E1A4917 v_mad_f32 v29, v15, s1, -v13 ; D282001D 8434030F v_add_f32_e32 v15, 4.0, v13 ; 061E1AF6 s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502 v_mov_b32_e32 v13, s0 ; 7E1A0200 v_mov_b32_e32 v32, v24 ; 7E400318 s_xor_b64 exec, exec, s[2:3] ; 89FE027E v_mul_f32_e32 v15, s0, v16 ; 101E2000 v_floor_f32_e32 v23, v22 ; 7E2E4916 v_subrev_f32_e32 v28, v23, v22 ; 0A382D17 v_floor_f32_e32 v15, v15 ; 7E1E490F v_mad_f32 v29, v16, s0, -v15 ; D282001D 843C0110 v_mov_b32_e32 v32, v13 ; 7E40030D s_or_b64 exec, exec, s[2:3] ; 88FE027E v_mul_f32_e32 v23, s0, v21 ; 102E2A00 v_floor_f32_e32 v22, v23 ; 7E2C4917 v_mov_b32_e32 v16, 0x42800000 ; 7E2002FF 42800000 v_cmp_le_f32_e32 vcc, v16, v21 ; 7C062B10 s_and_saveexec_b64 s[2:3], vcc ; BE82246A s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E v_mov_b32_e32 v16, 0xc2800000 ; 7E2002FF C2800000 v_add_f32_e32 v16, v21, v16 ; 06202115 v_mul_f32_e32 v21, s1, v16 ; 102A2001 v_floor_f32_e32 v21, v21 ; 7E2A4915 v_mul_f32_e32 v26, s1, v21 ; 10342A01 v_mad_f32 v25, v16, s1, -v21 ; D2820019 84540310 v_floor_f32_e32 v16, v26 ; 7E20491A v_mad_f32 v26, v21, s1, -v16 ; D282001A 84400315 v_add_f32_e32 v16, 4.0, v16 ; 062020F6 s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502 v_mov_b32_e32 v33, v24 ; 7E420318 s_xor_b64 exec, exec, s[2:3] ; 89FE027E v_mul_f32_e32 v16, s0, v22 ; 10202C00 v_floor_f32_e32 v21, v23 ; 7E2A4917 v_subrev_f32_e32 v25, v21, v23 ; 0A322F15 v_floor_f32_e32 v16, v16 ; 7E204910 v_mad_f32 v26, v22, s0, -v16 ; D282001A 84400116 v_mov_b32_e32 v33, v13 ; 7E42030D s_or_b64 exec, exec, s[2:3] ; 88FE027E s_buffer_load_dword s13, s[8:11], 0xb ; C206890B s_buffer_load_dword s14, s[8:11], 0x40 ; C2070940 s_buffer_load_dword s12, s[8:11], 0x44 ; C2060944 v_mul_f32_e32 v38, s0, v14 ; 104C1C00 v_floor_f32_e32 v35, v38 ; 7E464926 v_mov_b32_e32 v21, 0x42800000 ; 7E2A02FF 42800000 v_cmp_le_f32_e32 vcc, v21, v14 ; 7C061D15 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[2:3], vcc ; BE82246A s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E v_mov_b32_e32 v21, 0xc2800000 ; 7E2A02FF C2800000 v_add_f32_e32 v14, v14, v21 ; 061C2B0E v_mul_f32_e32 v21, s1, v14 ; 102A1C01 v_floor_f32_e32 v21, v21 ; 7E2A4915 v_mul_f32_e32 v23, s1, v21 ; 102E2A01 v_mad_f32 v22, v14, s1, -v21 ; D2820016 8454030E v_floor_f32_e32 v14, v23 ; 7E1C4917 v_mad_f32 v23, v21, s1, -v14 ; D2820017 84380315 v_add_f32_e32 v21, 4.0, v14 ; 062A1CF6 s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502 v_mov_b32_e32 v39, s13 ; 7E4E020D v_mov_b32_e32 v36, s14 ; 7E48020E s_buffer_load_dword s14, s[8:11], 0xa ; C207090A s_buffer_load_dword s100, s[8:11], 0xc ; C232090C s_buffer_load_dword s101, s[8:11], 0xd ; C232890D s_buffer_load_dword s4, s[8:11], 0xe ; C202090E s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v251, s4, 19 ; 05F72604 s_buffer_load_dword s35, s[8:11], 0x48 ; C2118948 s_buffer_load_dword s40, s[8:11], 0x4c ; C214094C s_buffer_load_dword s13, s[8:11], 0x50 ; C2068950 s_buffer_load_dword s4, s[8:11], 0x54 ; C2020954 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v251, s4, 20 ; 05F72804 s_buffer_load_dword s4, s[8:11], 0x55 ; C2020955 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v251, s4, 21 ; 05F72A04 s_buffer_load_dword s4, s[8:11], 0x56 ; C2020956 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v251, s4, 22 ; 05F72C04 s_buffer_load_dword s4, s[8:11], 0x58 ; C2020958 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v251, s4, 0 ; 05F70004 s_buffer_load_dword s4, s[8:11], 0x59 ; C2020959 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v251, s4, 1 ; 05F70204 s_buffer_load_dword s4, s[8:11], 0x5a ; C202095A s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v251, s4, 2 ; 05F70404 v_mov_b32_e32 v37, s12 ; 7E4A020C s_xor_b64 exec, exec, s[2:3] ; 89FE027E v_mul_f32_e32 v14, s0, v35 ; 101C4600 v_floor_f32_e32 v21, v38 ; 7E2A4926 v_subrev_f32_e32 v22, v21, v38 ; 0A2C4D15 v_floor_f32_e32 v21, v14 ; 7E2A490E v_mad_f32 v23, v35, s0, -v21 ; D2820017 84540123 v_mov_b32_e32 v24, v13 ; 7E30030D s_or_b64 exec, exec, s[2:3] ; 88FE027E v_mac_f32_e32 v39, s14, v10 ; 3E4E140E v_mul_f32_e32 v13, v17, v0 ; 101A0111 v_mul_f32_e32 v14, v17, v11 ; 101C1711 v_mul_f32_e32 v12, v17, v12 ; 10181911 v_mac_f32_e32 v13, s37, v1 ; 3E1A0225 v_mac_f32_e32 v14, s36, v1 ; 3E1C0224 v_mac_f32_e32 v12, s42, v1 ; 3E18022A v_add_f32_e64 v0, 0, v39 clamp ; D2060800 00024E80 v_cmp_neq_f32_e64 s[24:25], s0, v32 ; D01A0018 00024000 v_cmp_eq_f32_e32 vcc, 4.0, v15 ; 7C041EF6 v_cmp_eq_f32_e64 s[2:3], 2.0, v15 ; D0040002 00021EF4 v_writelane_b32 v251, s2, 17 ; 05F72202 v_writelane_b32 v251, s3, 18 ; 05F72403 v_cmp_eq_f32_e64 s[2:3], 1.0, v15 ; D0040002 00021EF2 v_writelane_b32 v251, s2, 13 ; 05F71A02 v_writelane_b32 v251, s3, 14 ; 05F71C03 v_cmp_eq_f32_e64 s[2:3], 0, v15 ; D0040002 00021E80 v_writelane_b32 v251, s2, 15 ; 05F71E02 v_writelane_b32 v251, s3, 16 ; 05F72003 v_cmp_neq_f32_e64 s[22:23], s0, v33 ; D01A0016 00024200 v_cmp_neq_f32_e64 s[20:21], s0, v24 ; D01A0014 00023000 v_cmp_eq_f32_e64 s[0:1], 4.0, v16 ; D0040000 000220F6 v_writelane_b32 v251, s0, 5 ; 05F70A00 v_writelane_b32 v251, s1, 6 ; 05F70C01 v_cmp_eq_f32_e64 s[0:1], 2.0, v16 ; D0040000 000220F4 v_writelane_b32 v251, s0, 7 ; 05F70E00 v_writelane_b32 v251, s1, 8 ; 05F71001 v_cmp_eq_f32_e64 s[0:1], 1.0, v16 ; D0040000 000220F2 v_writelane_b32 v251, s0, 9 ; 05F71200 v_writelane_b32 v251, s1, 10 ; 05F71401 v_cmp_eq_f32_e64 s[0:1], 0, v16 ; D0040000 00022080 v_writelane_b32 v251, s0, 11 ; 05F71600 v_writelane_b32 v251, s1, 12 ; 05F71801 v_mul_f32_e32 v11, s37, v1 ; 10160225 v_mul_f32_e32 v10, s36, v1 ; 10140224 v_mul_f32_e32 v1, s42, v1 ; 1002022A v_cmp_eq_f32_e64 s[0:1], 4.0, v21 ; D0040000 00022AF6 v_writelane_b32 v251, s0, 3 ; 05F70600 v_writelane_b32 v251, s1, 4 ; 05F70801 v_mov_b32_e32 v17, 0xbe4ccccd ; 7E2202FF BE4CCCCD v_mad_f32 v18, v34, v18, v17 ; D2820012 04462522 v_mad_f32 v19, v34, v19, v17 ; D2820013 04462722 v_mac_f32_e32 v17, v34, v20 ; 3E222922 v_mov_b32_e32 v20, 0x40e00000 ; 7E2802FF 40E00000 v_mul_f32_e32 v34, v20, v18 ; 10442514 v_mul_f32_e32 v18, v20, v19 ; 10242714 v_mul_f32_e32 v19, v20, v17 ; 10262314 v_mov_b32_e32 v20, 0x3c23d70a ; 7E2802FF 3C23D70A v_max_f32_e32 v17, v20, v34 ; 20224514 v_subrev_f32_e32 v34, s39, v30 ; 0A443C27 v_mul_f32_e32 v34, v34, v34 ; 10444522 v_subrev_f32_e32 v35, s38, v31 ; 0A463E26 v_mac_f32_e32 v34, v35, v35 ; 3E444723 v_subrev_f32_e32 v35, s41, v27 ; 0A463629 v_mac_f32_e32 v34, v35, v35 ; 3E444723 v_mul_f32_e32 v34, s13, v34 ; 1044440D v_log_f32_e32 v34, v34 ; 7E444F22 s_load_dwordx4 s[92:95], s[84:85], 0x8 ; C0AE5508 v_cndmask_b32_e64 v35, v36, v37, s[24:25] ; D2000023 00624B24 v_rcp_f32_e32 v35, v35 ; 7E465523 v_mul_f32_e32 v34, 0x3f317218, v34 ; 104444FF 3F317218 v_mov_b32_e32 v38, 0x3b000000 ; 7E4C02FF 3B000000 v_mov_b32_e32 v39, 0x3b800000 ; 7E4E02FF 3B800000 v_cndmask_b32_e64 v40, v39, v38, s[24:25] ; D2000028 00624D27 v_mul_f32_e32 v41, v35, v30 ; 10523D23 v_floor_f32_e32 v41, v41 ; 7E524929 v_mad_f32 v41, v30, v35, -v41 ; D2820029 84A6471E v_mul_f32_e32 v42, v35, v31 ; 10543F23 v_floor_f32_e32 v42, v42 ; 7E54492A v_mad_f32 v42, v31, v35, -v42 ; D282002A 84AA471F v_add_f32_e64 v43, s35, s35 ; D206002B 00004623 v_mul_f32_e32 v44, v35, v27 ; 10583723 v_floor_f32_e32 v44, v44 ; 7E58492C v_mad_f32 v35, v27, v35, -v44 ; D2820023 84B2471B v_mad_f32 v44, -v43, v40, 1.0 ; D282002C 23CA512B v_mul_f32_e32 v40, s35, v40 ; 10505023 v_cndmask_b32_e64 v45, v36, v37, s[22:23] ; D200002D 005A4B24 v_rcp_f32_e32 v45, v45 ; 7E5A552D v_mad_f32 v41, v44, v41, v40 ; D2820029 04A2532C v_mad_f32 v42, v44, v42, v40 ; D282002A 04A2552C v_mac_f32_e32 v40, v44, v35 ; 3E50472C v_mul_f32_e32 v35, v45, v30 ; 10463D2D v_floor_f32_e32 v35, v35 ; 7E464923 v_mad_f32 v35, v30, v45, -v35 ; D2820023 848E5B1E v_mul_f32_e32 v44, v45, v31 ; 10583F2D v_floor_f32_e32 v44, v44 ; 7E58492C v_mad_f32 v44, v31, v45, -v44 ; D282002C 84B25B1F v_mul_f32_e32 v46, v45, v27 ; 105C372D v_floor_f32_e32 v46, v46 ; 7E5C492E v_mad_f32 v45, v27, v45, -v46 ; D282002D 84BA5B1B v_cndmask_b32_e64 v46, v39, v38, s[22:23] ; D200002E 005A4D27 v_mad_f32 v47, -v43, v46, 1.0 ; D282002F 23CA5D2B v_mul_f32_e32 v46, s35, v46 ; 105C5C23 v_mad_f32 v48, v47, v35, v46 ; D2820030 04BA472F v_mad_f32 v35, v47, v44, v46 ; D2820023 04BA592F v_mac_f32_e32 v46, v47, v45 ; 3E5C5B2F v_mul_f32_e32 v51, s40, v34 ; 10664428 v_mad_f32 v49, v32, v41, v28 ; D2820031 04725320 v_mad_f32 v50, v32, v42, v29 ; D2820032 04765520 v_mac_f32_e32 v28, v32, v40 ; 3E385120 v_mad_f32 v34, v33, v48, v25 ; D2820022 04666121 v_mad_f32 v35, v33, v35, v26 ; D2820023 046A4721 v_cndmask_b32_e64 v36, v36, v37, s[20:21] ; D2000024 00524B24 v_rcp_f32_e32 v36, v36 ; 7E485524 v_mac_f32_e32 v25, v33, v46 ; 3E325D21 v_mov_b32_e32 v44, v28 ; 7E58031C v_mov_b32_e32 v45, v29 ; 7E5A031D v_mov_b32_e32 v46, v30 ; 7E5C031E v_mov_b32_e32 v47, v31 ; 7E5E031F v_mac_f32_e32 v29, v32, v41 ; 3E3A5320 v_mov_b32_e32 v52, v25 ; 7E680319 v_mov_b32_e32 v53, v26 ; 7E6A031A v_mov_b32_e32 v54, v27 ; 7E6C031B v_mov_b32_e32 v55, v28 ; 7E6E031C v_mac_f32_e32 v26, v33, v48 ; 3E346121 v_mul_f32_e32 v32, v36, v30 ; 10403D24 v_floor_f32_e32 v32, v32 ; 7E404920 v_mad_f32 v30, v30, v36, -v32 ; D282001E 8482491E v_mul_f32_e32 v32, v36, v31 ; 10403F24 v_floor_f32_e32 v32, v32 ; 7E404920 v_mad_f32 v31, v31, v36, -v32 ; D282001F 8482491F v_mul_f32_e32 v32, v36, v27 ; 10403724 v_floor_f32_e32 v32, v32 ; 7E404920 v_mad_f32 v27, v27, v36, -v32 ; D282001B 8482491B v_cndmask_b32_e64 v32, v39, v38, s[20:21] ; D2000020 00524D27 v_mad_f32 v33, -v43, v32, 1.0 ; D2820021 23CA412B v_mul_f32_e32 v36, s35, v32 ; 10484023 v_mad_f32 v30, v33, v30, v36 ; D282001E 04923D21 v_mad_f32 v32, v33, v31, v36 ; D2820020 04923F21 v_mac_f32_e32 v36, v33, v27 ; 3E483721 v_mad_f32 v31, v24, v30, v22 ; D282001F 045A3D18 v_mad_f32 v32, v24, v32, v23 ; D2820020 045E4118 v_mac_f32_e32 v22, v24, v36 ; 3E2C4918 s_load_dwordx8 s[68:75], s[6:7], 0x40 ; C0E20740 v_mov_b32_e32 v37, v22 ; 7E4A0316 v_mov_b32_e32 v38, v23 ; 7E4C0317 v_mov_b32_e32 v39, v24 ; 7E4E0318 v_mov_b32_e32 v40, v25 ; 7E500319 s_load_dwordx4 s[0:3], s[84:85], 0x20 ; C0805520 v_mac_f32_e32 v23, v24, v30 ; 3E2E3D18 v_mov_b32_e32 v45, v50 ; 7E5A0332 v_mov_b32_e32 v36, v51 ; 7E480333 v_mov_b32_e32 v53, v35 ; 7E6A0323 v_mov_b32_e32 v33, v51 ; 7E420333 v_mov_b32_e32 v38, v32 ; 7E4C0320 v_mov_b32_e32 v46, v51 ; 7E5C0333 v_mov_b32_e32 v30, v51 ; 7E3C0333 v_mov_b32_e32 v54, v51 ; 7E6C0333 v_mov_b32_e32 v27, v51 ; 7E360333 v_mov_b32_e32 v39, v51 ; 7E4E0333 v_mov_b32_e32 v24, v51 ; 7E300333 s_load_dwordx4 s[8:11], s[84:85], 0x18 ; C0845518 s_load_dwordx8 s[12:19], s[6:7], 0x20 ; C0C60720 s_load_dwordx8 s[32:39], s[6:7], 0x30 ; C0D00730 s_load_dwordx4 s[28:31], s[84:85], 0x10 ; C08E5510 s_load_dwordx4 s[88:91], s[84:85], 0x24 ; C0AC5524 s_load_dwordx8 s[20:27], s[6:7], 0x10 ; C0CA0710 s_load_dwordx8 s[76:83], s[6:7], 0x48 ; C0E60748 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[41:43], 7, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[68:75], s[0:3] ; F0900700 00112931 s_load_dwordx4 s[40:43], s[84:85], 0x0 ; C0945500 s_load_dwordx8 s[60:67], s[6:7], 0x0 ; C0DE0700 s_load_dwordx4 s[56:59], s[84:85], 0x1c ; C09C551C s_load_dwordx8 s[44:51], s[6:7], 0x38 ; C0D60738 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_writelane_b32 v251, s44, 39 ; 05F74E2C v_writelane_b32 v251, s45, 40 ; 05F7502D v_writelane_b32 v251, s46, 41 ; 05F7522E v_writelane_b32 v251, s47, 42 ; 05F7542F v_writelane_b32 v251, s48, 43 ; 05F75630 v_writelane_b32 v251, s49, 44 ; 05F75831 v_writelane_b32 v251, s50, 45 ; 05F75A32 v_writelane_b32 v251, s51, 46 ; 05F75C33 image_sample_l v[56:58], 7, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[32:39], s[8:11] ; F0900700 00483831 image_sample_l v[59:61], 7, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[12:19], s[28:31] ; F0900700 00E33B31 s_load_dwordx4 s[44:47], s[84:85], 0xc ; C096550C s_load_dwordx8 s[48:55], s[6:7], 0x18 ; C0D80718 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_writelane_b32 v251, s48, 31 ; 05F73E30 v_writelane_b32 v251, s49, 32 ; 05F74031 v_writelane_b32 v251, s50, 33 ; 05F74232 v_writelane_b32 v251, s51, 34 ; 05F74433 v_writelane_b32 v251, s52, 35 ; 05F74634 v_writelane_b32 v251, s53, 36 ; 05F74835 v_writelane_b32 v251, s54, 37 ; 05F74A36 v_writelane_b32 v251, s55, 38 ; 05F74C37 image_sample_l v[62:64], 7, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[20:27], s[92:95] ; F0900700 02E53E31 s_load_dwordx4 s[96:99], s[84:85], 0x4 ; C0B05504 s_load_dwordx8 s[48:55], s[6:7], 0x8 ; C0D80708 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_writelane_b32 v251, s48, 23 ; 05F72E30 v_writelane_b32 v251, s49, 24 ; 05F73031 v_writelane_b32 v251, s50, 25 ; 05F73232 v_writelane_b32 v251, s51, 26 ; 05F73433 v_writelane_b32 v251, s52, 27 ; 05F73634 v_writelane_b32 v251, s53, 28 ; 05F73835 v_writelane_b32 v251, s54, 29 ; 05F73A36 v_writelane_b32 v251, s55, 30 ; 05F73C37 image_sample_l v[65:67], 7, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[60:67], s[40:43] ; F0900700 014F4131 image_sample_l v[68:70], 7, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[68:75], s[0:3] ; F0900700 0011442C image_sample_l v[71:73], 7, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[32:39], s[8:11] ; F0900700 0048472C image_sample_l v[74:76], 7, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[12:19], s[28:31] ; F0900700 00E34A2C image_sample_l v[77:79], 7, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[20:27], s[92:95] ; F0900700 02E54D2C image_sample_l v[80:82], 7, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[60:67], s[40:43] ; F0900700 014F502C image_sample_l v[83:85], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[68:75], s[0:3] ; F0900700 0011531C image_sample_l v[86:88], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[32:39], s[8:11] ; F0900700 0048561C image_sample_l v[89:91], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[12:19], s[28:31] ; F0900700 00E3591C image_sample_l v[92:94], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[20:27], s[92:95] ; F0900700 02E55C1C image_sample_l v[95:97], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[60:67], s[40:43] ; F0900700 014F5F1C image_sample_l v[98:100], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[68:75], s[0:3] ; F0900700 00116222 image_sample_l v[101:103], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[32:39], s[8:11] ; F0900700 00486522 image_sample_l v[104:106], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[12:19], s[28:31] ; F0900700 00E36822 image_sample_l v[107:109], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[20:27], s[92:95] ; F0900700 02E56B22 image_sample_l v[110:112], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[60:67], s[40:43] ; F0900700 014F6E22 image_sample_l v[113:115], 7, 0, 0, 0, 0, 0, 0, 0, v[52:55], s[68:75], s[0:3] ; F0900700 00117134 image_sample_l v[116:118], 7, 0, 0, 0, 0, 0, 0, 0, v[52:55], s[32:39], s[8:11] ; F0900700 00487434 image_sample_l v[119:121], 7, 0, 0, 0, 0, 0, 0, 0, v[52:55], s[12:19], s[28:31] ; F0900700 00E37734 image_sample_l v[122:124], 7, 0, 0, 0, 0, 0, 0, 0, v[52:55], s[20:27], s[92:95] ; F0900700 02E57A34 image_sample_l v[125:127], 7, 0, 0, 0, 0, 0, 0, 0, v[52:55], s[60:67], s[40:43] ; F0900700 014F7D34 image_sample_l v[128:130], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[68:75], s[0:3] ; F0900700 00118019 image_sample_l v[131:133], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[32:39], s[8:11] ; F0900700 00488319 image_sample_l v[134:136], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[12:19], s[28:31] ; F0900700 00E38619 image_sample_l v[137:139], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[20:27], s[92:95] ; F0900700 02E58919 image_sample_l v[140:142], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[60:67], s[40:43] ; F0900700 014F8C19 image_sample_l v[143:145], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[68:75], s[0:3] ; F0900700 00118F1F image_sample_l v[146:148], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[32:39], s[8:11] ; F0900700 0048921F image_sample_l v[149:151], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[12:19], s[28:31] ; F0900700 00E3951F image_sample_l v[152:154], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[20:27], s[92:95] ; F0900700 02E5981F image_sample_l v[155:157], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[60:67], s[40:43] ; F0900700 014F9B1F image_sample_l v[158:160], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[68:75], s[0:3] ; F0900700 00119E25 image_sample_l v[161:163], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[32:39], s[8:11] ; F0900700 0048A125 image_sample_l v[164:166], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[12:19], s[28:31] ; F0900700 00E3A425 s_load_dwordx4 s[84:87], s[84:85], 0x14 ; C0AA5514 s_load_dwordx8 s[48:55], s[6:7], 0x28 ; C0D80728 image_sample_l v[167:169], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[20:27], s[92:95] ; F0900700 02E5A725 image_sample_l v[170:172], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[60:67], s[40:43] ; F0900700 014FAA25 image_sample_l v[173:175], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[68:75], s[0:3] ; F0900700 0011AD16 image_sample_l v[176:178], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[32:39], s[8:11] ; F0900700 0048B016 image_sample_l v[179:181], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[12:19], s[28:31] ; F0900700 00E3B316 image_sample_l v[182:184], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[20:27], s[92:95] ; F0900700 02E5B616 image_sample_l v[185:187], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[60:67], s[40:43] ; F0900700 014FB916 image_sample_l v[188:189], 10, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[76:83], s[88:91] ; F0900A00 02D3BC2C v_readlane_b32 s16, v251, 39 ; 02214FFB v_readlane_b32 s17, v251, 40 ; 022351FB v_readlane_b32 s18, v251, 41 ; 022553FB v_readlane_b32 s19, v251, 42 ; 022755FB v_readlane_b32 s20, v251, 43 ; 022957FB v_readlane_b32 s21, v251, 44 ; 022B59FB v_readlane_b32 s22, v251, 45 ; 022D5BFB v_readlane_b32 s23, v251, 46 ; 022F5DFB s_nop 2 ; BF800002 image_sample_l v[190:191], 10, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[16:23], s[56:59] ; F0900A00 01C4BE2C s_waitcnt vmcnt(9) lgkmcnt(0) ; BF8C0079 image_sample_l v[192:193], 10, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[48:55], s[84:87] ; F0900A00 02ACC02C v_readlane_b32 s8, v251, 31 ; 02113FFB v_readlane_b32 s9, v251, 32 ; 021341FB v_readlane_b32 s10, v251, 33 ; 021543FB v_readlane_b32 s11, v251, 34 ; 021745FB v_readlane_b32 s12, v251, 35 ; 021947FB v_readlane_b32 s13, v251, 36 ; 021B49FB v_readlane_b32 s14, v251, 37 ; 021D4BFB v_readlane_b32 s15, v251, 38 ; 021F4DFB s_nop 2 ; BF800002 image_sample_l v[194:195], 10, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[8:15], s[44:47] ; F0900A00 0162C22C v_readlane_b32 s0, v251, 23 ; 02012FFB v_readlane_b32 s1, v251, 24 ; 020331FB v_readlane_b32 s2, v251, 25 ; 020533FB v_readlane_b32 s3, v251, 26 ; 020735FB v_readlane_b32 s4, v251, 27 ; 020937FB v_readlane_b32 s5, v251, 28 ; 020B39FB v_readlane_b32 s6, v251, 29 ; 020D3BFB v_readlane_b32 s7, v251, 30 ; 020F3DFB s_nop 2 ; BF800002 image_sample_l v[44:45], 10, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[0:7], s[96:99] ; F0900A00 03002C2C image_sample_l v[46:47], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[76:83], s[88:91] ; F0900A00 02D32E1C image_sample_l v[196:197], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[16:23], s[56:59] ; F0900A00 01C4C41C s_mov_b32 s28, s16 ; BE9C0310 s_mov_b32 s29, s17 ; BE9D0311 s_mov_b32 s30, s18 ; BE9E0312 s_mov_b32 s31, s19 ; BE9F0313 s_mov_b32 s32, s20 ; BEA00314 s_mov_b32 s33, s21 ; BEA10315 s_mov_b32 s34, s22 ; BEA20316 s_mov_b32 s35, s23 ; BEA30317 image_sample_l v[198:199], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[48:55], s[84:87] ; F0900A00 02ACC61C image_sample_l v[200:201], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[8:15], s[44:47] ; F0900A00 0162C81C s_mov_b32 s20, s8 ; BE940308 s_mov_b32 s21, s9 ; BE950309 s_mov_b32 s22, s10 ; BE96030A s_mov_b32 s23, s11 ; BE97030B s_mov_b32 s24, s12 ; BE98030C s_mov_b32 s25, s13 ; BE99030D s_mov_b32 s26, s14 ; BE9A030E s_mov_b32 s27, s15 ; BE9B030F image_sample_l v[28:29], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[0:7], s[96:99] ; F0900A00 03001C1C s_mov_b32 s12, s0 ; BE8C0300 s_mov_b32 s13, s1 ; BE8D0301 s_mov_b32 s14, s2 ; BE8E0302 s_mov_b32 s15, s3 ; BE8F0303 s_mov_b32 s16, s4 ; BE900304 s_mov_b32 s17, s5 ; BE910305 s_mov_b32 s18, s6 ; BE920306 s_mov_b32 s19, s7 ; BE930307 image_sample_l v[202:203], 10, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[76:83], s[88:91] ; F0900A00 02D3CA31 image_sample_l v[204:205], 10, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[28:35], s[56:59] ; F0900A00 01C7CC31 image_sample_l v[206:207], 10, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[48:55], s[84:87] ; F0900A00 02ACCE31 image_sample_l v[208:209], 10, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[20:27], s[44:47] ; F0900A00 0165D031 image_sample_l v[48:49], 10, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[12:19], s[96:99] ; F0900A00 03033031 image_sample_l v[50:51], 10, 0, 0, 0, 0, 0, 0, 0, v[52:55], s[76:83], s[88:91] ; F0900A00 02D33234 image_sample_l v[210:211], 10, 0, 0, 0, 0, 0, 0, 0, v[52:55], s[28:35], s[56:59] ; F0900A00 01C7D234 image_sample_l v[212:213], 10, 0, 0, 0, 0, 0, 0, 0, v[52:55], s[48:55], s[84:87] ; F0900A00 02ACD434 image_sample_l v[214:215], 10, 0, 0, 0, 0, 0, 0, 0, v[52:55], s[20:27], s[44:47] ; F0900A00 0165D634 image_sample_l v[52:53], 10, 0, 0, 0, 0, 0, 0, 0, v[52:55], s[12:19], s[96:99] ; F0900A00 03033434 s_waitcnt vmcnt(10) ; BF8C077A image_sample_l v[54:55], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[76:83], s[88:91] ; F0900A00 02D33619 image_sample_l v[216:217], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[28:35], s[56:59] ; F0900A00 01C7D819 image_sample_l v[218:219], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[48:55], s[84:87] ; F0900A00 02ACDA19 image_sample_l v[220:221], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[20:27], s[44:47] ; F0900A00 0165DC19 image_sample_l v[25:26], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[12:19], s[96:99] ; F0900A00 03031919 image_sample_l v[222:223], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[76:83], s[88:91] ; F0900A00 02D3DE22 image_sample_l v[224:225], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[28:35], s[56:59] ; F0900A00 01C7E022 image_sample_l v[226:227], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[48:55], s[84:87] ; F0900A00 02ACE222 image_sample_l v[228:229], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[20:27], s[44:47] ; F0900A00 0165E422 image_sample_l v[34:35], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[12:19], s[96:99] ; F0900A00 03032222 image_sample_l v[230:231], 10, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[76:83], s[88:91] ; F0900A00 02D3E625 image_sample_l v[232:233], 10, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[28:35], s[56:59] ; F0900A00 01C7E825 image_sample_l v[234:235], 10, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[48:55], s[84:87] ; F0900A00 02ACEA25 image_sample_l v[236:237], 10, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[20:27], s[44:47] ; F0900A00 0165EC25 image_sample_l v[36:37], 10, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[12:19], s[96:99] ; F0900A00 03032425 s_waitcnt vmcnt(10) ; BF8C077A image_sample_l v[38:39], 10, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[76:83], s[88:91] ; F0900A00 02D32616 image_sample_l v[238:239], 10, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[28:35], s[56:59] ; F0900A00 01C7EE16 image_sample_l v[240:241], 10, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[48:55], s[84:87] ; F0900A00 02ACF016 image_sample_l v[242:243], 10, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[20:27], s[44:47] ; F0900A00 0165F216 image_sample_l v[22:23], 10, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[12:19], s[96:99] ; F0900A00 03031616 s_waitcnt vmcnt(10) ; BF8C077A image_sample_l v[244:245], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[76:83], s[88:91] ; F0900A00 02D3F41F image_sample_l v[246:247], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[28:35], s[56:59] ; F0900A00 01C7F61F image_sample_l v[248:249], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[48:55], s[84:87] ; F0900A00 02ACF81F v_cndmask_b32_e64 v24, 0, 1.0, vcc ; D2000018 01A9E480 v_mov_b32_e32 v27, 0x40400000 ; 7E3602FF 40400000 v_cmp_eq_f32_e64 s[4:5], v15, v27 ; D0040004 0002370F v_cndmask_b32_e64 v15, 0, 1.0, s[4:5] ; D200000F 0011E480 v_readlane_b32 s0, v251, 17 ; 020123FB v_readlane_b32 s1, v251, 18 ; 020325FB s_nop 2 ; BF800002 v_cndmask_b32_e64 v30, 0, 1.0, s[0:1] ; D200001E 0001E480 v_readlane_b32 s0, v251, 13 ; 02011BFB v_readlane_b32 s1, v251, 14 ; 02031DFB s_nop 2 ; BF800002 v_cndmask_b32_e64 v40, 0, 1.0, s[0:1] ; D2000028 0001E480 v_readlane_b32 s0, v251, 15 ; 02011FFB v_readlane_b32 s1, v251, 16 ; 020321FB s_nop 2 ; BF800002 v_cndmask_b32_e64 v250, 0, 1.0, s[0:1] ; D20000FA 0001E480 v_mul_f32_e32 v65, v250, v65 ; 108283FA v_mul_f32_e32 v66, v250, v66 ; 108485FA v_mul_f32_e32 v67, v250, v67 ; 108687FA v_mac_f32_e32 v65, v40, v62 ; 3E827D28 v_mac_f32_e32 v66, v40, v63 ; 3E847F28 v_mac_f32_e32 v67, v40, v64 ; 3E868128 v_mac_f32_e32 v65, v30, v59 ; 3E82771E v_mac_f32_e32 v66, v30, v60 ; 3E84791E v_mac_f32_e32 v67, v30, v61 ; 3E867B1E v_mac_f32_e32 v65, v15, v56 ; 3E82710F v_mac_f32_e32 v66, v15, v57 ; 3E84730F v_mac_f32_e32 v67, v15, v58 ; 3E86750F v_mac_f32_e32 v65, v24, v41 ; 3E825318 v_mac_f32_e32 v66, v24, v42 ; 3E845518 v_mac_f32_e32 v67, v24, v43 ; 3E865718 v_mul_f32_e32 v41, v250, v80 ; 1052A1FA v_mul_f32_e32 v42, v250, v81 ; 1054A3FA v_mul_f32_e32 v43, v250, v82 ; 1056A5FA v_mac_f32_e32 v41, v40, v77 ; 3E529B28 v_mac_f32_e32 v42, v40, v78 ; 3E549D28 v_mac_f32_e32 v43, v40, v79 ; 3E569F28 v_mac_f32_e32 v41, v30, v74 ; 3E52951E v_mac_f32_e32 v42, v30, v75 ; 3E54971E v_mac_f32_e32 v43, v30, v76 ; 3E56991E v_mac_f32_e32 v41, v15, v71 ; 3E528F0F v_mac_f32_e32 v42, v15, v72 ; 3E54910F v_mac_f32_e32 v43, v15, v73 ; 3E56930F v_mac_f32_e32 v41, v24, v68 ; 3E528918 v_mac_f32_e32 v42, v24, v69 ; 3E548B18 v_mac_f32_e32 v43, v24, v70 ; 3E568D18 v_mul_f32_e32 v56, v250, v95 ; 1070BFFA v_mul_f32_e32 v57, v250, v96 ; 1072C1FA v_mul_f32_e32 v58, v250, v97 ; 1074C3FA v_mac_f32_e32 v56, v40, v92 ; 3E70B928 v_mac_f32_e32 v57, v40, v93 ; 3E72BB28 v_mac_f32_e32 v58, v40, v94 ; 3E74BD28 v_mac_f32_e32 v56, v30, v89 ; 3E70B31E v_mac_f32_e32 v57, v30, v90 ; 3E72B51E v_mac_f32_e32 v58, v30, v91 ; 3E74B71E v_mac_f32_e32 v56, v15, v86 ; 3E70AD0F v_mac_f32_e32 v57, v15, v87 ; 3E72AF0F v_mac_f32_e32 v58, v15, v88 ; 3E74B10F v_mac_f32_e32 v56, v24, v83 ; 3E70A718 v_mac_f32_e32 v57, v24, v84 ; 3E72A918 v_mac_f32_e32 v58, v24, v85 ; 3E74AB18 v_readlane_b32 s0, v251, 11 ; 020117FB v_readlane_b32 s1, v251, 12 ; 020319FB s_nop 2 ; BF800002 v_cndmask_b32_e64 v59, 0, 1.0, s[0:1] ; D200003B 0001E480 v_mul_f32_e32 v60, v59, v110 ; 1078DD3B v_mul_f32_e32 v61, v59, v111 ; 107ADF3B v_mul_f32_e32 v62, v59, v112 ; 107CE13B v_readlane_b32 s0, v251, 9 ; 020113FB v_readlane_b32 s1, v251, 10 ; 020315FB s_nop 2 ; BF800002 v_cndmask_b32_e64 v63, 0, 1.0, s[0:1] ; D200003F 0001E480 v_mac_f32_e32 v60, v63, v107 ; 3E78D73F v_mac_f32_e32 v61, v63, v108 ; 3E7AD93F v_mac_f32_e32 v62, v63, v109 ; 3E7CDB3F v_readlane_b32 s0, v251, 7 ; 02010FFB v_readlane_b32 s1, v251, 8 ; 020311FB s_nop 2 ; BF800002 v_cndmask_b32_e64 v64, 0, 1.0, s[0:1] ; D2000040 0001E480 v_mac_f32_e32 v60, v64, v104 ; 3E78D140 v_mac_f32_e32 v61, v64, v105 ; 3E7AD340 v_mac_f32_e32 v62, v64, v106 ; 3E7CD540 v_cmp_eq_f32_e64 s[2:3], v16, v27 ; D0040002 00023710 v_cndmask_b32_e64 v16, 0, 1.0, s[2:3] ; D2000010 0009E480 v_mac_f32_e32 v60, v16, v101 ; 3E78CB10 v_mac_f32_e32 v61, v16, v102 ; 3E7ACD10 v_mac_f32_e32 v62, v16, v103 ; 3E7CCF10 v_readlane_b32 s0, v251, 5 ; 02010BFB v_readlane_b32 s1, v251, 6 ; 02030DFB s_nop 2 ; BF800002 v_cndmask_b32_e64 v68, 0, 1.0, s[0:1] ; D2000044 0001E480 v_mac_f32_e32 v60, v68, v98 ; 3E78C544 v_mac_f32_e32 v61, v68, v99 ; 3E7AC744 v_mac_f32_e32 v62, v68, v100 ; 3E7CC944 v_mul_f32_e32 v69, v59, v125 ; 108AFB3B v_mul_f32_e32 v70, v59, v126 ; 108CFD3B v_mul_f32_e32 v71, v59, v127 ; 108EFF3B v_mac_f32_e32 v69, v63, v122 ; 3E8AF53F v_mac_f32_e32 v70, v63, v123 ; 3E8CF73F v_mac_f32_e32 v71, v63, v124 ; 3E8EF93F v_mac_f32_e32 v69, v64, v119 ; 3E8AEF40 v_mac_f32_e32 v70, v64, v120 ; 3E8CF140 v_mac_f32_e32 v71, v64, v121 ; 3E8EF340 v_mac_f32_e32 v69, v16, v116 ; 3E8AE910 v_mac_f32_e32 v70, v16, v117 ; 3E8CEB10 v_mac_f32_e32 v71, v16, v118 ; 3E8EED10 v_mac_f32_e32 v69, v68, v113 ; 3E8AE344 v_mac_f32_e32 v70, v68, v114 ; 3E8CE544 v_mac_f32_e32 v71, v68, v115 ; 3E8EE744 v_mul_f32_e32 v72, v59, v140 ; 1091193B v_mul_f32_e32 v73, v59, v141 ; 10931B3B v_mul_f32_e32 v74, v59, v142 ; 10951D3B v_mac_f32_e32 v72, v63, v137 ; 3E91133F v_mac_f32_e32 v73, v63, v138 ; 3E93153F v_mac_f32_e32 v74, v63, v139 ; 3E95173F v_mac_f32_e32 v72, v64, v134 ; 3E910D40 v_mac_f32_e32 v73, v64, v135 ; 3E930F40 v_mac_f32_e32 v74, v64, v136 ; 3E951140 v_mac_f32_e32 v72, v16, v131 ; 3E910710 v_mac_f32_e32 v73, v16, v132 ; 3E930910 v_mac_f32_e32 v74, v16, v133 ; 3E950B10 v_mac_f32_e32 v72, v68, v128 ; 3E910144 v_mac_f32_e32 v73, v68, v129 ; 3E930344 v_mac_f32_e32 v74, v68, v130 ; 3E950544 v_cmp_eq_f32_e64 s[0:1], 0, v21 ; D0040000 00022A80 v_cndmask_b32_e64 v75, 0, 1.0, s[0:1] ; D200004B 0001E480 v_mul_f32_e32 v76, v75, v155 ; 1099374B v_mul_f32_e32 v77, v75, v156 ; 109B394B v_mul_f32_e32 v78, v75, v157 ; 109D3B4B v_cmp_eq_f32_e64 s[0:1], 1.0, v21 ; D0040000 00022AF2 v_cndmask_b32_e64 v79, 0, 1.0, s[0:1] ; D200004F 0001E480 v_mac_f32_e32 v76, v79, v152 ; 3E99314F v_mac_f32_e32 v77, v79, v153 ; 3E9B334F v_mac_f32_e32 v78, v79, v154 ; 3E9D354F v_cmp_eq_f32_e64 s[0:1], 2.0, v21 ; D0040000 00022AF4 v_cndmask_b32_e64 v80, 0, 1.0, s[0:1] ; D2000050 0001E480 v_mac_f32_e32 v76, v80, v149 ; 3E992B50 v_mac_f32_e32 v77, v80, v150 ; 3E9B2D50 v_mac_f32_e32 v78, v80, v151 ; 3E9D2F50 v_cmp_eq_f32_e64 s[0:1], v21, v27 ; D0040000 00023715 v_cndmask_b32_e64 v21, 0, 1.0, s[0:1] ; D2000015 0001E480 v_mac_f32_e32 v76, v21, v146 ; 3E992515 v_mac_f32_e32 v77, v21, v147 ; 3E9B2715 v_mac_f32_e32 v78, v21, v148 ; 3E9D2915 v_readlane_b32 s0, v251, 3 ; 020107FB v_readlane_b32 s1, v251, 4 ; 020309FB s_nop 2 ; BF800002 v_cndmask_b32_e64 v81, 0, 1.0, s[0:1] ; D2000051 0001E480 v_mac_f32_e32 v76, v81, v143 ; 3E991F51 v_mac_f32_e32 v77, v81, v144 ; 3E9B2151 v_mac_f32_e32 v78, v81, v145 ; 3E9D2351 v_mul_f32_e32 v82, v75, v170 ; 10A5554B v_mul_f32_e32 v83, v75, v171 ; 10A7574B v_mul_f32_e32 v84, v75, v172 ; 10A9594B v_mac_f32_e32 v82, v79, v167 ; 3EA54F4F v_mac_f32_e32 v83, v79, v168 ; 3EA7514F v_mac_f32_e32 v84, v79, v169 ; 3EA9534F v_mac_f32_e32 v82, v80, v164 ; 3EA54950 v_mac_f32_e32 v83, v80, v165 ; 3EA74B50 v_mac_f32_e32 v84, v80, v166 ; 3EA94D50 v_mac_f32_e32 v82, v21, v161 ; 3EA54315 v_mac_f32_e32 v83, v21, v162 ; 3EA74515 v_mac_f32_e32 v84, v21, v163 ; 3EA94715 v_mac_f32_e32 v82, v81, v158 ; 3EA53D51 v_mac_f32_e32 v83, v81, v159 ; 3EA73F51 v_mac_f32_e32 v84, v81, v160 ; 3EA94151 v_mul_f32_e32 v85, v75, v185 ; 10AB734B v_mul_f32_e32 v86, v75, v186 ; 10AD754B v_mul_f32_e32 v87, v75, v187 ; 10AF774B v_mac_f32_e32 v85, v79, v182 ; 3EAB6D4F v_mac_f32_e32 v86, v79, v183 ; 3EAD6F4F v_mac_f32_e32 v87, v79, v184 ; 3EAF714F v_mac_f32_e32 v85, v80, v179 ; 3EAB6750 v_mac_f32_e32 v86, v80, v180 ; 3EAD6950 v_mac_f32_e32 v87, v80, v181 ; 3EAF6B50 v_mac_f32_e32 v85, v21, v176 ; 3EAB6115 v_mac_f32_e32 v86, v21, v177 ; 3EAD6315 v_mac_f32_e32 v87, v21, v178 ; 3EAF6515 v_mac_f32_e32 v85, v81, v173 ; 3EAB5B51 v_mac_f32_e32 v86, v81, v174 ; 3EAD5D51 v_mac_f32_e32 v87, v81, v175 ; 3EAF5F51 v_mul_f32_e32 v44, v250, v44 ; 105859FA v_mul_f32_e32 v45, v250, v45 ; 105A5BFA v_mac_f32_e32 v44, v40, v194 ; 3E598528 v_mac_f32_e32 v45, v40, v195 ; 3E5B8728 v_mul_f32_e32 v28, v250, v28 ; 103839FA v_mul_f32_e32 v29, v250, v29 ; 103A3BFA v_mac_f32_e32 v28, v40, v200 ; 3E399128 v_mac_f32_e32 v29, v40, v201 ; 3E3B9328 v_mul_f32_e32 v48, v250, v48 ; 106061FA v_mul_f32_e32 v49, v250, v49 ; 106263FA v_mac_f32_e32 v48, v40, v208 ; 3E61A128 v_mac_f32_e32 v49, v40, v209 ; 3E63A328 v_mac_f32_e32 v44, v30, v192 ; 3E59811E v_mac_f32_e32 v45, v30, v193 ; 3E5B831E v_mac_f32_e32 v28, v30, v198 ; 3E398D1E v_mac_f32_e32 v29, v30, v199 ; 3E3B8F1E v_mac_f32_e32 v48, v30, v206 ; 3E619D1E v_mac_f32_e32 v49, v30, v207 ; 3E639F1E v_mac_f32_e32 v44, v15, v190 ; 3E597D0F v_mac_f32_e32 v45, v15, v191 ; 3E5B7F0F v_mac_f32_e32 v28, v15, v196 ; 3E39890F v_mac_f32_e32 v29, v15, v197 ; 3E3B8B0F v_mac_f32_e32 v48, v15, v204 ; 3E61990F v_mac_f32_e32 v49, v15, v205 ; 3E639B0F v_mac_f32_e32 v44, v24, v188 ; 3E597918 v_mac_f32_e32 v45, v24, v189 ; 3E5B7B18 v_mac_f32_e32 v28, v24, v46 ; 3E385D18 v_mac_f32_e32 v29, v24, v47 ; 3E3A5F18 v_mac_f32_e32 v48, v24, v202 ; 3E619518 v_mac_f32_e32 v49, v24, v203 ; 3E639718 v_mul_f32_e32 v15, v59, v52 ; 101E693B v_mul_f32_e32 v24, v59, v53 ; 10306B3B v_mac_f32_e32 v15, v63, v214 ; 3E1FAD3F v_mac_f32_e32 v24, v63, v215 ; 3E31AF3F v_mul_f32_e32 v25, v59, v25 ; 1032333B v_mul_f32_e32 v26, v59, v26 ; 1034353B v_mac_f32_e32 v25, v63, v220 ; 3E33B93F v_mac_f32_e32 v26, v63, v221 ; 3E35BB3F v_mul_f32_e32 v30, v59, v34 ; 103C453B v_mul_f32_e32 v34, v59, v35 ; 1044473B v_mac_f32_e32 v30, v63, v228 ; 3E3DC93F v_mac_f32_e32 v34, v63, v229 ; 3E45CB3F v_mac_f32_e32 v15, v64, v212 ; 3E1FA940 v_mac_f32_e32 v24, v64, v213 ; 3E31AB40 v_mac_f32_e32 v25, v64, v218 ; 3E33B540 v_mac_f32_e32 v26, v64, v219 ; 3E35B740 v_mac_f32_e32 v30, v64, v226 ; 3E3DC540 v_mac_f32_e32 v34, v64, v227 ; 3E45C740 v_mac_f32_e32 v15, v16, v210 ; 3E1FA510 v_mac_f32_e32 v24, v16, v211 ; 3E31A710 v_mac_f32_e32 v25, v16, v216 ; 3E33B110 v_mac_f32_e32 v26, v16, v217 ; 3E35B310 v_mac_f32_e32 v30, v16, v224 ; 3E3DC110 v_mac_f32_e32 v34, v16, v225 ; 3E45C310 v_mac_f32_e32 v15, v68, v50 ; 3E1E6544 v_mac_f32_e32 v24, v68, v51 ; 3E306744 v_mac_f32_e32 v25, v68, v54 ; 3E326D44 v_mac_f32_e32 v26, v68, v55 ; 3E346F44 v_mac_f32_e32 v30, v68, v222 ; 3E3DBD44 v_mac_f32_e32 v34, v68, v223 ; 3E45BF44 s_waitcnt vmcnt(8) ; BF8C0778 v_mul_f32_e32 v16, v75, v36 ; 1020494B v_mul_f32_e32 v35, v75, v37 ; 10464B4B v_mac_f32_e32 v16, v79, v236 ; 3E21D94F v_mac_f32_e32 v35, v79, v237 ; 3E47DB4F s_waitcnt vmcnt(3) ; BF8C0773 v_mul_f32_e32 v22, v75, v22 ; 102C2D4B v_mul_f32_e32 v23, v75, v23 ; 102E2F4B v_mac_f32_e32 v22, v79, v242 ; 3E2DE54F v_mac_f32_e32 v23, v79, v243 ; 3E2FE74F image_sample_l v[36:37], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[20:27], s[44:47] ; F0900A00 0165241F image_sample_l v[31:32], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[12:19], s[96:99] ; F0900A00 03031F1F s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v31, v75, v31 ; 103E3F4B v_mul_f32_e32 v32, v75, v32 ; 1040414B v_mac_f32_e32 v31, v79, v36 ; 3E3E494F v_mac_f32_e32 v32, v79, v37 ; 3E404B4F v_mac_f32_e32 v16, v80, v234 ; 3E21D550 v_mac_f32_e32 v35, v80, v235 ; 3E47D750 v_mac_f32_e32 v22, v80, v240 ; 3E2DE150 v_mac_f32_e32 v23, v80, v241 ; 3E2FE350 v_mac_f32_e32 v31, v80, v248 ; 3E3FF150 v_mac_f32_e32 v32, v80, v249 ; 3E41F350 v_mac_f32_e32 v16, v21, v232 ; 3E21D115 v_mac_f32_e32 v35, v21, v233 ; 3E47D315 v_mac_f32_e32 v22, v21, v238 ; 3E2DDD15 v_mac_f32_e32 v23, v21, v239 ; 3E2FDF15 v_mac_f32_e32 v31, v21, v246 ; 3E3FED15 v_mac_f32_e32 v32, v21, v247 ; 3E41EF15 v_mac_f32_e32 v16, v81, v230 ; 3E21CD51 v_mac_f32_e32 v35, v81, v231 ; 3E47CF51 v_max_f32_e32 v18, v20, v18 ; 20242514 v_max_f32_e32 v19, v20, v19 ; 20262714 v_add_f32_e32 v20, v18, v17 ; 06282312 v_add_f32_e32 v20, v19, v20 ; 06282913 v_rcp_f32_e32 v20, v20 ; 7E285514 v_mac_f32_e32 v22, v81, v38 ; 3E2C4D51 v_mac_f32_e32 v23, v81, v39 ; 3E2E4F51 v_mac_f32_e32 v31, v81, v244 ; 3E3FE951 v_mac_f32_e32 v32, v81, v245 ; 3E41EB51 v_mul_f32_e32 v17, v20, v17 ; 10222314 v_mul_f32_e32 v18, v20, v18 ; 10242514 v_mul_f32_e32 v19, v20, v19 ; 10262714 v_mul_f32_e32 v20, v19, v60 ; 10287913 v_mac_f32_e32 v20, v17, v69 ; 3E288B11 v_mul_f32_e32 v21, v19, v61 ; 102A7B13 v_mac_f32_e32 v21, v17, v70 ; 3E2A8D11 v_mul_f32_e32 v33, v19, v62 ; 10427D13 v_mac_f32_e32 v33, v17, v71 ; 3E428F11 v_mac_f32_e32 v20, v18, v72 ; 3E289112 v_mac_f32_e32 v21, v18, v73 ; 3E2A9312 v_mac_f32_e32 v33, v18, v74 ; 3E429512 v_mul_f32_e32 v36, v19, v65 ; 10488313 v_mac_f32_e32 v36, v17, v41 ; 3E485311 v_mul_f32_e32 v37, v19, v66 ; 104A8513 v_mac_f32_e32 v37, v17, v42 ; 3E4A5511 v_mul_f32_e32 v38, v19, v67 ; 104C8713 v_mac_f32_e32 v38, v17, v43 ; 3E4C5711 v_mac_f32_e32 v36, v18, v56 ; 3E487112 v_mac_f32_e32 v37, v18, v57 ; 3E4A7312 v_mac_f32_e32 v38, v18, v58 ; 3E4C7512 v_mul_f32_e32 v36, v36, v7 ; 10480F24 v_mac_f32_e32 v36, v20, v8 ; 3E481114 v_mul_f32_e32 v20, v37, v7 ; 10280F25 v_mac_f32_e32 v20, v21, v8 ; 3E281115 v_mul_f32_e32 v21, v38, v7 ; 102A0F26 v_mac_f32_e32 v21, v33, v8 ; 3E2A1121 v_mad_f32 v33, 2.0, v44, -1.0 ; D2820021 03CE58F4 v_mad_f32 v29, 2.0, v29, -1.0 ; D282001D 03CE3AF4 v_mul_f32_e32 v33, v17, v33 ; 10424311 v_mac_f32_e32 v33, v18, v29 ; 3E423B12 v_mad_f32 v28, 2.0, v28, -1.0 ; D282001C 03CE38F4 v_mad_f32 v29, 2.0, v49, -1.0 ; D282001D 03CE62F4 v_mul_f32_e32 v37, 0, v17 ; 104A2280 v_mad_f32 v28, v18, v28, v37 ; D282001C 04963912 v_mac_f32_e32 v28, v19, v29 ; 3E383B13 v_mad_f32 v29, 2.0, v45, -1.0 ; D282001D 03CE5AF4 v_mad_f32 v38, 2.0, v48, -1.0 ; D2820026 03CE60F4 v_mul_f32_e32 v29, v17, v29 ; 103A3B11 v_mac_f32_e32 v29, 0, v18 ; 3E3A2480 v_mac_f32_e32 v29, v19, v38 ; 3E3A4D13 v_mad_f32 v15, 2.0, v15, -1.0 ; D282000F 03CE1EF4 v_mad_f32 v26, 2.0, v26, -1.0 ; D282001A 03CE34F4 v_mul_f32_e32 v15, v17, v15 ; 101E1F11 v_mac_f32_e32 v15, v18, v26 ; 3E1E3512 v_mad_f32 v25, 2.0, v25, -1.0 ; D2820019 03CE32F4 v_mad_f32 v26, 2.0, v34, -1.0 ; D282001A 03CE44F4 v_mad_f32 v25, v18, v25, v37 ; D2820019 04963312 v_mac_f32_e32 v25, v19, v26 ; 3E323513 v_mad_f32 v24, 2.0, v24, -1.0 ; D2820018 03CE30F4 v_mad_f32 v26, 2.0, v30, -1.0 ; D282001A 03CE3CF4 v_mul_f32_e32 v24, v17, v24 ; 10303111 v_mac_f32_e32 v24, 0, v18 ; 3E302480 v_mac_f32_e32 v24, v19, v26 ; 3E303513 v_mac_f32_e32 v33, 0, v19 ; 3E422680 v_mul_f32_e32 v26, v28, v7 ; 10340F1C v_mul_f32_e32 v28, v29, v7 ; 10380F1D v_mul_f32_e32 v7, v33, v7 ; 100E0F21 v_mac_f32_e32 v26, v25, v8 ; 3E341119 v_mac_f32_e32 v28, v24, v8 ; 3E381118 v_mac_f32_e32 v15, 0, v19 ; 3E1E2680 v_mac_f32_e32 v7, v15, v8 ; 3E0E110F v_mul_f32_e32 v8, v19, v76 ; 10109913 v_mac_f32_e32 v8, v17, v82 ; 3E10A511 v_mul_f32_e32 v15, v19, v77 ; 101E9B13 v_mac_f32_e32 v15, v17, v83 ; 3E1EA711 v_mul_f32_e32 v24, v19, v78 ; 10309D13 v_mac_f32_e32 v24, v17, v84 ; 3E30A911 v_mac_f32_e32 v8, v18, v85 ; 3E10AB12 v_mac_f32_e32 v15, v18, v86 ; 3E1EAD12 v_mac_f32_e32 v24, v18, v87 ; 3E30AF12 v_mad_f32 v25, 2.0, v35, -1.0 ; D2820019 03CE46F4 v_mad_f32 v16, 2.0, v16, -1.0 ; D2820010 03CE20F4 v_mul_f32_e32 v25, v17, v25 ; 10323311 v_mul_f32_e32 v16, v17, v16 ; 10202111 v_mad_f32 v17, 2.0, v22, -1.0 ; D2820011 03CE2CF4 v_mac_f32_e32 v37, v18, v17 ; 3E4A2312 v_mad_f32 v17, 2.0, v23, -1.0 ; D2820011 03CE2EF4 v_mac_f32_e32 v16, v18, v17 ; 3E202312 v_mac_f32_e32 v25, 0, v18 ; 3E322480 v_mad_f32 v17, 2.0, v32, -1.0 ; D2820011 03CE40F4 v_mac_f32_e32 v37, v19, v17 ; 3E4A2313 v_mad_f32 v17, 2.0, v31, -1.0 ; D2820011 03CE3EF4 v_mac_f32_e32 v25, v19, v17 ; 3E322313 v_mac_f32_e32 v16, 0, v19 ; 3E202680 v_mac_f32_e32 v26, v37, v4 ; 3E340925 v_mac_f32_e32 v28, v25, v4 ; 3E380919 v_mac_f32_e32 v7, v16, v4 ; 3E0E0910 v_mul_f32_e32 v16, v28, v28 ; 1020391C v_mac_f32_e32 v16, v26, v26 ; 3E20351A v_mac_f32_e32 v16, v7, v7 ; 3E200F07 v_add_f32_e32 v16, 1.0, v16 ; 062020F2 v_rsq_clamp_f32_e32 v16, v16 ; 7E205910 v_mac_f32_e32 v36, v8, v4 ; 3E480908 v_mac_f32_e32 v20, v15, v4 ; 3E28090F v_mac_f32_e32 v21, v24, v4 ; 3E2A0918 v_mul_f32_e32 v4, v16, v26 ; 10083510 v_mul_f32_e32 v8, v16, v28 ; 10103910 v_mul_f32_e32 v7, v16, v7 ; 100E0F10 v_mad_f32 v4, -v4, v3, v5 ; D2820004 24160704 v_mad_f32 v5, -v8, v3, v6 ; D2820005 241A0708 v_mad_f32 v3, -v7, v3, v9 ; D2820003 24260707 v_mul_f32_e32 v6, v4, v4 ; 100C0904 v_mac_f32_e32 v6, v5, v5 ; 3E0C0B05 v_mac_f32_e32 v6, v3, v3 ; 3E0C0703 v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_mul_f32_e32 v7, v13, v13 ; 100E1B0D v_mac_f32_e32 v7, v14, v14 ; 3E0E1D0E v_mac_f32_e32 v7, v12, v12 ; 3E0E190C v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v5, v6, v5 ; 100A0B06 v_mul_f32_e32 v3, v6, v3 ; 10060706 v_mul_f32_e32 v6, v7, v13 ; 100C1B07 v_mul_f32_e32 v8, v7, v14 ; 10101D07 v_mul_f32_e32 v7, v7, v12 ; 100E1907 v_mul_f32_e32 v6, v6, v4 ; 100C0906 v_mac_f32_e32 v6, v8, v5 ; 3E0C0B08 v_mac_f32_e32 v6, v7, v3 ; 3E0C0707 v_max_f32_e32 v6, 0x38d1b717, v6 ; 200C0CFF 38D1B717 v_log_f32_e32 v6, v6 ; 7E0C4F06 v_mul_f32_e32 v4, v11, v4 ; 1008090B v_mac_f32_e32 v4, v10, v5 ; 3E080B0A v_mul_f32_e32 v5, 0x42000000, v2 ; 100A04FF 42000000 v_mul_legacy_f32_e32 v5, v5, v6 ; 0E0A0D05 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_mac_f32_e32 v27, -2.0, v5 ; 3E360AF5 v_mul_f32_e32 v6, v27, v5 ; 100C0B1B v_mul_f32_e32 v5, v6, v5 ; 100A0B06 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_mul_f32_e32 v5, s100, v36 ; 100A4864 v_mac_f32_e32 v4, v1, v3 ; 3E080701 v_mul_f32_e32 v1, s101, v20 ; 10022865 v_readlane_b32 s1, v251, 19 ; 020327FB s_nop 2 ; BF800002 v_mul_f32_e32 v3, s1, v21 ; 10062A01 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_readlane_b32 s0, v251, 0 ; 020101FB s_nop 2 ; BF800002 v_add_f32_e64 v6, s0, s0 ; D2060006 00000000 v_readlane_b32 s0, v251, 1 ; 020103FB s_nop 2 ; BF800002 v_add_f32_e64 v7, s0, s0 ; D2060007 00000000 v_readlane_b32 s0, v251, 2 ; 020105FB s_nop 2 ; BF800002 v_add_f32_e64 v8, s0, s0 ; D2060008 00000000 v_readlane_b32 s0, v251, 20 ; 020129FB s_nop 2 ; BF800002 v_max_f32_e32 v6, s0, v6 ; 200C0C00 v_readlane_b32 s0, v251, 21 ; 02012BFB s_nop 2 ; BF800002 v_max_f32_e32 v7, s0, v7 ; 200E0E00 v_readlane_b32 s0, v251, 22 ; 02012DFB s_nop 2 ; BF800002 v_max_f32_e32 v8, s0, v8 ; 20101000 v_min_f32_e32 v6, 1.0, v6 ; 1E0C0CF2 v_min_f32_e32 v7, 1.0, v7 ; 1E0E0EF2 v_min_f32_e32 v8, 1.0, v8 ; 1E1010F2 v_mul_f32_e32 v6, v36, v6 ; 100C0D24 v_mul_f32_e32 v7, v20, v7 ; 100E0F14 v_mul_f32_e32 v8, v21, v8 ; 10101115 v_mac_f32_e32 v6, v4, v5 ; 3E0C0B04 v_mac_f32_e32 v7, v4, v1 ; 3E0E0304 v_mac_f32_e32 v8, v4, v3 ; 3E100704 v_mac_f32_e32 v6, s100, v2 ; 3E0C0464 v_mac_f32_e32 v7, s101, v2 ; 3E0E0465 v_mac_f32_e32 v8, s1, v2 ; 3E100401 v_mul_f32_e32 v1, 0.5, v6 ; 10020CF0 v_mul_f32_e32 v2, 0.5, v7 ; 10040EF0 v_mul_f32_e32 v3, 0.5, v8 ; 100610F0 v_mul_f32_e32 v1, v0, v1 ; 10020300 v_mul_f32_e32 v2, v0, v2 ; 10040500 v_mul_f32_e32 v0, v0, v3 ; 10000700 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 104 VGPRS: 252 Code Size: 4776 bytes LDS: 0 blocks Scratch: 12288 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..7] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[4], IN[0].xxxx 1: MAD TEMP[0], CONST[5], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[6], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[7], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[0], IN[0].xxxx 5: MAD TEMP[1], CONST[1], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[2], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1].xyz, CONST[3], IN[0].wwww, TEMP[1] 8: MOV TEMP[1].yzw, TEMP[1].yxyz 9: MOV TEMP[1].x, TEMP[0].zzzz 10: MOV OUT[1], TEMP[0] 11: MOV OUT[2], TEMP[1] 12: MOV OUT[0], TEMP[0] 13: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = extractelement <4 x float> %44, i32 3 %49 = fmul float %25, %45 %50 = fmul float %26, %45 %51 = fmul float %27, %45 %52 = fmul float %28, %45 %53 = fmul float %29, %46 %54 = fadd float %53, %49 %55 = fmul float %30, %46 %56 = fadd float %55, %50 %57 = fmul float %31, %46 %58 = fadd float %57, %51 %59 = fmul float %32, %46 %60 = fadd float %59, %52 %61 = fmul float %33, %47 %62 = fadd float %61, %54 %63 = fmul float %34, %47 %64 = fadd float %63, %56 %65 = fmul float %35, %47 %66 = fadd float %65, %58 %67 = fmul float %36, %47 %68 = fadd float %67, %60 %69 = fmul float %37, %48 %70 = fadd float %69, %62 %71 = fmul float %38, %48 %72 = fadd float %71, %64 %73 = fmul float %39, %48 %74 = fadd float %73, %66 %75 = fmul float %40, %48 %76 = fadd float %75, %68 %77 = fmul float %13, %45 %78 = fmul float %14, %45 %79 = fmul float %15, %45 %80 = fmul float %16, %46 %81 = fadd float %80, %77 %82 = fmul float %17, %46 %83 = fadd float %82, %78 %84 = fmul float %18, %46 %85 = fadd float %84, %79 %86 = fmul float %19, %47 %87 = fadd float %86, %81 %88 = fmul float %20, %47 %89 = fadd float %88, %83 %90 = fmul float %21, %47 %91 = fadd float %90, %85 %92 = fmul float %22, %48 %93 = fadd float %92, %87 %94 = fmul float %23, %48 %95 = fadd float %94, %89 %96 = fmul float %24, %48 %97 = fadd float %96, %91 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %70, float %72, float %74, float %76) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %74, float %93, float %95, float %97) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %70, float %72, float %74, float %76) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s12, s[0:3], 0xa ; C206010A s_buffer_load_dword s13, s[0:3], 0xc ; C206810C s_buffer_load_dword s14, s[0:3], 0xd ; C207010D s_buffer_load_dword s15, s[0:3], 0xe ; C207810E s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110 s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111 s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112 s_buffer_load_dword s19, s[0:3], 0x13 ; C2098113 s_buffer_load_dword s20, s[0:3], 0x14 ; C20A0114 s_buffer_load_dword s21, s[0:3], 0x15 ; C20A8115 s_buffer_load_dword s22, s[0:3], 0x16 ; C20B0116 s_buffer_load_dword s23, s[0:3], 0x17 ; C20B8117 s_buffer_load_dword s24, s[0:3], 0x18 ; C20C0118 s_buffer_load_dword s25, s[0:3], 0x19 ; C20C8119 s_buffer_load_dword s26, s[0:3], 0x1a ; C20D011A s_buffer_load_dword s27, s[0:3], 0x1b ; C20D811B s_buffer_load_dword s28, s[0:3], 0x1c ; C20E011C s_buffer_load_dword s29, s[0:3], 0x1d ; C20E811D s_buffer_load_dword s30, s[0:3], 0x1e ; C20F011E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s16, v0 ; 10080010 v_mac_f32_e32 v4, s20, v1 ; 3E080214 v_mul_f32_e32 v5, s17, v0 ; 100A0011 v_mac_f32_e32 v5, s21, v1 ; 3E0A0215 v_mul_f32_e32 v6, s18, v0 ; 100C0012 v_mac_f32_e32 v6, s22, v1 ; 3E0C0216 v_mul_f32_e32 v7, s19, v0 ; 100E0013 v_mac_f32_e32 v7, s23, v1 ; 3E0E0217 v_mul_f32_e32 v8, s4, v0 ; 10100004 v_mac_f32_e32 v8, s7, v1 ; 3E100207 v_mul_f32_e32 v9, s5, v0 ; 10120005 v_mac_f32_e32 v9, s8, v1 ; 3E120208 v_mul_f32_e32 v0, s6, v0 ; 10000006 v_mac_f32_e32 v0, s9, v1 ; 3E000209 v_mac_f32_e32 v4, s24, v2 ; 3E080418 v_mac_f32_e32 v5, s25, v2 ; 3E0A0419 v_mac_f32_e32 v6, s26, v2 ; 3E0C041A v_mac_f32_e32 v7, s27, v2 ; 3E0E041B v_mac_f32_e32 v8, s10, v2 ; 3E10040A v_mac_f32_e32 v9, s11, v2 ; 3E12040B v_mac_f32_e32 v0, s12, v2 ; 3E00040C v_mac_f32_e32 v4, s28, v3 ; 3E08061C v_mac_f32_e32 v5, s29, v3 ; 3E0A061D v_mac_f32_e32 v6, s30, v3 ; 3E0C061E v_mac_f32_e32 v7, s0, v3 ; 3E0E0600 v_mac_f32_e32 v8, s13, v3 ; 3E10060D v_mac_f32_e32 v9, s14, v3 ; 3E12060E v_mac_f32_e32 v0, s15, v3 ; 3E00060F exp 15, 32, 0, 0, 0, v4, v5, v6, v7 ; F800020F 07060504 exp 15, 33, 0, 0, 0, v6, v8, v9, v0 ; F800021F 00090806 exp 15, 12, 0, 1, 0, v4, v5, v6, v7 ; F80008CF 07060504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 12 Code Size: 284 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[0..4] DCL CONST[6] DCL CONST[9..13] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 0.5000, 2.0000, 1.0000, 0.0000} IMM[1] FLT32 { -1.0000, -0.5000, 8.0000, 0.8000} IMM[2] FLT32 { 4.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0].xyw, IN[0], IMM[0].xxxx 1: MOV TEMP[1].x, TEMP[0].xxxx 2: MUL TEMP[2].x, TEMP[0].yyyy, CONST[2].xxxx 3: MOV TEMP[1].y, TEMP[2].xxxx 4: ADD TEMP[0].xy, TEMP[1].xyyy, TEMP[0].wwww 5: MUL TEMP[1].x, CONST[9].xxxx, IMM[0].yyyy 6: MUL TEMP[2].xy, IN[1].ywww, TEMP[1].xxxx 7: MUL TEMP[3].x, IMM[0].xxxx, CONST[0].xxxx 8: MUL TEMP[4].x, IMM[0].yyyy, TEMP[3].xxxx 9: ADD TEMP[5].x, TEMP[2].xxxx, IMM[0].xxxx 10: MOV TEMP[5].y, TEMP[2].yyyy 11: MOV TEMP[6].x, -TEMP[2].xxxx 12: MOV TEMP[6].y, TEMP[2].yyyy 13: MOV TEMP[7].x, TEMP[2].xxxx 14: ADD TEMP[8].x, TEMP[2].yyyy, IMM[0].xxxx 15: MOV TEMP[7].y, TEMP[8].xxxx 16: MOV TEMP[8].x, TEMP[2].xxxx 17: MOV TEMP[8].y, -TEMP[2].yyyy 18: MAD TEMP[2].xy, IMM[0].zwww, TEMP[4].xxxx, TEMP[5].xyyy 19: MOV TEMP[2].xy, TEMP[2].xyyy 20: TEX TEMP[2], TEMP[2], SAMP[1], 2D 21: MAD TEMP[5].xy, IMM[0].zwww, TEMP[4].xxxx, TEMP[6].xyyy 22: MOV TEMP[5].xy, TEMP[5].xyyy 23: TEX TEMP[5], TEMP[5], SAMP[1], 2D 24: ADD TEMP[2], TEMP[2], TEMP[5] 25: MAD TEMP[5].xy, IMM[0].wzzz, TEMP[4].xxxx, TEMP[7].xyyy 26: MOV TEMP[5].xy, TEMP[5].xyyy 27: TEX TEMP[5], TEMP[5], SAMP[1], 2D 28: MAD TEMP[4].xy, IMM[0].wzzz, TEMP[4].xxxx, TEMP[8].xyyy 29: MOV TEMP[4].xy, TEMP[4].xyyy 30: TEX TEMP[4], TEMP[4], SAMP[1], 2D 31: ADD TEMP[4], TEMP[5], TEMP[4] 32: ADD TEMP[2].xy, TEMP[2], TEMP[4] 33: MAD TEMP[4].xy, TEMP[2].xyyy, IMM[0].xxxx, IMM[1].xxxx 34: MUL TEMP[2].xy, TEMP[4].xyyy, IMM[0].yyyy 35: MOV TEMP[2].z, IMM[0].zzzz 36: MUL TEMP[1].xy, IN[1].ywww, TEMP[1].xxxx 37: MUL TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx 38: ADD TEMP[4].x, TEMP[1].xxxx, IMM[0].xxxx 39: MOV TEMP[4].y, TEMP[1].yyyy 40: MOV TEMP[5].x, -TEMP[1].xxxx 41: MOV TEMP[5].y, TEMP[1].yyyy 42: MOV TEMP[6].x, TEMP[1].xxxx 43: ADD TEMP[7].x, TEMP[1].yyyy, IMM[0].xxxx 44: MOV TEMP[6].y, TEMP[7].xxxx 45: MOV TEMP[7].x, TEMP[1].xxxx 46: MOV TEMP[7].y, -TEMP[1].yyyy 47: MAD TEMP[1].xy, IMM[0].zwww, TEMP[3].xxxx, TEMP[4].xyyy 48: MOV TEMP[1].xy, TEMP[1].xyyy 49: TEX TEMP[1], TEMP[1], SAMP[1], 2D 50: MAD TEMP[4].xy, IMM[0].zwww, TEMP[3].xxxx, TEMP[5].xyyy 51: MOV TEMP[4].xy, TEMP[4].xyyy 52: TEX TEMP[4], TEMP[4], SAMP[1], 2D 53: ADD TEMP[1], TEMP[1], TEMP[4] 54: MAD TEMP[4].xy, IMM[0].wzzz, TEMP[3].xxxx, TEMP[6].xyyy 55: MOV TEMP[4].xy, TEMP[4].xyyy 56: TEX TEMP[4], TEMP[4], SAMP[1], 2D 57: MAD TEMP[3].xy, IMM[0].wzzz, TEMP[3].xxxx, TEMP[7].xyyy 58: MOV TEMP[3].xy, TEMP[3].xyyy 59: TEX TEMP[3], TEMP[3], SAMP[1], 2D 60: ADD TEMP[3], TEMP[4], TEMP[3] 61: ADD TEMP[1].xy, TEMP[1], TEMP[3] 62: MAD TEMP[3].xy, TEMP[1].xyyy, IMM[0].xxxx, IMM[1].xxxx 63: MUL TEMP[1].xy, TEMP[3].xyyy, IMM[0].yyyy 64: MOV TEMP[1].z, IMM[0].zzzz 65: MAX TEMP[3].x, IN[0].wwww, IMM[0].wwww 66: MUL TEMP[4].x, CONST[12].xxxx, IMM[0].xxxx 67: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx 68: POW TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx 69: MOV_SAT TEMP[3].x, TEMP[3].xxxx 70: MOV TEMP[4].zw, IN[0].wwzw 71: DP3 TEMP[5].x, TEMP[1].xyzz, TEMP[1].xyzz 72: RSQ TEMP[5].x, TEMP[5].xxxx 73: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xxxx 74: DP3 TEMP[5].x, TEMP[2].xyzz, TEMP[2].xyzz 75: RSQ TEMP[5].x, TEMP[5].xxxx 76: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx 77: MUL TEMP[5].x, IMM[0].yyyy, CONST[0].xxxx 78: FRC TEMP[5].x, TEMP[5].xxxx 79: LRP TEMP[1].xyz, TEMP[5].xxxx, TEMP[2].xzyy, TEMP[1].xzyy 80: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 81: RSQ TEMP[2].x, TEMP[2].xxxx 82: MOV TEMP[0].xy, TEMP[0].xyyy 83: MOV TEMP[0].w, IN[0].wwww 84: TXP TEMP[0].xyz, TEMP[0], SAMP[2], 2D 85: ADD TEMP[0].xyz, TEMP[0].xzyy, IMM[1].yxyy 86: MAD TEMP[0].xz, TEMP[1].xyzz, TEMP[2].xxxx, TEMP[0].xyzz 87: MUL TEMP[0].xy, TEMP[0].xzzz, CONST[11].xxxx 88: MUL TEMP[1].xy, CONST[6].xyyy, IMM[1].zzzz 89: MUL TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy 90: MOV_SAT TEMP[1].x, TEMP[3].xxxx 91: MUL TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xxxx 92: POW TEMP[1].x, IN[0].zzzz, IMM[1].wwww 93: MAD TEMP[4].xy, TEMP[0].xyyy, TEMP[1].xxxx, IN[0].xyyy 94: MUL TEMP[0].xyw, TEMP[4], IMM[0].xxxx 95: ADD TEMP[0].xy, TEMP[0].xyyy, TEMP[0].wwww 96: ADD TEMP[1].xyz, IN[1].yzww, -CONST[1].xyzz 97: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 98: RSQ TEMP[2].x, TEMP[2].xxxx 99: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 100: MOV_SAT TEMP[2].x, TEMP[1].yyyy 101: POW TEMP[2].x, TEMP[2].xxxx, CONST[12].zzzz 102: MOV TEMP[0].xy, TEMP[0].xyyy 103: MOV TEMP[0].w, IN[0].wwww 104: TXP TEMP[0].xyz, TEMP[0], SAMP[0], 2D 105: DP3 TEMP[1].x, TEMP[1].xyzz, -CONST[13].xyzz 106: MOV_SAT TEMP[1].x, TEMP[1].xxxx 107: MUL TEMP[4].x, CONST[12].zzzz, IMM[2].xxxx 108: POW TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx 109: MAD TEMP[1].x, TEMP[1].xxxx, CONST[12].zzzz, IMM[0].zzzz 110: ADD TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 111: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 112: ADD TEMP[1].x, TEMP[3].xxxx, CONST[12].yyyy 113: MOV_SAT TEMP[1].x, TEMP[1].xxxx 114: LRP TEMP[0].xyz, TEMP[1].xxxx, CONST[10].xyzz, TEMP[0].xyzz 115: ADD TEMP[1].x, IMM[0].zzzz, -TEMP[2].xxxx 116: MOV_SAT TEMP[1].x, TEMP[1].xxxx 117: LRP TEMP[0].xyz, TEMP[1].xxxx, CONST[10].xyzz, TEMP[0].xyzz 118: MUL TEMP[1].x, IN[0].wwww, IMM[0].yyyy 119: MOV_SAT TEMP[1].x, TEMP[1].xxxx 120: MOV TEMP[1].w, TEMP[1].xxxx 121: MAD TEMP[2].x, IN[1].xxxx, CONST[4].zzzz, CONST[4].wwww 122: MOV_SAT TEMP[2].x, TEMP[2].xxxx 123: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[0].xyzz, CONST[3].xyzz 124: MOV OUT[0], TEMP[1] 125: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %47 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %48 = load <32 x i8>, <32 x i8> addrspace(2)* %47, align 32, !tbaa !0 %49 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 %51 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %52 = load <8 x i32>, <8 x i32> addrspace(2)* %51, align 32, !tbaa !0 %53 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %54 = load <4 x i32>, <4 x i32> addrspace(2)* %53, align 16, !tbaa !0 %55 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %56 = bitcast <8 x i32> addrspace(2)* %55 to <32 x i8> addrspace(2)* %57 = load <32 x i8>, <32 x i8> addrspace(2)* %56, align 32, !tbaa !0 %58 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %59 = bitcast <4 x i32> addrspace(2)* %58 to <16 x i8> addrspace(2)* %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %62 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %63 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %64 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %65 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %66 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %67 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %68 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %69 = fmul float %61, 5.000000e-01 %70 = fmul float %62, 5.000000e-01 %71 = fmul float %64, 5.000000e-01 %72 = fmul float %70, %28 %73 = fadd float %69, %71 %74 = fadd float %72, %71 %75 = fmul float %36, 2.000000e+00 %76 = fmul float %66, %75 %77 = fmul float %68, %75 %78 = fmul float %24, 5.000000e-01 %79 = fmul float %78, 2.000000e+00 %80 = fadd float %76, 5.000000e-01 %81 = fadd float %77, 5.000000e-01 %82 = fadd float %79, %80 %83 = fmul float %79, 0.000000e+00 %84 = fadd float %83, %77 %85 = bitcast float %82 to i32 %86 = bitcast float %84 to i32 %87 = insertelement <2 x i32> undef, i32 %85, i32 0 %88 = insertelement <2 x i32> %87, i32 %86, i32 1 %89 = bitcast <8 x i32> %52 to <32 x i8> %90 = bitcast <4 x i32> %54 to <16 x i8> %91 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %88, <32 x i8> %89, <16 x i8> %90, i32 2) %92 = extractelement <4 x float> %91, i32 0 %93 = extractelement <4 x float> %91, i32 1 %94 = fsub float %79, %76 %95 = fmul float %79, 0.000000e+00 %96 = fadd float %95, %77 %97 = bitcast float %94 to i32 %98 = bitcast float %96 to i32 %99 = insertelement <2 x i32> undef, i32 %97, i32 0 %100 = insertelement <2 x i32> %99, i32 %98, i32 1 %101 = bitcast <8 x i32> %52 to <32 x i8> %102 = bitcast <4 x i32> %54 to <16 x i8> %103 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %100, <32 x i8> %101, <16 x i8> %102, i32 2) %104 = extractelement <4 x float> %103, i32 0 %105 = extractelement <4 x float> %103, i32 1 %106 = fadd float %92, %104 %107 = fadd float %93, %105 %108 = fmul float %79, 0.000000e+00 %109 = fadd float %108, %76 %110 = fadd float %79, %81 %111 = bitcast float %109 to i32 %112 = bitcast float %110 to i32 %113 = insertelement <2 x i32> undef, i32 %111, i32 0 %114 = insertelement <2 x i32> %113, i32 %112, i32 1 %115 = bitcast <8 x i32> %52 to <32 x i8> %116 = bitcast <4 x i32> %54 to <16 x i8> %117 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %114, <32 x i8> %115, <16 x i8> %116, i32 2) %118 = extractelement <4 x float> %117, i32 0 %119 = extractelement <4 x float> %117, i32 1 %120 = fmul float %79, 0.000000e+00 %121 = fadd float %120, %76 %122 = fsub float %79, %77 %123 = bitcast float %121 to i32 %124 = bitcast float %122 to i32 %125 = insertelement <2 x i32> undef, i32 %123, i32 0 %126 = insertelement <2 x i32> %125, i32 %124, i32 1 %127 = bitcast <8 x i32> %52 to <32 x i8> %128 = bitcast <4 x i32> %54 to <16 x i8> %129 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %126, <32 x i8> %127, <16 x i8> %128, i32 2) %130 = extractelement <4 x float> %129, i32 0 %131 = extractelement <4 x float> %129, i32 1 %132 = fadd float %118, %130 %133 = fadd float %119, %131 %134 = fadd float %106, %132 %135 = fadd float %107, %133 %136 = fmul float %134, 5.000000e-01 %137 = fadd float %136, -1.000000e+00 %138 = fmul float %135, 5.000000e-01 %139 = fadd float %138, -1.000000e+00 %140 = fmul float %137, 2.000000e+00 %141 = fmul float %139, 2.000000e+00 %142 = fmul float %66, %75 %143 = fmul float %68, %75 %144 = fmul float %78, 2.000000e+00 %145 = fadd float %142, 5.000000e-01 %146 = fadd float %143, 5.000000e-01 %147 = fadd float %144, %145 %148 = fmul float %144, 0.000000e+00 %149 = fadd float %148, %143 %150 = bitcast float %147 to i32 %151 = bitcast float %149 to i32 %152 = insertelement <2 x i32> undef, i32 %150, i32 0 %153 = insertelement <2 x i32> %152, i32 %151, i32 1 %154 = bitcast <8 x i32> %52 to <32 x i8> %155 = bitcast <4 x i32> %54 to <16 x i8> %156 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %153, <32 x i8> %154, <16 x i8> %155, i32 2) %157 = extractelement <4 x float> %156, i32 0 %158 = extractelement <4 x float> %156, i32 1 %159 = fsub float %144, %142 %160 = fmul float %144, 0.000000e+00 %161 = fadd float %160, %143 %162 = bitcast float %159 to i32 %163 = bitcast float %161 to i32 %164 = insertelement <2 x i32> undef, i32 %162, i32 0 %165 = insertelement <2 x i32> %164, i32 %163, i32 1 %166 = bitcast <8 x i32> %52 to <32 x i8> %167 = bitcast <4 x i32> %54 to <16 x i8> %168 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %165, <32 x i8> %166, <16 x i8> %167, i32 2) %169 = extractelement <4 x float> %168, i32 0 %170 = extractelement <4 x float> %168, i32 1 %171 = fadd float %157, %169 %172 = fadd float %158, %170 %173 = fmul float %144, 0.000000e+00 %174 = fadd float %173, %142 %175 = fadd float %144, %146 %176 = bitcast float %174 to i32 %177 = bitcast float %175 to i32 %178 = insertelement <2 x i32> undef, i32 %176, i32 0 %179 = insertelement <2 x i32> %178, i32 %177, i32 1 %180 = bitcast <8 x i32> %52 to <32 x i8> %181 = bitcast <4 x i32> %54 to <16 x i8> %182 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %179, <32 x i8> %180, <16 x i8> %181, i32 2) %183 = extractelement <4 x float> %182, i32 0 %184 = extractelement <4 x float> %182, i32 1 %185 = fmul float %144, 0.000000e+00 %186 = fadd float %185, %142 %187 = fsub float %144, %143 %188 = bitcast float %186 to i32 %189 = bitcast float %187 to i32 %190 = insertelement <2 x i32> undef, i32 %188, i32 0 %191 = insertelement <2 x i32> %190, i32 %189, i32 1 %192 = bitcast <8 x i32> %52 to <32 x i8> %193 = bitcast <4 x i32> %54 to <16 x i8> %194 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %191, <32 x i8> %192, <16 x i8> %193, i32 2) %195 = extractelement <4 x float> %194, i32 0 %196 = extractelement <4 x float> %194, i32 1 %197 = fadd float %183, %195 %198 = fadd float %184, %196 %199 = fadd float %171, %197 %200 = fadd float %172, %198 %201 = fmul float %199, 5.000000e-01 %202 = fadd float %201, -1.000000e+00 %203 = fmul float %200, 5.000000e-01 %204 = fadd float %203, -1.000000e+00 %205 = fmul float %202, 2.000000e+00 %206 = fmul float %204, 2.000000e+00 %207 = call float @llvm.maxnum.f32(float %64, float 0.000000e+00) %208 = fmul float %41, 5.000000e-01 %209 = fmul float %207, %208 %sqrtf = call float @sqrtf(float %209) #1 %fabsf = call float @fabsf(float %sqrtf) #1 %210 = fcmp oeq float %209, 0xFFF0000000000000 %211 = select i1 %210, float 0x7FF0000000000000, float %fabsf %212 = call float @llvm.AMDIL.clamp.(float %211, float 0.000000e+00, float 1.000000e+00) %213 = fmul float %205, %205 %214 = fmul float %206, %206 %215 = fadd float %214, %213 %216 = fadd float %215, 1.000000e+00 %217 = call float @llvm.AMDGPU.rsq.clamped.f32(float %216) %218 = fmul float %205, %217 %219 = fmul float %206, %217 %220 = fmul float %140, %140 %221 = fmul float %141, %141 %222 = fadd float %221, %220 %223 = fadd float %222, 1.000000e+00 %224 = call float @llvm.AMDGPU.rsq.clamped.f32(float %223) %225 = fmul float %140, %224 %226 = fmul float %141, %224 %227 = fmul float %24, 2.000000e+00 %228 = call float @llvm.floor.f32(float %227) %229 = fsub float %227, %228 %230 = call float @llvm.AMDGPU.lrp(float %229, float %225, float %218) %231 = call float @llvm.AMDGPU.lrp(float %229, float %224, float %217) %232 = call float @llvm.AMDGPU.lrp(float %229, float %226, float %219) %233 = fmul float %230, %230 %234 = fmul float %231, %231 %235 = fadd float %234, %233 %236 = fmul float %232, %232 %237 = fadd float %235, %236 %238 = call float @llvm.AMDGPU.rsq.clamped.f32(float %237) %239 = fdiv float %73, %64 %240 = fdiv float %74, %64 %241 = bitcast float %239 to i32 %242 = bitcast float %240 to i32 %243 = insertelement <2 x i32> undef, i32 %241, i32 0 %244 = insertelement <2 x i32> %243, i32 %242, i32 1 %245 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %244, <32 x i8> %57, <16 x i8> %60, i32 2) %246 = extractelement <4 x float> %245, i32 0 %247 = extractelement <4 x float> %245, i32 1 %248 = fadd float %246, -5.000000e-01 %249 = fadd float %247, -5.000000e-01 %250 = fmul float %230, %238 %251 = fadd float %250, %248 %252 = fmul float %232, %238 %253 = fadd float %252, %249 %254 = fmul float %251, %40 %255 = fmul float %253, %40 %256 = fmul float %34, 8.000000e+00 %257 = fmul float %35, 8.000000e+00 %258 = fmul float %254, %256 %259 = fmul float %255, %257 %260 = call float @llvm.AMDIL.clamp.(float %212, float 0.000000e+00, float 1.000000e+00) %261 = fmul float %258, %260 %262 = fmul float %259, %260 %263 = call float @llvm.pow.f32(float %63, float 0x3FE99999A0000000) %264 = fmul float %261, %263 %265 = fadd float %264, %61 %266 = fmul float %262, %263 %267 = fadd float %266, %62 %268 = fmul float %265, 5.000000e-01 %269 = fmul float %267, 5.000000e-01 %270 = fmul float %64, 5.000000e-01 %271 = fadd float %268, %270 %272 = fadd float %269, %270 %273 = fsub float %66, %25 %274 = fsub float %67, %26 %275 = fsub float %68, %27 %276 = fmul float %273, %273 %277 = fmul float %274, %274 %278 = fadd float %277, %276 %279 = fmul float %275, %275 %280 = fadd float %278, %279 %281 = call float @llvm.AMDGPU.rsq.clamped.f32(float %280) %282 = fmul float %273, %281 %283 = fmul float %274, %281 %284 = fmul float %275, %281 %285 = call float @llvm.AMDIL.clamp.(float %283, float 0.000000e+00, float 1.000000e+00) %286 = call float @llvm.pow.f32(float %285, float %43) %287 = fdiv float %271, %64 %288 = fdiv float %272, %64 %289 = bitcast float %287 to i32 %290 = bitcast float %288 to i32 %291 = insertelement <2 x i32> undef, i32 %289, i32 0 %292 = insertelement <2 x i32> %291, i32 %290, i32 1 %293 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %292, <32 x i8> %48, <16 x i8> %50, i32 2) %294 = extractelement <4 x float> %293, i32 0 %295 = extractelement <4 x float> %293, i32 1 %296 = extractelement <4 x float> %293, i32 2 %297 = fmul float %44, %282 %298 = fsub float -0.000000e+00, %297 %299 = fmul float %45, %283 %300 = fsub float %298, %299 %301 = fmul float %46, %284 %302 = fsub float %300, %301 %303 = call float @llvm.AMDIL.clamp.(float %302, float 0.000000e+00, float 1.000000e+00) %304 = fmul float %43, 4.000000e+00 %305 = call float @llvm.pow.f32(float %303, float %304) %306 = fmul float %305, %43 %307 = fadd float %306, 1.000000e+00 %308 = fadd float %307, %286 %309 = fmul float %294, %308 %310 = fmul float %295, %308 %311 = fmul float %296, %308 %312 = fadd float %212, %42 %313 = call float @llvm.AMDIL.clamp.(float %312, float 0.000000e+00, float 1.000000e+00) %314 = call float @llvm.AMDGPU.lrp(float %313, float %37, float %309) %315 = call float @llvm.AMDGPU.lrp(float %313, float %38, float %310) %316 = call float @llvm.AMDGPU.lrp(float %313, float %39, float %311) %317 = fsub float 1.000000e+00, %286 %318 = call float @llvm.AMDIL.clamp.(float %317, float 0.000000e+00, float 1.000000e+00) %319 = call float @llvm.AMDGPU.lrp(float %318, float %37, float %314) %320 = call float @llvm.AMDGPU.lrp(float %318, float %38, float %315) %321 = call float @llvm.AMDGPU.lrp(float %318, float %39, float %316) %322 = fmul float %64, 2.000000e+00 %323 = call float @llvm.AMDIL.clamp.(float %322, float 0.000000e+00, float 1.000000e+00) %324 = fmul float %65, %32 %325 = fadd float %324, %33 %326 = call float @llvm.AMDIL.clamp.(float %325, float 0.000000e+00, float 1.000000e+00) %327 = call float @llvm.AMDGPU.lrp(float %326, float %319, float %29) %328 = call float @llvm.AMDGPU.lrp(float %326, float %320, float %30) %329 = call float @llvm.AMDGPU.lrp(float %326, float %321, float %31) %330 = call i32 @llvm.SI.packf16(float %327, float %328) %331 = bitcast i32 %330 to float %332 = call i32 @llvm.SI.packf16(float %329, float %323) %333 = bitcast i32 %332 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %331, float %333, float %331, float %333) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) declare float @sqrtf(float) declare float @fabsf(float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s2, s[8:11], 0x0 ; C2010900 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 s_buffer_load_dword s3, s[8:11], 0x4 ; C2018904 s_buffer_load_dword s36, s[8:11], 0x5 ; C2120905 s_buffer_load_dword s37, s[8:11], 0x6 ; C2128906 s_buffer_load_dword s38, s[8:11], 0x8 ; C2130908 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e64 v1, 0.5, s2 ; D2100001 000004F0 v_mad_f32 v9, 0.5, s2, v1 ; D2820009 040404F0 s_buffer_load_dword s24, s[8:11], 0x24 ; C20C0924 s_buffer_load_dword s1, s[8:11], 0x28 ; C2008928 s_buffer_load_dword s0, s[8:11], 0x29 ; C2000929 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e64 v10, s24, s24 ; D206000A 00003018 v_mul_f32_e32 v12, v10, v0 ; 1018010A v_mad_f32 v11, v7, v10, 0.5 ; D282000B 03C21507 v_mul_f32_e32 v13, v10, v7 ; 101A0F0A v_mad_f32 v14, v0, v10, 0.5 ; D282000E 03C21500 v_mac_f32_e32 v11, 2.0, v1 ; 3E1602F4 v_mac_f32_e32 v13, 0, v9 ; 3E1A1280 v_mac_f32_e32 v14, 2.0, v1 ; 3E1C02F4 v_mac_f32_e32 v12, 0, v9 ; 3E181280 s_load_dwordx4 s[24:27], s[4:5], 0x8 ; C08C0508 s_load_dwordx8 s[28:35], s[6:7], 0x10 ; C0CE0710 image_sample v[15:16], 3, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[16:23], s[12:15] ; F0800300 00640F0B v_mad_f32 v11, -v7, v10, v9 ; D282000B 24261507 image_sample v[11:12], 3, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[16:23], s[12:15] ; F0800300 00640B0B image_sample v[17:18], 3, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[16:23], s[12:15] ; F0800300 0064110D v_mad_f32 v14, -v0, v10, v9 ; D282000E 24261500 image_sample v[9:10], 3, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[16:23], s[12:15] ; F0800300 0064090D v_mov_b32_e32 v1, 0x6f800000 ; 7E0202FF 6F800000 v_cmp_gt_f32_e64 vcc, |v5|, v1 ; D008016A 00020305 v_mov_b32_e32 v1, 0x2f800000 ; 7E0202FF 2F800000 v_cndmask_b32_e32 v1, 1.0, v1 ; 000202F2 v_mul_f32_e32 v13, v1, v5 ; 101A0B01 v_rcp_f32_e32 v13, v13 ; 7E1A550D v_mul_f32_e32 v14, 0.5, v3 ; 101C06F0 v_mul_f32_e32 v19, 0.5, v5 ; 10260AF0 v_mad_f32 v14, s38, v14, v19 ; D282000E 044E1C26 v_mad_f32 v20, 0.5, v2, v19 ; D2820014 044E04F0 v_mul_f32_e32 v20, v13, v20 ; 1028290D v_mul_f32_e32 v14, v13, v14 ; 101C1D0D v_mul_f32_e32 v20, v20, v1 ; 10280314 v_mul_f32_e32 v21, v14, v1 ; 102A030E s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[20:21], 3, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[28:35], s[24:27] ; F0800300 00C71414 s_waitcnt vmcnt(3) ; BF8C0773 v_add_f32_e32 v11, v11, v15 ; 06161F0B v_add_f32_e32 v12, v12, v16 ; 0618210C s_waitcnt vmcnt(1) ; BF8C0771 v_add_f32_e32 v9, v9, v17 ; 06122309 v_add_f32_e32 v10, v10, v18 ; 0614250A v_add_f32_e32 v9, v9, v11 ; 06121709 v_add_f32_e32 v10, v10, v12 ; 0614190A v_mad_f32 v9, 0.5, v9, -1.0 ; D2820009 03CE12F0 v_mad_f32 v10, 0.5, v10, -1.0 ; D282000A 03CE14F0 v_add_f32_e32 v9, v9, v9 ; 06121309 v_add_f32_e32 v10, v10, v10 ; 0614150A v_mad_f32 v11, v9, v9, 1.0 ; D282000B 03CA1309 v_mac_f32_e32 v11, v10, v10 ; 3E16150A v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B v_add_f32_e64 v12, s2, s2 ; D206000C 00000402 v_floor_f32_e32 v12, v12 ; 7E18490C v_mad_f32 v12, 2.0, s2, -v12 ; D282000C 843004F4 v_mul_f32_e32 v9, v11, v9 ; 1012130B v_sub_f32_e32 v14, 1.0, v12 ; 081C18F2 s_buffer_load_dword s2, s[8:11], 0x30 ; C2010930 v_mul_f32_e32 v15, v9, v14 ; 101E1D09 v_mac_f32_e32 v15, v9, v12 ; 3E1E1909 v_mul_f32_e32 v9, v11, v10 ; 1012150B v_mul_f32_e32 v10, v11, v14 ; 10141D0B v_mac_f32_e32 v10, v11, v12 ; 3E14190B v_mul_f32_e32 v11, v9, v14 ; 10161D09 v_mac_f32_e32 v11, v9, v12 ; 3E161909 s_buffer_load_dword s12, s[8:11], 0x31 ; C2060931 s_buffer_load_dword s13, s[8:11], 0x32 ; C2068932 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e64 v9, 0.5, s2 ; D2100009 000004F0 v_max_f32_e32 v12, 0, v5 ; 20180A80 v_mul_f32_e32 v9, v9, v12 ; 10121909 v_mul_f32_e32 v12, v15, v15 ; 10181F0F v_mac_f32_e32 v12, v10, v10 ; 3E18150A v_sqrt_f32_e32 v10, v9 ; 7E146709 v_and_b32_e32 v10, 0x7fffffff, v10 ; 361414FF 7FFFFFFF v_mov_b32_e32 v14, 0xff800000 ; 7E1C02FF FF800000 v_cmp_eq_f32_e32 vcc, v14, v9 ; 7C04130E v_mac_f32_e32 v12, v11, v11 ; 3E18170B s_buffer_load_dword s2, s[8:11], 0x2c ; C201092C v_rsq_clamp_f32_e32 v9, v12 ; 7E12590C s_buffer_load_dword s14, s[8:11], 0x18 ; C2070918 s_buffer_load_dword s15, s[8:11], 0x19 ; C2078919 v_mov_b32_e32 v12, 0x7f800000 ; 7E1802FF 7F800000 v_cndmask_b32_e32 v10, v10, v12 ; 0014190A v_add_f32_e32 v12, -0.5, v20 ; 061828F1 v_add_f32_e32 v14, -0.5, v21 ; 061C2AF1 v_mac_f32_e32 v12, v9, v15 ; 3E181F09 v_mac_f32_e32 v14, v9, v11 ; 3E1C1709 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v9, s2, v12 ; 10121802 v_mul_f32_e32 v11, s2, v14 ; 10161C02 v_mov_b32_e32 v12, 0x41000000 ; 7E1802FF 41000000 v_mul_f32_e32 v14, s14, v12 ; 101C180E v_mul_f32_e32 v12, s15, v12 ; 1018180F v_mul_f32_e32 v9, v14, v9 ; 1012130E v_mul_f32_e32 v11, v12, v11 ; 1016170C v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480 v_log_f32_e32 v4, v4 ; 7E084F04 v_add_f32_e64 v12, 0, v10 clamp ; D206080C 00021480 v_mul_f32_e32 v9, v12, v9 ; 1012130C v_mul_f32_e32 v11, v12, v11 ; 1016170C v_mul_legacy_f32_e32 v4, 0x3f4ccccd, v4 ; 0E0808FF 3F4CCCCD v_exp_f32_e32 v4, v4 ; 7E084B04 v_mac_f32_e32 v2, v4, v9 ; 3E041304 v_mac_f32_e32 v3, v4, v11 ; 3E061704 v_mad_f32 v2, 0.5, v2, v19 ; D2820002 044E04F0 v_mac_f32_e32 v19, 0.5, v3 ; 3E2606F0 v_subrev_f32_e32 v3, s3, v7 ; 0A060E03 v_subrev_f32_e32 v4, s36, v8 ; 0A081024 v_subrev_f32_e32 v0, s37, v0 ; 0A000025 v_mul_f32_e32 v2, v13, v2 ; 1004050D v_mul_f32_e32 v7, v13, v19 ; 100E270D s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500 s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 v_mul_f32_e32 v8, v3, v3 ; 10100703 v_mac_f32_e32 v8, v4, v4 ; 3E100904 v_mac_f32_e32 v8, v0, v0 ; 3E100100 s_buffer_load_dword s2, s[8:11], 0x34 ; C2010934 s_buffer_load_dword s3, s[8:11], 0x35 ; C2018935 s_buffer_load_dword s4, s[8:11], 0x36 ; C2020936 v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_mul_f32_e32 v11, v2, v1 ; 10160302 v_mul_f32_e32 v12, v7, v1 ; 10180307 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:13], 7, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[20:27], s[16:19] ; F0800700 00850B0B v_mul_f32_e32 v1, v8, v3 ; 10020708 v_mul_f32_e32 v2, v8, v4 ; 10040908 v_mul_f32_e32 v0, v8, v0 ; 10000108 v_mul_f32_e32 v1, s2, v1 ; 10020202 v_mad_f32 v1, -s3, v2, -v1 ; D2820001 A4060403 v_mad_f32 v0, -s4, v0, v1 ; D2820000 24060004 s_buffer_load_dword s2, s[8:11], 0x2a ; C201092A v_add_f32_e64 v1, 0, v2 clamp ; D2060801 00020480 v_log_f32_e32 v1, v1 ; 7E024F01 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_log_f32_e32 v0, v0 ; 7E004F00 s_buffer_load_dword s3, s[8:11], 0xc ; C201890C s_buffer_load_dword s4, s[8:11], 0xd ; C202090D s_buffer_load_dword s5, s[8:11], 0xe ; C202890E s_buffer_load_dword s6, s[8:11], 0x12 ; C2030912 s_buffer_load_dword s7, s[8:11], 0x13 ; C2038913 v_mul_f32_e64 v2, 4.0, s13 ; D2100002 00001AF6 v_mul_legacy_f32_e32 v0, v2, v0 ; 0E000102 v_mul_legacy_f32_e32 v1, s13, v1 ; 0E02020D v_exp_f32_e32 v0, v0 ; 7E004B00 v_mad_f32 v0, v0, s13, 1.0 ; D2820000 03C81B00 v_add_f32_e32 v2, s12, v10 ; 0604140C v_exp_f32_e32 v1, v1 ; 7E024B01 v_add_f32_e32 v0, v1, v0 ; 06000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v3, v0, v11 ; 10061700 v_mul_f32_e32 v4, v0, v12 ; 10081900 v_mul_f32_e32 v0, v0, v13 ; 10001B00 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_sub_f32_e32 v7, 1.0, v2 ; 080E04F2 v_mul_f32_e32 v3, v3, v7 ; 10060F03 v_mac_f32_e32 v3, s1, v2 ; 3E060401 v_sub_f32_e32 v1, 1.0, v1 ; 080202F2 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_sub_f32_e32 v8, 1.0, v1 ; 081002F2 v_mul_f32_e32 v3, v3, v8 ; 10061103 v_mac_f32_e32 v3, s1, v1 ; 3E060201 v_mul_f32_e32 v4, v4, v7 ; 10080F04 v_mul_f32_e32 v0, v0, v7 ; 10000F00 v_mac_f32_e32 v4, s0, v2 ; 3E080400 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s2, v2 ; 3E000402 v_mul_f32_e32 v2, v4, v8 ; 10041104 v_mac_f32_e32 v2, s0, v1 ; 3E040200 v_mul_f32_e32 v0, v0, v8 ; 10001100 v_mac_f32_e32 v0, s2, v1 ; 3E000202 v_add_f32_e32 v1, v5, v5 ; 06020B05 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mov_b32_e32 v4, s7 ; 7E080207 v_mac_f32_e32 v4, s6, v6 ; 3E080C06 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_sub_f32_e32 v5, 1.0, v4 ; 080A08F2 v_mul_f32_e32 v6, s3, v5 ; 100C0A03 v_mac_f32_e32 v6, v3, v4 ; 3E0C0903 v_mul_f32_e32 v3, s4, v5 ; 10060A04 v_mac_f32_e32 v3, v2, v4 ; 3E060902 v_mul_f32_e32 v2, s5, v5 ; 10040A05 v_mac_f32_e32 v2, v0, v4 ; 3E040900 v_cvt_pkrtz_f16_f32_e32 v0, v6, v3 ; 5E000706 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 24 Code Size: 1040 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..7] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[4], IN[0].xxxx 1: MAD TEMP[0], CONST[5], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[6], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[7], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[0], IN[0].xxxx 5: MAD TEMP[1], CONST[1], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[2], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1].xyz, CONST[3], IN[0].wwww, TEMP[1] 8: MOV TEMP[1].yzw, TEMP[1].yxyz 9: MOV TEMP[1].x, TEMP[0].zzzz 10: MOV OUT[1], TEMP[0] 11: MOV OUT[2], TEMP[1] 12: MOV OUT[0], TEMP[0] 13: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = extractelement <4 x float> %44, i32 3 %49 = fmul float %25, %45 %50 = fmul float %26, %45 %51 = fmul float %27, %45 %52 = fmul float %28, %45 %53 = fmul float %29, %46 %54 = fadd float %53, %49 %55 = fmul float %30, %46 %56 = fadd float %55, %50 %57 = fmul float %31, %46 %58 = fadd float %57, %51 %59 = fmul float %32, %46 %60 = fadd float %59, %52 %61 = fmul float %33, %47 %62 = fadd float %61, %54 %63 = fmul float %34, %47 %64 = fadd float %63, %56 %65 = fmul float %35, %47 %66 = fadd float %65, %58 %67 = fmul float %36, %47 %68 = fadd float %67, %60 %69 = fmul float %37, %48 %70 = fadd float %69, %62 %71 = fmul float %38, %48 %72 = fadd float %71, %64 %73 = fmul float %39, %48 %74 = fadd float %73, %66 %75 = fmul float %40, %48 %76 = fadd float %75, %68 %77 = fmul float %13, %45 %78 = fmul float %14, %45 %79 = fmul float %15, %45 %80 = fmul float %16, %46 %81 = fadd float %80, %77 %82 = fmul float %17, %46 %83 = fadd float %82, %78 %84 = fmul float %18, %46 %85 = fadd float %84, %79 %86 = fmul float %19, %47 %87 = fadd float %86, %81 %88 = fmul float %20, %47 %89 = fadd float %88, %83 %90 = fmul float %21, %47 %91 = fadd float %90, %85 %92 = fmul float %22, %48 %93 = fadd float %92, %87 %94 = fmul float %23, %48 %95 = fadd float %94, %89 %96 = fmul float %24, %48 %97 = fadd float %96, %91 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %70, float %72, float %74, float %76) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %74, float %93, float %95, float %97) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %70, float %72, float %74, float %76) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s12, s[0:3], 0xa ; C206010A s_buffer_load_dword s13, s[0:3], 0xc ; C206810C s_buffer_load_dword s14, s[0:3], 0xd ; C207010D s_buffer_load_dword s15, s[0:3], 0xe ; C207810E s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110 s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111 s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112 s_buffer_load_dword s19, s[0:3], 0x13 ; C2098113 s_buffer_load_dword s20, s[0:3], 0x14 ; C20A0114 s_buffer_load_dword s21, s[0:3], 0x15 ; C20A8115 s_buffer_load_dword s22, s[0:3], 0x16 ; C20B0116 s_buffer_load_dword s23, s[0:3], 0x17 ; C20B8117 s_buffer_load_dword s24, s[0:3], 0x18 ; C20C0118 s_buffer_load_dword s25, s[0:3], 0x19 ; C20C8119 s_buffer_load_dword s26, s[0:3], 0x1a ; C20D011A s_buffer_load_dword s27, s[0:3], 0x1b ; C20D811B s_buffer_load_dword s28, s[0:3], 0x1c ; C20E011C s_buffer_load_dword s29, s[0:3], 0x1d ; C20E811D s_buffer_load_dword s30, s[0:3], 0x1e ; C20F011E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s16, v0 ; 10080010 v_mac_f32_e32 v4, s20, v1 ; 3E080214 v_mul_f32_e32 v5, s17, v0 ; 100A0011 v_mac_f32_e32 v5, s21, v1 ; 3E0A0215 v_mul_f32_e32 v6, s18, v0 ; 100C0012 v_mac_f32_e32 v6, s22, v1 ; 3E0C0216 v_mul_f32_e32 v7, s19, v0 ; 100E0013 v_mac_f32_e32 v7, s23, v1 ; 3E0E0217 v_mul_f32_e32 v8, s4, v0 ; 10100004 v_mac_f32_e32 v8, s7, v1 ; 3E100207 v_mul_f32_e32 v9, s5, v0 ; 10120005 v_mac_f32_e32 v9, s8, v1 ; 3E120208 v_mul_f32_e32 v0, s6, v0 ; 10000006 v_mac_f32_e32 v0, s9, v1 ; 3E000209 v_mac_f32_e32 v4, s24, v2 ; 3E080418 v_mac_f32_e32 v5, s25, v2 ; 3E0A0419 v_mac_f32_e32 v6, s26, v2 ; 3E0C041A v_mac_f32_e32 v7, s27, v2 ; 3E0E041B v_mac_f32_e32 v8, s10, v2 ; 3E10040A v_mac_f32_e32 v9, s11, v2 ; 3E12040B v_mac_f32_e32 v0, s12, v2 ; 3E00040C v_mac_f32_e32 v4, s28, v3 ; 3E08061C v_mac_f32_e32 v5, s29, v3 ; 3E0A061D v_mac_f32_e32 v6, s30, v3 ; 3E0C061E v_mac_f32_e32 v7, s0, v3 ; 3E0E0600 v_mac_f32_e32 v8, s13, v3 ; 3E10060D v_mac_f32_e32 v9, s14, v3 ; 3E12060E v_mac_f32_e32 v0, s15, v3 ; 3E00060F exp 15, 32, 0, 0, 0, v4, v5, v6, v7 ; F800020F 07060504 exp 15, 33, 0, 0, 0, v6, v8, v9, v0 ; F800021F 00090806 exp 15, 12, 0, 1, 0, v4, v5, v6, v7 ; F80008CF 07060504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 12 Code Size: 284 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL CONST[0..5] DCL CONST[8] DCL CONST[13..24] DCL TEMP[0..14], LOCAL IMM[0] FLT32 { 0.5000, 1.0000, 2.0000, 0.0000} IMM[1] FLT32 { -1.0000, -0.5000, 0.8000, 0.7000} IMM[2] FLT32 { 8.0000, 0.0100, 20.0000, 0.0050} IMM[3] FLT32 { 3.0000, 0.0500, 0.3000, 3.1416} IMM[4] FLT32 { -0.3000, 1.5000, 0.6000, 6.0000} IMM[5] FLT32 { 0.2000, 0.1000, 0.0000, 0.0000} 0: MUL TEMP[0].xyw, IN[0], IMM[0].xxxx 1: ADD TEMP[1].xy, TEMP[0].xyyy, TEMP[0].wwww 2: MOV TEMP[1].zw, IN[0].wwzw 3: MOV TEMP[2].x, TEMP[0].xxxx 4: MUL TEMP[3].x, TEMP[0].yyyy, CONST[2].xxxx 5: MOV TEMP[2].y, TEMP[3].xxxx 6: ADD TEMP[0].xy, TEMP[2].xyyy, TEMP[0].wwww 7: MOV TEMP[0].zw, IN[0].wwzw 8: ADD TEMP[2].xyz, IN[1].yzww, -CONST[1].xyzz 9: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 10: RSQ TEMP[3].x, TEMP[3].xxxx 11: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xxxx 12: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[2].xyzz 13: SQRT TEMP[2].x, TEMP[2].xxxx 14: MUL TEMP[4].xy, IN[1].ywww, CONST[13].xxxx 15: MUL TEMP[5].x, IMM[0].xxxx, CONST[0].xxxx 16: MUL TEMP[6].x, IMM[0].zzzz, TEMP[5].xxxx 17: ADD TEMP[7].x, TEMP[4].xxxx, IMM[0].xxxx 18: MOV TEMP[7].y, TEMP[4].yyyy 19: MOV TEMP[8].x, -TEMP[4].xxxx 20: MOV TEMP[8].y, TEMP[4].yyyy 21: MOV TEMP[9].x, TEMP[4].xxxx 22: ADD TEMP[10].x, TEMP[4].yyyy, IMM[0].xxxx 23: MOV TEMP[9].y, TEMP[10].xxxx 24: MOV TEMP[10].x, TEMP[4].xxxx 25: MOV TEMP[10].y, -TEMP[4].yyyy 26: MAD TEMP[7].xy, IMM[0].ywww, TEMP[6].xxxx, TEMP[7].xyyy 27: MOV TEMP[7].xy, TEMP[7].xyyy 28: TEX TEMP[7], TEMP[7], SAMP[2], 2D 29: MAD TEMP[8].xy, IMM[0].ywww, TEMP[6].xxxx, TEMP[8].xyyy 30: MOV TEMP[8].xy, TEMP[8].xyyy 31: TEX TEMP[8], TEMP[8], SAMP[2], 2D 32: ADD TEMP[7], TEMP[7], TEMP[8] 33: MAD TEMP[8].xy, IMM[0].wyyy, TEMP[6].xxxx, TEMP[9].xyyy 34: MOV TEMP[8].xy, TEMP[8].xyyy 35: TEX TEMP[8], TEMP[8], SAMP[2], 2D 36: MAD TEMP[6].xy, IMM[0].wyyy, TEMP[6].xxxx, TEMP[10].xyyy 37: MOV TEMP[6].xy, TEMP[6].xyyy 38: TEX TEMP[6], TEMP[6], SAMP[2], 2D 39: ADD TEMP[6], TEMP[8], TEMP[6] 40: ADD TEMP[6].xy, TEMP[7], TEMP[6] 41: MAD TEMP[7].xy, TEMP[6].xyyy, IMM[0].xxxx, IMM[1].xxxx 42: MUL TEMP[6].xy, TEMP[7].xyyy, IMM[0].zzzz 43: MOV TEMP[6].z, IMM[0].yyyy 44: MUL TEMP[5].x, IMM[0].zzzz, TEMP[5].xxxx 45: ADD TEMP[7].x, TEMP[4].xxxx, IMM[0].xxxx 46: MOV TEMP[7].y, TEMP[4].yyyy 47: MOV TEMP[8].x, -TEMP[4].xxxx 48: MOV TEMP[8].y, TEMP[4].yyyy 49: MOV TEMP[9].x, TEMP[4].xxxx 50: ADD TEMP[10].x, TEMP[4].yyyy, IMM[0].xxxx 51: MOV TEMP[9].y, TEMP[10].xxxx 52: MOV TEMP[10].x, TEMP[4].xxxx 53: MOV TEMP[10].y, -TEMP[4].yyyy 54: MAD TEMP[4].xy, IMM[0].ywww, TEMP[5].xxxx, TEMP[7].xyyy 55: MOV TEMP[4].xy, TEMP[4].xyyy 56: TEX TEMP[4], TEMP[4], SAMP[2], 2D 57: MAD TEMP[7].xy, IMM[0].ywww, TEMP[5].xxxx, TEMP[8].xyyy 58: MOV TEMP[7].xy, TEMP[7].xyyy 59: TEX TEMP[7], TEMP[7], SAMP[2], 2D 60: ADD TEMP[4], TEMP[4], TEMP[7] 61: MAD TEMP[7].xy, IMM[0].wyyy, TEMP[5].xxxx, TEMP[9].xyyy 62: MOV TEMP[7].xy, TEMP[7].xyyy 63: TEX TEMP[7], TEMP[7], SAMP[2], 2D 64: MAD TEMP[5].xy, IMM[0].wyyy, TEMP[5].xxxx, TEMP[10].xyyy 65: MOV TEMP[5].xy, TEMP[5].xyyy 66: TEX TEMP[5], TEMP[5], SAMP[2], 2D 67: ADD TEMP[5], TEMP[7], TEMP[5] 68: ADD TEMP[4].xy, TEMP[4], TEMP[5] 69: MAD TEMP[5].xy, TEMP[4].xyyy, IMM[0].xxxx, IMM[1].xxxx 70: MUL TEMP[4].xy, TEMP[5].xyyy, IMM[0].zzzz 71: MOV TEMP[4].z, IMM[0].yyyy 72: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 73: RSQ TEMP[5].x, TEMP[5].xxxx 74: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 75: DP3 TEMP[5].x, TEMP[6].xyzz, TEMP[6].xyzz 76: RSQ TEMP[5].x, TEMP[5].xxxx 77: MUL TEMP[5].xyz, TEMP[6].xyzz, TEMP[5].xxxx 78: MUL TEMP[6].x, IMM[0].zzzz, CONST[0].xxxx 79: FRC TEMP[6].x, TEMP[6].xxxx 80: LRP TEMP[4].xyz, TEMP[6].xxxx, TEMP[5].xzyy, TEMP[4].xzyy 81: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 82: RSQ TEMP[5].x, TEMP[5].xxxx 83: MOV TEMP[6].xy, TEMP[0].xyyy 84: MOV TEMP[6].w, IN[0].wwww 85: TXP TEMP[6].xyz, TEMP[6], SAMP[5], 2D 86: ADD TEMP[6].xyz, TEMP[6].xzyy, IMM[1].yxyy 87: MAD TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx, TEMP[6].xyzz 88: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 89: RSQ TEMP[5].x, TEMP[5].xxxx 90: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 91: MOV TEMP[5].x, -IN[0].wwww 92: MOV TEMP[6].xy, TEMP[0].xyyy 93: MOV TEMP[6].w, IN[0].wwww 94: TXP TEMP[6].x, TEMP[6], SAMP[1], 2D 95: MAD TEMP[6].x, CONST[3].zzzz, TEMP[6].xxxx, CONST[3].wwww 96: RCP TEMP[6].x, TEMP[6].xxxx 97: ADD TEMP[6].x, TEMP[6].xxxx, TEMP[5].xxxx 98: MAX TEMP[7].x, TEMP[6].xxxx, IMM[0].wwww 99: MUL TEMP[7].x, TEMP[7].xxxx, CONST[20].xxxx 100: POW TEMP[7].x, TEMP[7].xxxx, IMM[0].xxxx 101: MOV_SAT TEMP[7].x, TEMP[7].xxxx 102: MOV TEMP[8].zw, IN[0].wwzw 103: MUL TEMP[9].xy, TEMP[4].xzzz, CONST[19].xxxx 104: MOV_SAT TEMP[10].x, TEMP[7].xxxx 105: MUL TEMP[10].xy, CONST[8].xyyy, TEMP[10].xxxx 106: POW TEMP[11].x, IN[0].zzzz, IMM[1].zzzz 107: MUL TEMP[10].xy, TEMP[10].xyyy, TEMP[11].xxxx 108: MAD TEMP[8].xy, TEMP[9].xyyy, TEMP[10].xyyy, IN[0].xyyy 109: MUL TEMP[9].xyw, TEMP[8], IMM[0].xxxx 110: ADD TEMP[9].xy, TEMP[9].xyyy, TEMP[9].wwww 111: MOV TEMP[9].zw, IN[0].wwzw 112: MUL TEMP[8].xyw, TEMP[8], IMM[0].xxxx 113: MOV TEMP[10].x, TEMP[8].xxxx 114: MUL TEMP[11].x, TEMP[8].yyyy, CONST[2].xxxx 115: MOV TEMP[10].y, TEMP[11].xxxx 116: ADD TEMP[8].xy, TEMP[10].xyyy, TEMP[8].wwww 117: MOV TEMP[8].xy, TEMP[8].xyyy 118: MOV TEMP[8].w, IN[0].wwww 119: TXP TEMP[8].x, TEMP[8], SAMP[1], 2D 120: MAD TEMP[8].x, CONST[3].zzzz, TEMP[8].xxxx, CONST[3].wwww 121: RCP TEMP[8].x, TEMP[8].xxxx 122: ADD TEMP[5].x, TEMP[8].xxxx, TEMP[5].xxxx 123: MAX TEMP[5].x, TEMP[5].xxxx, IMM[0].wwww 124: MUL TEMP[5].x, TEMP[5].xxxx, CONST[20].xxxx 125: POW TEMP[5].x, TEMP[5].xxxx, IMM[0].xxxx 126: MOV_SAT TEMP[5].x, TEMP[5].xxxx 127: MUL TEMP[8].x, TEMP[7].xxxx, IMM[1].wwww 128: FSLT TEMP[8].x, TEMP[8].xxxx, TEMP[5].xxxx 129: UIF TEMP[8].xxxx :0 130: MOV TEMP[8].xyw, TEMP[9] 131: ELSE :0 132: MOV TEMP[8].xyw, TEMP[1] 133: ENDIF 134: MOV TEMP[1].xy, TEMP[8].xyyy 135: MOV TEMP[1].w, TEMP[8].wwww 136: TXP TEMP[1].xyz, TEMP[1], SAMP[0], 2D 137: MUL TEMP[8].x, TEMP[7].xxxx, IMM[1].wwww 138: FSLT TEMP[8].x, TEMP[8].xxxx, TEMP[5].xxxx 139: UIF TEMP[8].xxxx :0 140: MOV TEMP[5].x, TEMP[5].xxxx 141: ELSE :0 142: MOV TEMP[5].x, TEMP[7].xxxx 143: ENDIF 144: MOV TEMP[7].zw, IMM[0].wwww 145: MUL TEMP[8].xy, TEMP[4].xzzz, IMM[2].xxxx 146: MUL TEMP[9].x, TEMP[2].xxxx, IMM[2].yyyy 147: MOV_SAT TEMP[9].x, TEMP[9].xxxx 148: MUL TEMP[7].xy, TEMP[8].xyyy, TEMP[9].xxxx 149: ADD TEMP[8].x, TEMP[6].xxxx, TEMP[4].xxxx 150: RCP TEMP[9].x, CONST[21].xxxx 151: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx 152: MUL TEMP[9].x, CONST[0].xxxx, IMM[2].zzzz 153: ADD TEMP[10].x, IN[1].yyyy, IN[1].wwww 154: MAD TEMP[11].x, TEMP[9].xxxx, CONST[21].zzzz, TEMP[8].xxxx 155: MAD TEMP[10].x, TEMP[10].xxxx, IMM[2].wwww, TEMP[11].xxxx 156: MUL TEMP[10].x, TEMP[10].xxxx, IMM[3].xxxx 157: ADD TEMP[11].x, IN[1].yyyy, TEMP[9].xxxx 158: MOV TEMP[11].y, IN[1].wwww 159: ADD TEMP[9].x, -IN[1].yyyy, TEMP[9].xxxx 160: MOV TEMP[9].y, IN[1].wwww 161: DP3 TEMP[12].x, TEMP[4].xyzz, TEMP[4].xyzz 162: RSQ TEMP[12].x, TEMP[12].xxxx 163: MUL TEMP[12].xyz, TEMP[4].xyzz, TEMP[12].xxxx 164: DP4 TEMP[13].x, CONST[22], CONST[22] 165: RSQ TEMP[13].x, TEMP[13].xxxx 166: MUL TEMP[13].xyz, CONST[22], TEMP[13].xxxx 167: ADD TEMP[0].xyw, TEMP[0], TEMP[7] 168: DP3 TEMP[4].x, -TEMP[3].xyzz, TEMP[4].xyzz 169: MOV_SAT TEMP[4].x, TEMP[4].xxxx 170: ADD TEMP[4].x, IMM[0].yyyy, -TEMP[4].xxxx 171: MOV_SAT TEMP[7].x, -TEMP[3].yyyy 172: ADD TEMP[7].x, IMM[0].yyyy, -TEMP[7].xxxx 173: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx 174: DP3 TEMP[14].x, TEMP[12].xyzz, TEMP[13].xyzz 175: MUL TEMP[12].xyz, TEMP[14].xxxx, TEMP[12].xyzz 176: MUL TEMP[12].xyz, IMM[0].zzzz, TEMP[12].xyzz 177: ADD TEMP[12].xyz, TEMP[13].xyzz, -TEMP[12].xyzz 178: DP3 TEMP[3].x, -TEMP[3].xyzz, TEMP[12].xyzz 179: MAX TEMP[3].x, IMM[0].wwww, TEMP[3].xxxx 180: POW TEMP[3].x, TEMP[3].xxxx, CONST[24].xxxx 181: MUL TEMP[3].xyz, CONST[23].xyzz, TEMP[3].xxxx 182: ADD TEMP[12].x, -TEMP[13].yyyy, IMM[3].yyyy 183: MUL TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz 184: MOV_SAT TEMP[12].x, TEMP[12].xxxx 185: MUL TEMP[12].x, IMM[3].xxxx, TEMP[12].xxxx 186: ADD TEMP[13].x, TEMP[5].xxxx, CONST[20].yyyy 187: MOV_SAT TEMP[13].x, TEMP[13].xxxx 188: LRP TEMP[1].xyz, TEMP[13].xxxx, CONST[14].xyzz, TEMP[1].xyzz 189: MUL TEMP[11].xy, TEMP[11].xyyy, CONST[21].yyyy 190: MOV TEMP[11].xy, TEMP[11].xyyy 191: TEX TEMP[11].x, TEMP[11], SAMP[4], 2D 192: MUL TEMP[9].xy, TEMP[9].xyyy, CONST[21].yyyy 193: MOV TEMP[9].xy, TEMP[9].xyyy 194: TEX TEMP[9].x, TEMP[9], SAMP[4], 2D 195: ADD TEMP[9].x, TEMP[11].xxxx, TEMP[9].xxxx 196: MUL TEMP[9].x, TEMP[9].xxxx, IMM[0].xxxx 197: ADD TEMP[8].x, IMM[0].yyyy, -TEMP[8].xxxx 198: MOV_SAT TEMP[8].x, TEMP[8].xxxx 199: POW TEMP[8].x, TEMP[8].xxxx, IMM[3].zzzz 200: FLR TEMP[11].x, TEMP[10].xxxx 201: FRC TEMP[10].x, TEMP[10].xxxx 202: POW TEMP[10].x, TEMP[10].xxxx, IMM[3].zzzz 203: ADD TEMP[10].x, TEMP[11].xxxx, TEMP[10].xxxx 204: MUL TEMP[10].x, TEMP[10].xxxx, IMM[3].wwww 205: SIN TEMP[10].x, TEMP[10].xxxx 206: ADD TEMP[10].x, TEMP[10].xxxx, IMM[4].xxxx 207: MUL TEMP[10].x, TEMP[10].xxxx, IMM[4].yyyy 208: POW TEMP[10].x, TEMP[10].xxxx, IMM[3].xxxx 209: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[10].xxxx 210: MOV_SAT TEMP[8].x, TEMP[8].xxxx 211: MUL TEMP[8].x, TEMP[9].xxxx, TEMP[8].xxxx 212: MUL TEMP[8].x, TEMP[8].xxxx, CONST[21].wwww 213: MUL TEMP[9].x, TEMP[2].xxxx, IMM[2].yyyy 214: ADD TEMP[9].x, IMM[0].yyyy, -TEMP[9].xxxx 215: MAX TEMP[9].x, TEMP[9].xxxx, IMM[4].zzzz 216: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx 217: MOV_SAT TEMP[8].x, TEMP[8].xxxx 218: LRP TEMP[1].xyz, TEMP[8].xxxx, CONST[15].xyzz, TEMP[1].xyzz 219: MOV TEMP[8].xy, TEMP[0].xyyy 220: MOV TEMP[8].w, TEMP[0].wwww 221: TXP TEMP[0].xyz, TEMP[8], SAMP[3], 2D 222: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[4].xxxx 223: LRP TEMP[4].xyz, TEMP[4].xxxx, CONST[17], CONST[16] 224: LRP TEMP[0].xyz, CONST[18].xxxx, TEMP[4].xyzz, TEMP[0].xyzz 225: MAD TEMP[4].x, CONST[17].zzzz, IMM[4].wwww, IMM[5].xxxx 226: MOV_SAT TEMP[4].x, TEMP[4].xxxx 227: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xxxx 228: MUL TEMP[4].x, TEMP[7].xxxx, TEMP[7].xxxx 229: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx 230: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 231: LRP TEMP[4].x, IMM[5].xxxx, IMM[0].yyyy, TEMP[4].xxxx 232: MOV_SAT TEMP[4].x, TEMP[4].xxxx 233: MUL TEMP[2].x, TEMP[2].xxxx, IMM[5].yyyy 234: MOV_SAT TEMP[2].x, TEMP[2].xxxx 235: MUL TEMP[2].x, TEMP[4].xxxx, TEMP[2].xxxx 236: LRP TEMP[0].xyz, TEMP[2].xxxx, TEMP[0].xyzz, TEMP[1].xyzz 237: MAD TEMP[0].xyz, TEMP[3].xyzz, TEMP[12].xxxx, TEMP[0].xyzz 238: MUL TEMP[1].x, TEMP[6].xxxx, IMM[0].zzzz 239: MOV_SAT TEMP[1].x, TEMP[1].xxxx 240: MOV TEMP[1].w, TEMP[1].xxxx 241: MAD TEMP[2].x, IN[1].xxxx, CONST[5].zzzz, CONST[5].wwww 242: MOV_SAT TEMP[2].x, TEMP[2].xxxx 243: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[0].xyzz, CONST[4].xyzz 244: MOV OUT[0], TEMP[1] 245: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 248) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 324) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 340) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 344) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 348) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 356) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 360) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 364) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 372) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 376) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384) %67 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %68 = load <32 x i8>, <32 x i8> addrspace(2)* %67, align 32, !tbaa !0 %69 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %72 = load <8 x i32>, <8 x i32> addrspace(2)* %71, align 32, !tbaa !0 %73 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %74 = load <4 x i32>, <4 x i32> addrspace(2)* %73, align 16, !tbaa !0 %75 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %76 = load <8 x i32>, <8 x i32> addrspace(2)* %75, align 32, !tbaa !0 %77 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %78 = load <4 x i32>, <4 x i32> addrspace(2)* %77, align 16, !tbaa !0 %79 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %80 = bitcast <8 x i32> addrspace(2)* %79 to <32 x i8> addrspace(2)* %81 = load <32 x i8>, <32 x i8> addrspace(2)* %80, align 32, !tbaa !0 %82 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %83 = bitcast <4 x i32> addrspace(2)* %82 to <16 x i8> addrspace(2)* %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 %85 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %86 = load <8 x i32>, <8 x i32> addrspace(2)* %85, align 32, !tbaa !0 %87 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %88 = load <4 x i32>, <4 x i32> addrspace(2)* %87, align 16, !tbaa !0 %89 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %90 = bitcast <8 x i32> addrspace(2)* %89 to <32 x i8> addrspace(2)* %91 = load <32 x i8>, <32 x i8> addrspace(2)* %90, align 32, !tbaa !0 %92 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %93 = bitcast <4 x i32> addrspace(2)* %92 to <16 x i8> addrspace(2)* %94 = load <16 x i8>, <16 x i8> addrspace(2)* %93, align 16, !tbaa !0 %95 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %103 = fmul float %95, 5.000000e-01 %104 = fmul float %96, 5.000000e-01 %105 = fmul float %98, 5.000000e-01 %106 = fadd float %103, %105 %107 = fadd float %104, %105 %108 = fmul float %104, %28 %109 = fadd float %103, %105 %110 = fadd float %108, %105 %111 = fsub float %100, %25 %112 = fsub float %101, %26 %113 = fsub float %102, %27 %114 = fmul float %111, %111 %115 = fmul float %112, %112 %116 = fadd float %115, %114 %117 = fmul float %113, %113 %118 = fadd float %116, %117 %119 = call float @llvm.AMDGPU.rsq.clamped.f32(float %118) %120 = fmul float %111, %119 %121 = fmul float %112, %119 %122 = fmul float %113, %119 %123 = fmul float %111, %111 %124 = fmul float %112, %112 %125 = fadd float %124, %123 %126 = fmul float %113, %113 %127 = fadd float %125, %126 %128 = call float @llvm.sqrt.f32(float %127) %129 = fmul float %100, %38 %130 = fmul float %102, %38 %131 = fmul float %24, 5.000000e-01 %132 = fmul float %131, 2.000000e+00 %133 = fadd float %129, 5.000000e-01 %134 = fadd float %130, 5.000000e-01 %135 = fadd float %132, %133 %136 = fmul float %132, 0.000000e+00 %137 = fadd float %136, %130 %138 = bitcast float %135 to i32 %139 = bitcast float %137 to i32 %140 = insertelement <2 x i32> undef, i32 %138, i32 0 %141 = insertelement <2 x i32> %140, i32 %139, i32 1 %142 = bitcast <8 x i32> %76 to <32 x i8> %143 = bitcast <4 x i32> %78 to <16 x i8> %144 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %141, <32 x i8> %142, <16 x i8> %143, i32 2) %145 = extractelement <4 x float> %144, i32 0 %146 = extractelement <4 x float> %144, i32 1 %147 = fsub float %132, %129 %148 = fmul float %132, 0.000000e+00 %149 = fadd float %148, %130 %150 = bitcast float %147 to i32 %151 = bitcast float %149 to i32 %152 = insertelement <2 x i32> undef, i32 %150, i32 0 %153 = insertelement <2 x i32> %152, i32 %151, i32 1 %154 = bitcast <8 x i32> %76 to <32 x i8> %155 = bitcast <4 x i32> %78 to <16 x i8> %156 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %153, <32 x i8> %154, <16 x i8> %155, i32 2) %157 = extractelement <4 x float> %156, i32 0 %158 = extractelement <4 x float> %156, i32 1 %159 = fadd float %145, %157 %160 = fadd float %146, %158 %161 = fmul float %132, 0.000000e+00 %162 = fadd float %161, %129 %163 = fadd float %132, %134 %164 = bitcast float %162 to i32 %165 = bitcast float %163 to i32 %166 = insertelement <2 x i32> undef, i32 %164, i32 0 %167 = insertelement <2 x i32> %166, i32 %165, i32 1 %168 = bitcast <8 x i32> %76 to <32 x i8> %169 = bitcast <4 x i32> %78 to <16 x i8> %170 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %167, <32 x i8> %168, <16 x i8> %169, i32 2) %171 = extractelement <4 x float> %170, i32 0 %172 = extractelement <4 x float> %170, i32 1 %173 = fmul float %132, 0.000000e+00 %174 = fadd float %173, %129 %175 = fsub float %132, %130 %176 = bitcast float %174 to i32 %177 = bitcast float %175 to i32 %178 = insertelement <2 x i32> undef, i32 %176, i32 0 %179 = insertelement <2 x i32> %178, i32 %177, i32 1 %180 = bitcast <8 x i32> %76 to <32 x i8> %181 = bitcast <4 x i32> %78 to <16 x i8> %182 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %179, <32 x i8> %180, <16 x i8> %181, i32 2) %183 = extractelement <4 x float> %182, i32 0 %184 = extractelement <4 x float> %182, i32 1 %185 = fadd float %171, %183 %186 = fadd float %172, %184 %187 = fadd float %159, %185 %188 = fadd float %160, %186 %189 = fmul float %187, 5.000000e-01 %190 = fadd float %189, -1.000000e+00 %191 = fmul float %188, 5.000000e-01 %192 = fadd float %191, -1.000000e+00 %193 = fmul float %190, 2.000000e+00 %194 = fmul float %192, 2.000000e+00 %195 = fmul float %131, 2.000000e+00 %196 = fadd float %129, 5.000000e-01 %197 = fadd float %130, 5.000000e-01 %198 = fadd float %195, %196 %199 = fmul float %195, 0.000000e+00 %200 = fadd float %199, %130 %201 = bitcast float %198 to i32 %202 = bitcast float %200 to i32 %203 = insertelement <2 x i32> undef, i32 %201, i32 0 %204 = insertelement <2 x i32> %203, i32 %202, i32 1 %205 = bitcast <8 x i32> %76 to <32 x i8> %206 = bitcast <4 x i32> %78 to <16 x i8> %207 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %204, <32 x i8> %205, <16 x i8> %206, i32 2) %208 = extractelement <4 x float> %207, i32 0 %209 = extractelement <4 x float> %207, i32 1 %210 = fsub float %195, %129 %211 = fmul float %195, 0.000000e+00 %212 = fadd float %211, %130 %213 = bitcast float %210 to i32 %214 = bitcast float %212 to i32 %215 = insertelement <2 x i32> undef, i32 %213, i32 0 %216 = insertelement <2 x i32> %215, i32 %214, i32 1 %217 = bitcast <8 x i32> %76 to <32 x i8> %218 = bitcast <4 x i32> %78 to <16 x i8> %219 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %216, <32 x i8> %217, <16 x i8> %218, i32 2) %220 = extractelement <4 x float> %219, i32 0 %221 = extractelement <4 x float> %219, i32 1 %222 = fadd float %208, %220 %223 = fadd float %209, %221 %224 = fmul float %195, 0.000000e+00 %225 = fadd float %224, %129 %226 = fadd float %195, %197 %227 = bitcast float %225 to i32 %228 = bitcast float %226 to i32 %229 = insertelement <2 x i32> undef, i32 %227, i32 0 %230 = insertelement <2 x i32> %229, i32 %228, i32 1 %231 = bitcast <8 x i32> %76 to <32 x i8> %232 = bitcast <4 x i32> %78 to <16 x i8> %233 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %230, <32 x i8> %231, <16 x i8> %232, i32 2) %234 = extractelement <4 x float> %233, i32 0 %235 = extractelement <4 x float> %233, i32 1 %236 = fmul float %195, 0.000000e+00 %237 = fadd float %236, %129 %238 = fsub float %195, %130 %239 = bitcast float %237 to i32 %240 = bitcast float %238 to i32 %241 = insertelement <2 x i32> undef, i32 %239, i32 0 %242 = insertelement <2 x i32> %241, i32 %240, i32 1 %243 = bitcast <8 x i32> %76 to <32 x i8> %244 = bitcast <4 x i32> %78 to <16 x i8> %245 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %242, <32 x i8> %243, <16 x i8> %244, i32 2) %246 = extractelement <4 x float> %245, i32 0 %247 = extractelement <4 x float> %245, i32 1 %248 = fadd float %234, %246 %249 = fadd float %235, %247 %250 = fadd float %222, %248 %251 = fadd float %223, %249 %252 = fmul float %250, 5.000000e-01 %253 = fadd float %252, -1.000000e+00 %254 = fmul float %251, 5.000000e-01 %255 = fadd float %254, -1.000000e+00 %256 = fmul float %253, 2.000000e+00 %257 = fmul float %255, 2.000000e+00 %258 = fmul float %256, %256 %259 = fmul float %257, %257 %260 = fadd float %259, %258 %261 = fadd float %260, 1.000000e+00 %262 = call float @llvm.AMDGPU.rsq.clamped.f32(float %261) %263 = fmul float %256, %262 %264 = fmul float %257, %262 %265 = fmul float %193, %193 %266 = fmul float %194, %194 %267 = fadd float %266, %265 %268 = fadd float %267, 1.000000e+00 %269 = call float @llvm.AMDGPU.rsq.clamped.f32(float %268) %270 = fmul float %193, %269 %271 = fmul float %194, %269 %272 = fmul float %24, 2.000000e+00 %273 = call float @llvm.floor.f32(float %272) %274 = fsub float %272, %273 %275 = call float @llvm.AMDGPU.lrp(float %274, float %270, float %263) %276 = call float @llvm.AMDGPU.lrp(float %274, float %269, float %262) %277 = call float @llvm.AMDGPU.lrp(float %274, float %271, float %264) %278 = fmul float %275, %275 %279 = fmul float %276, %276 %280 = fadd float %279, %278 %281 = fmul float %277, %277 %282 = fadd float %280, %281 %283 = call float @llvm.AMDGPU.rsq.clamped.f32(float %282) %284 = fdiv float %109, %98 %285 = fdiv float %110, %98 %286 = bitcast float %284 to i32 %287 = bitcast float %285 to i32 %288 = insertelement <2 x i32> undef, i32 %286, i32 0 %289 = insertelement <2 x i32> %288, i32 %287, i32 1 %290 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %289, <32 x i8> %91, <16 x i8> %94, i32 2) %291 = extractelement <4 x float> %290, i32 0 %292 = extractelement <4 x float> %290, i32 1 %293 = extractelement <4 x float> %290, i32 2 %294 = fadd float %291, -5.000000e-01 %295 = fadd float %293, -1.000000e+00 %296 = fadd float %292, -5.000000e-01 %297 = fmul float %275, %283 %298 = fadd float %297, %294 %299 = fmul float %276, %283 %300 = fadd float %299, %295 %301 = fmul float %277, %283 %302 = fadd float %301, %296 %303 = fmul float %298, %298 %304 = fmul float %300, %300 %305 = fadd float %304, %303 %306 = fmul float %302, %302 %307 = fadd float %305, %306 %308 = call float @llvm.AMDGPU.rsq.clamped.f32(float %307) %309 = fmul float %298, %308 %310 = fmul float %300, %308 %311 = fmul float %302, %308 %312 = fdiv float %109, %98 %313 = fdiv float %110, %98 %314 = bitcast float %312 to i32 %315 = bitcast float %313 to i32 %316 = insertelement <2 x i32> undef, i32 %314, i32 0 %317 = insertelement <2 x i32> %316, i32 %315, i32 1 %318 = bitcast <8 x i32> %72 to <32 x i8> %319 = bitcast <4 x i32> %74 to <16 x i8> %320 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %317, <32 x i8> %318, <16 x i8> %319, i32 2) %321 = extractelement <4 x float> %320, i32 0 %322 = fmul float %29, %321 %323 = fadd float %322, %30 %324 = fdiv float 1.000000e+00, %323 %325 = fsub float %324, %98 %326 = call float @llvm.maxnum.f32(float %325, float 0.000000e+00) %327 = fmul float %326, %53 %sqrtf = call float @sqrtf(float %327) #1 %fabsf = call float @fabsf(float %sqrtf) #1 %328 = fcmp oeq float %327, 0xFFF0000000000000 %329 = select i1 %328, float 0x7FF0000000000000, float %fabsf %330 = call float @llvm.AMDIL.clamp.(float %329, float 0.000000e+00, float 1.000000e+00) %331 = fmul float %309, %52 %332 = fmul float %311, %52 %333 = call float @llvm.AMDIL.clamp.(float %330, float 0.000000e+00, float 1.000000e+00) %334 = fmul float %36, %333 %335 = fmul float %37, %333 %336 = call float @llvm.pow.f32(float %97, float 0x3FE99999A0000000) %337 = fmul float %334, %336 %338 = fmul float %335, %336 %339 = fmul float %331, %337 %340 = fadd float %339, %95 %341 = fmul float %332, %338 %342 = fadd float %341, %96 %343 = fmul float %340, 5.000000e-01 %344 = fmul float %342, 5.000000e-01 %345 = fmul float %98, 5.000000e-01 %346 = fadd float %343, %345 %347 = fadd float %344, %345 %348 = fmul float %340, 5.000000e-01 %349 = fmul float %342, 5.000000e-01 %350 = fmul float %98, 5.000000e-01 %351 = fmul float %349, %28 %352 = fadd float %348, %350 %353 = fadd float %351, %350 %354 = fdiv float %352, %98 %355 = fdiv float %353, %98 %356 = bitcast float %354 to i32 %357 = bitcast float %355 to i32 %358 = insertelement <2 x i32> undef, i32 %356, i32 0 %359 = insertelement <2 x i32> %358, i32 %357, i32 1 %360 = bitcast <8 x i32> %72 to <32 x i8> %361 = bitcast <4 x i32> %74 to <16 x i8> %362 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %359, <32 x i8> %360, <16 x i8> %361, i32 2) %363 = extractelement <4 x float> %362, i32 0 %364 = fmul float %29, %363 %365 = fadd float %364, %30 %366 = fdiv float 1.000000e+00, %365 %367 = fsub float %366, %98 %368 = call float @llvm.maxnum.f32(float %367, float 0.000000e+00) %369 = fmul float %368, %53 %sqrtf64 = call float @sqrtf(float %369) #1 %fabsf65 = call float @fabsf(float %sqrtf64) #1 %370 = fcmp oeq float %369, 0xFFF0000000000000 %371 = select i1 %370, float 0x7FF0000000000000, float %fabsf65 %372 = call float @llvm.AMDIL.clamp.(float %371, float 0.000000e+00, float 1.000000e+00) %373 = fmul float %330, 0x3FE6666660000000 %374 = fcmp olt float %373, %372 %. = select i1 %374, float %346, float %106 %.63 = select i1 %374, float %347, float %107 %375 = fdiv float %., %98 %376 = fdiv float %.63, %98 %377 = bitcast float %375 to i32 %378 = bitcast float %376 to i32 %379 = insertelement <2 x i32> undef, i32 %377, i32 0 %380 = insertelement <2 x i32> %379, i32 %378, i32 1 %381 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %380, <32 x i8> %68, <16 x i8> %70, i32 2) %382 = extractelement <4 x float> %381, i32 0 %383 = extractelement <4 x float> %381, i32 1 %384 = extractelement <4 x float> %381, i32 2 %385 = fmul float %330, 0x3FE6666660000000 %386 = fcmp olt float %385, %372 %temp20.0 = select i1 %386, float %372, float %330 %387 = fmul float %309, 8.000000e+00 %388 = fmul float %311, 8.000000e+00 %389 = fmul float %128, 0x3F847AE140000000 %390 = call float @llvm.AMDIL.clamp.(float %389, float 0.000000e+00, float 1.000000e+00) %391 = fmul float %387, %390 %392 = fmul float %388, %390 %393 = fadd float %325, %309 %394 = fdiv float 1.000000e+00, %55 %395 = fmul float %393, %394 %396 = fmul float %24, 2.000000e+01 %397 = fadd float %100, %102 %398 = fmul float %396, %57 %399 = fadd float %398, %395 %400 = fmul float %397, 0x3F747AE140000000 %401 = fadd float %400, %399 %402 = fmul float %401, 3.000000e+00 %403 = fadd float %100, %396 %404 = fsub float %396, %100 %405 = fmul float %309, %309 %406 = fmul float %310, %310 %407 = fadd float %406, %405 %408 = fmul float %311, %311 %409 = fadd float %407, %408 %410 = call float @llvm.AMDGPU.rsq.clamped.f32(float %409) %411 = fmul float %309, %410 %412 = fmul float %310, %410 %413 = fmul float %311, %410 %414 = fmul float %59, %59 %415 = fmul float %60, %60 %416 = fadd float %414, %415 %417 = fmul float %61, %61 %418 = fadd float %416, %417 %419 = fmul float %62, %62 %420 = fadd float %418, %419 %421 = call float @llvm.AMDGPU.rsq.clamped.f32(float %420) %422 = fmul float %59, %421 %423 = fmul float %60, %421 %424 = fmul float %61, %421 %425 = fadd float %109, %391 %426 = fadd float %110, %392 %427 = fadd float %98, 0.000000e+00 %428 = fmul float %120, %309 %429 = fsub float -0.000000e+00, %428 %430 = fmul float %121, %310 %431 = fsub float %429, %430 %432 = fmul float %122, %311 %433 = fsub float %431, %432 %434 = call float @llvm.AMDIL.clamp.(float %433, float 0.000000e+00, float 1.000000e+00) %435 = fsub float 1.000000e+00, %434 %436 = fsub float -0.000000e+00, %121 %437 = call float @llvm.AMDIL.clamp.(float %436, float 0.000000e+00, float 1.000000e+00) %438 = fsub float 1.000000e+00, %437 %439 = fmul float %438, %438 %440 = fmul float %411, %422 %441 = fmul float %412, %423 %442 = fadd float %441, %440 %443 = fmul float %413, %424 %444 = fadd float %442, %443 %445 = fmul float %444, %411 %446 = fmul float %444, %412 %447 = fmul float %444, %413 %448 = fmul float %445, 2.000000e+00 %449 = fmul float %446, 2.000000e+00 %450 = fmul float %447, 2.000000e+00 %451 = fsub float %422, %448 %452 = fsub float %423, %449 %453 = fsub float %424, %450 %454 = fmul float %120, %451 %455 = fsub float -0.000000e+00, %454 %456 = fmul float %121, %452 %457 = fsub float %455, %456 %458 = fmul float %122, %453 %459 = fsub float %457, %458 %460 = call float @llvm.maxnum.f32(float %459, float 0.000000e+00) %461 = call float @llvm.pow.f32(float %460, float %66) %462 = fmul float %63, %461 %463 = fmul float %64, %461 %464 = fmul float %65, %461 %465 = fsub float 0x3FA99999A0000000, %423 %466 = fmul float %465, 2.000000e+01 %467 = call float @llvm.AMDIL.clamp.(float %466, float 0.000000e+00, float 1.000000e+00) %468 = fmul float %467, 3.000000e+00 %469 = fadd float %temp20.0, %54 %470 = call float @llvm.AMDIL.clamp.(float %469, float 0.000000e+00, float 1.000000e+00) %471 = call float @llvm.AMDGPU.lrp(float %470, float %39, float %382) %472 = call float @llvm.AMDGPU.lrp(float %470, float %40, float %383) %473 = call float @llvm.AMDGPU.lrp(float %470, float %41, float %384) %474 = fmul float %403, %56 %475 = fmul float %102, %56 %476 = bitcast float %474 to i32 %477 = bitcast float %475 to i32 %478 = insertelement <2 x i32> undef, i32 %476, i32 0 %479 = insertelement <2 x i32> %478, i32 %477, i32 1 %480 = bitcast <8 x i32> %86 to <32 x i8> %481 = bitcast <4 x i32> %88 to <16 x i8> %482 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %479, <32 x i8> %480, <16 x i8> %481, i32 2) %483 = extractelement <4 x float> %482, i32 0 %484 = fmul float %404, %56 %485 = fmul float %102, %56 %486 = bitcast float %484 to i32 %487 = bitcast float %485 to i32 %488 = insertelement <2 x i32> undef, i32 %486, i32 0 %489 = insertelement <2 x i32> %488, i32 %487, i32 1 %490 = bitcast <8 x i32> %86 to <32 x i8> %491 = bitcast <4 x i32> %88 to <16 x i8> %492 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %489, <32 x i8> %490, <16 x i8> %491, i32 2) %493 = extractelement <4 x float> %492, i32 0 %494 = fadd float %483, %493 %495 = fmul float %494, 5.000000e-01 %496 = fsub float 1.000000e+00, %395 %497 = call float @llvm.AMDIL.clamp.(float %496, float 0.000000e+00, float 1.000000e+00) %498 = call float @llvm.pow.f32(float %497, float 0x3FD3333340000000) %499 = call float @llvm.floor.f32(float %402) %500 = call float @llvm.floor.f32(float %402) %501 = fsub float %402, %500 %502 = call float @llvm.pow.f32(float %501, float 0x3FD3333340000000) %503 = fadd float %499, %502 %504 = fmul float %503, 0x400921FB80000000 %505 = call float @llvm.sin.f32(float %504) %506 = fadd float %505, 0xBFD3333340000000 %507 = fmul float %506, 1.500000e+00 %508 = call float @llvm.pow.f32(float %507, float 3.000000e+00) %509 = fmul float %498, %508 %510 = call float @llvm.AMDIL.clamp.(float %509, float 0.000000e+00, float 1.000000e+00) %511 = fmul float %495, %510 %512 = fmul float %511, %58 %513 = fmul float %128, 0x3F847AE140000000 %514 = fsub float 1.000000e+00, %513 %515 = call float @llvm.maxnum.f32(float %514, float 0x3FE3333340000000) %516 = fmul float %512, %515 %517 = call float @llvm.AMDIL.clamp.(float %516, float 0.000000e+00, float 1.000000e+00) %518 = call float @llvm.AMDGPU.lrp(float %517, float %42, float %471) %519 = call float @llvm.AMDGPU.lrp(float %517, float %43, float %472) %520 = call float @llvm.AMDGPU.lrp(float %517, float %44, float %473) %521 = fdiv float %425, %427 %522 = fdiv float %426, %427 %523 = bitcast float %521 to i32 %524 = bitcast float %522 to i32 %525 = insertelement <2 x i32> undef, i32 %523, i32 0 %526 = insertelement <2 x i32> %525, i32 %524, i32 1 %527 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %526, <32 x i8> %81, <16 x i8> %84, i32 2) %528 = extractelement <4 x float> %527, i32 0 %529 = extractelement <4 x float> %527, i32 1 %530 = extractelement <4 x float> %527, i32 2 %531 = fmul float %435, %435 %532 = call float @llvm.AMDGPU.lrp(float %531, float %48, float %45) %533 = call float @llvm.AMDGPU.lrp(float %531, float %49, float %46) %534 = call float @llvm.AMDGPU.lrp(float %531, float %50, float %47) %535 = call float @llvm.AMDGPU.lrp(float %51, float %532, float %528) %536 = call float @llvm.AMDGPU.lrp(float %51, float %533, float %529) %537 = call float @llvm.AMDGPU.lrp(float %51, float %534, float %530) %538 = fmul float %50, 6.000000e+00 %539 = fadd float %538, 0x3FC99999A0000000 %540 = call float @llvm.AMDIL.clamp.(float %539, float 0.000000e+00, float 1.000000e+00) %541 = fmul float %535, %540 %542 = fmul float %536, %540 %543 = fmul float %537, %540 %544 = fmul float %439, %439 %545 = fmul float %temp20.0, %temp20.0 %546 = fmul float %544, %545 %547 = call float @llvm.AMDGPU.lrp(float 0x3FC99999A0000000, float 1.000000e+00, float %546) %548 = call float @llvm.AMDIL.clamp.(float %547, float 0.000000e+00, float 1.000000e+00) %549 = fmul float %128, 0x3FB99999A0000000 %550 = call float @llvm.AMDIL.clamp.(float %549, float 0.000000e+00, float 1.000000e+00) %551 = fmul float %548, %550 %552 = call float @llvm.AMDGPU.lrp(float %551, float %541, float %518) %553 = call float @llvm.AMDGPU.lrp(float %551, float %542, float %519) %554 = call float @llvm.AMDGPU.lrp(float %551, float %543, float %520) %555 = fmul float %462, %468 %556 = fadd float %555, %552 %557 = fmul float %463, %468 %558 = fadd float %557, %553 %559 = fmul float %464, %468 %560 = fadd float %559, %554 %561 = fmul float %325, 2.000000e+00 %562 = call float @llvm.AMDIL.clamp.(float %561, float 0.000000e+00, float 1.000000e+00) %563 = fmul float %99, %34 %564 = fadd float %563, %35 %565 = call float @llvm.AMDIL.clamp.(float %564, float 0.000000e+00, float 1.000000e+00) %566 = call float @llvm.AMDGPU.lrp(float %565, float %556, float %31) %567 = call float @llvm.AMDGPU.lrp(float %565, float %558, float %32) %568 = call float @llvm.AMDGPU.lrp(float %565, float %560, float %33) %569 = call i32 @llvm.SI.packf16(float %566, float %567) %570 = bitcast i32 %569 to float %571 = call i32 @llvm.SI.packf16(float %568, float %562) %572 = bitcast i32 %571 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %570, float %572, float %570, float %572) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) declare float @sqrtf(float) declare float @fabsf(float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 s_load_dwordx4 s[48:51], s[4:5], 0x0 ; C0980500 v_interp_p1_f32 v3, v0, 0, 0, [m0] ; C80C0000 v_interp_p2_f32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 v_interp_p1_f32 v2, v0, 2, 0, [m0] ; C8080200 v_interp_p2_f32 v2, [v2], v1, 2, 0, [m0] ; C8090201 v_log_f32_e32 v5, v2 ; 7E0A4F02 v_interp_p1_f32 v6, v0, 3, 0, [m0] ; C8180300 v_interp_p2_f32 v6, [v6], v1, 3, 0, [m0] ; C8190301 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[8:11], 0x0 ; C2060900 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 s_buffer_load_dword s0, s[8:11], 0x4 ; C2000904 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 s_buffer_load_dword s20, s[8:11], 0x5 ; C20A0905 s_buffer_load_dword s21, s[8:11], 0x6 ; C20A8906 s_buffer_load_dword s47, s[8:11], 0x8 ; C2178908 s_buffer_load_dword s45, s[8:11], 0xe ; C216890E s_buffer_load_dword s46, s[8:11], 0xf ; C217090F s_buffer_load_dword s2, s[8:11], 0x10 ; C2010910 s_buffer_load_dword s1, s[8:11], 0x11 ; C2008911 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v8, s0, v7 ; 0A100E00 s_buffer_load_dword s0, s[8:11], 0x12 ; C2000912 s_buffer_load_dword s44, s[8:11], 0x51 ; C2160951 s_buffer_load_dword s17, s[8:11], 0x54 ; C2088954 s_buffer_load_dword s19, s[8:11], 0x55 ; C2098955 s_buffer_load_dword s16, s[8:11], 0x56 ; C2080956 s_buffer_load_dword s3, s[8:11], 0x57 ; C2018957 s_buffer_load_dword s14, s[8:11], 0x58 ; C2070958 s_buffer_load_dword s13, s[8:11], 0x59 ; C2068959 s_buffer_load_dword s15, s[8:11], 0x5a ; C207895A s_buffer_load_dword s18, s[8:11], 0x5b ; C209095B v_interp_p1_f32 v9, v0, 2, 1, [m0] ; C8240600 v_interp_p2_f32 v9, [v9], v1, 2, 1, [m0] ; C8250601 v_subrev_f32_e32 v9, s20, v9 ; 0A121214 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 v_subrev_f32_e32 v1, s21, v0 ; 0A020015 v_mul_f32_e32 v10, 0.5, v4 ; 101408F0 v_mul_f32_e32 v11, 0.5, v6 ; 10160CF0 v_mad_f32 v10, s47, v10, v11 ; D282000A 042E142F v_mad_f32 v12, 0.5, v3, v11 ; D282000C 042E06F0 v_mad_f32 v13, 0.5, v4, v11 ; D282000D 042E08F0 v_mul_f32_e32 v14, v8, v8 ; 101C1108 v_mac_f32_e32 v14, v9, v9 ; 3E1C1309 v_mac_f32_e32 v14, v1, v1 ; 3E1C0301 v_mul_f32_e64 v15, 0.5, s12 ; D210000F 000018F0 v_mad_f32 v16, 0.5, s12, v15 ; D2820010 043C18F0 v_add_f32_e64 v17, s12, s12 ; D2060011 0000180C v_floor_f32_e32 v17, v17 ; 7E224911 v_mad_f32 v17, 2.0, s12, -v17 ; D2820011 844418F4 v_sub_f32_e32 v18, 1.0, v17 ; 082422F2 v_mov_b32_e32 v19, 0x6f800000 ; 7E2602FF 6F800000 v_mov_b32_e32 v20, 0x2f800000 ; 7E2802FF 2F800000 v_mov_b32_e32 v21, 0x7fffffff ; 7E2A02FF 7FFFFFFF s_buffer_load_dword s53, s[8:11], 0x20 ; C21A8920 s_buffer_load_dword s54, s[8:11], 0x21 ; C21B0921 s_buffer_load_dword s55, s[8:11], 0x34 ; C21B8934 s_buffer_load_dword s84, s[8:11], 0x4c ; C22A094C s_buffer_load_dword s52, s[8:11], 0x50 ; C21A0950 v_cmp_gt_f32_e64 vcc, |v6|, v19 ; D008016A 00022706 s_load_dwordx4 s[56:59], s[4:5], 0x4 ; C09C0504 s_load_dwordx4 s[72:75], s[4:5], 0x8 ; C0A40508 v_cndmask_b32_e32 v22, 1.0, v20 ; 002C28F2 v_mul_f32_e32 v23, v22, v6 ; 102E0D16 v_rcp_f32_e32 v23, v23 ; 7E2E5517 v_mov_b32_e32 v24, s46 ; 7E30022E s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v25, s55, v7 ; 10320E37 v_mad_f32 v26, v0, s55, 0.5 ; D282001A 03C06F00 v_mac_f32_e32 v25, 0, v16 ; 3E322080 v_mac_f32_e32 v26, 2.0, v15 ; 3E341EF4 v_mul_f32_e32 v28, s55, v0 ; 10380037 v_mad_f32 v27, v7, s55, 0.5 ; D282001B 03C06F07 v_mac_f32_e32 v27, 2.0, v15 ; 3E361EF4 v_mac_f32_e32 v28, 0, v16 ; 3E382080 s_load_dwordx4 s[24:27], s[4:5], 0x10 ; C08C0510 s_load_dwordx4 s[60:63], s[4:5], 0x14 ; C09E0514 s_load_dwordx8 s[64:71], s[6:7], 0x28 ; C0E00728 s_load_dwordx8 s[76:83], s[6:7], 0x10 ; C0E60710 s_load_dwordx8 s[28:35], s[6:7], 0x18 ; C0CE0718 s_load_dwordx8 s[36:43], s[6:7], 0x20 ; C0D20720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[29:30], 3, 0, 0, 0, 0, 0, 0, 0, v[27:28], s[76:83], s[72:75] ; F0800300 02531D1B v_mad_f32 v27, -v7, s55, v16 ; D282001B 24406F07 image_sample v[27:28], 3, 0, 0, 0, 0, 0, 0, 0, v[27:28], s[76:83], s[72:75] ; F0800300 02531B1B image_sample v[31:32], 3, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[76:83], s[72:75] ; F0800300 02531F19 v_mad_f32 v26, -v0, s55, v16 ; D282001A 24406F00 image_sample v[15:16], 3, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[76:83], s[72:75] ; F0800300 02530F19 s_waitcnt vmcnt(2) ; BF8C0772 v_add_f32_e32 v25, v27, v29 ; 06323B1B v_add_f32_e32 v26, v28, v30 ; 06343D1C s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v15, v15, v31 ; 061E3F0F v_add_f32_e32 v16, v16, v32 ; 06204110 v_add_f32_e32 v15, v15, v25 ; 061E330F v_add_f32_e32 v16, v16, v26 ; 06203510 v_mad_f32 v15, 0.5, v15, -1.0 ; D282000F 03CE1EF0 v_mad_f32 v16, 0.5, v16, -1.0 ; D2820010 03CE20F0 v_add_f32_e32 v15, v15, v15 ; 061E1F0F v_add_f32_e32 v16, v16, v16 ; 06202110 v_mad_f32 v25, v15, v15, 1.0 ; D2820019 03CA1F0F v_mac_f32_e32 v25, v16, v16 ; 3E322110 v_rsq_clamp_f32_e32 v25, v25 ; 7E325919 v_mul_f32_e32 v26, v23, v12 ; 10341917 v_mul_f32_e32 v26, v26, v22 ; 10342D1A v_mul_f32_e32 v27, v23, v10 ; 10361517 v_mul_f32_e32 v27, v27, v22 ; 10362D1B image_sample v[28:30], 7, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[64:71], s[60:63] ; F0800700 01F01C1A v_mul_f32_e32 v15, v25, v15 ; 101E1F19 v_mul_f32_e32 v31, v15, v18 ; 103E250F v_mac_f32_e32 v31, v15, v17 ; 3E3E230F v_mul_f32_e32 v15, v25, v16 ; 101E2119 v_mul_f32_e32 v16, v25, v18 ; 10202519 v_mac_f32_e32 v16, v25, v17 ; 3E202319 v_mul_f32_e32 v18, v15, v18 ; 1024250F v_mac_f32_e32 v18, v15, v17 ; 3E24230F v_mul_f32_e32 v15, v31, v31 ; 101E3F1F v_mac_f32_e32 v15, v16, v16 ; 3E1E2110 v_mac_f32_e32 v15, v18, v18 ; 3E1E2512 v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v17, -0.5, v28 ; 062238F1 v_add_f32_e32 v25, -1.0, v30 ; 06323CF3 s_load_dwordx8 s[60:67], s[6:7], 0x8 ; C0DE0708 v_add_f32_e32 v28, -0.5, v29 ; 06383AF1 v_mac_f32_e32 v17, v15, v31 ; 3E223F0F v_mac_f32_e32 v25, v15, v16 ; 3E32210F v_mac_f32_e32 v28, v15, v18 ; 3E38250F v_mul_f32_e32 v15, v17, v17 ; 101E2311 v_mac_f32_e32 v15, v25, v25 ; 3E1E3319 v_mac_f32_e32 v15, v28, v28 ; 3E1E391C v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v16, 1, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[60:67], s[56:59] ; F0800100 01CF101A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, s45, v16, v24 ; D2820010 0462202D v_mul_f32_e32 v18, v15, v28 ; 1024390F v_rcp_f32_e32 v16, v16 ; 7E205510 v_mul_f32_e32 v24, v15, v17 ; 1030230F v_mul_f32_e32 v26, s84, v24 ; 10343054 v_mul_f32_e32 v27, s84, v18 ; 10362454 v_subrev_f32_e32 v16, v6, v16 ; 0A202106 v_max_f32_e32 v28, 0, v16 ; 20382080 v_mul_f32_e32 v28, s52, v28 ; 10383834 v_sqrt_f32_e32 v29, v28 ; 7E3A671C v_and_b32_e32 v29, v29, v21 ; 363A2B1D v_mov_b32_e32 v30, 0xff800000 ; 7E3C02FF FF800000 v_mov_b32_e32 v31, 0x7f800000 ; 7E3E02FF 7F800000 v_cmp_eq_f32_e32 vcc, v28, v30 ; 7C043D1C v_cndmask_b32_e32 v28, v29, v31 ; 00383F1D v_add_f32_e64 v28, 0, v28 clamp ; D206081C 00023880 v_add_f32_e64 v29, 0, v28 clamp ; D206081D 00023880 v_mul_f32_e32 v32, s53, v29 ; 10403A35 v_mul_f32_e32 v29, s54, v29 ; 103A3A36 v_mov_b32_e32 v33, 0x3f4ccccd ; 7E4202FF 3F4CCCCD v_mul_legacy_f32_e32 v5, v33, v5 ; 0E0A0B21 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_mul_f32_e32 v29, v5, v29 ; 103A3B05 v_mac_f32_e32 v4, v29, v27 ; 3E08371D v_mul_f32_e32 v27, 0.5, v4 ; 103608F0 v_mad_f32 v27, s47, v27, v11 ; D282001B 042E362F v_mul_f32_e32 v5, v5, v32 ; 100A4105 v_mac_f32_e32 v3, v5, v26 ; 3E063505 v_mad_f32 v3, 0.5, v3, v11 ; D2820003 042E06F0 v_mul_f32_e32 v5, v23, v27 ; 100A3717 v_mul_f32_e32 v26, v23, v3 ; 10340717 v_mul_f32_e32 v26, v26, v22 ; 10342D1A v_mul_f32_e32 v27, v5, v22 ; 10362D05 image_sample v5, 1, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[60:67], s[56:59] ; F0800100 01CF051A v_mov_b32_e32 v26, s46 ; 7E34022E s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v26, s45, v5 ; 3E340A2D v_rcp_f32_e32 v5, v26 ; 7E0A551A v_subrev_f32_e32 v5, v6, v5 ; 0A0A0B06 v_max_f32_e32 v5, 0, v5 ; 200A0A80 v_mul_f32_e32 v5, s52, v5 ; 100A0A34 v_sqrt_f32_e32 v26, v5 ; 7E346705 v_and_b32_e32 v21, v26, v21 ; 362A2B1A v_cmp_eq_f32_e32 vcc, v5, v30 ; 7C043D05 v_cndmask_b32_e32 v5, v21, v31 ; 000A3F15 v_mad_f32 v4, 0.5, v4, v11 ; D2820004 042E08F0 s_load_dwordx8 s[52:59], s[6:7], 0x0 ; C0DA0700 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_mul_f32_e32 v11, 0x3f333333, v28 ; 101638FF 3F333333 v_cmp_lt_f32_e32 vcc, v11, v5 ; 7C020B0B v_cndmask_b32_e32 v4, v13, v4 ; 0008090D v_cndmask_b32_e32 v3, v12, v3 ; 0006070C v_mul_f32_e32 v3, v23, v3 ; 10060717 v_mul_f32_e32 v4, v23, v4 ; 10080917 v_mul_f32_e32 v26, v3, v22 ; 10342D03 v_mul_f32_e32 v27, v4, v22 ; 10362D04 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[21:23], 7, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[52:59], s[48:51] ; F0800700 018D151A v_cndmask_b32_e32 v3, v28, v5 ; 00060B1C v_add_f32_e32 v4, s44, v3 ; 0608062C v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_sub_f32_e32 v5, 1.0, v4 ; 080A08F2 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v11, v21, v5 ; 10160B15 v_mul_f32_e32 v13, v22, v5 ; 101A0B16 v_mul_f32_e32 v5, v23, v5 ; 100A0B17 v_add_f32_e32 v6, 0, v6 ; 060C0C80 v_cmp_gt_f32_e64 vcc, |v6|, v19 ; D008016A 00022706 v_cndmask_b32_e32 v19, 1.0, v20 ; 002628F2 v_sqrt_f32_e32 v20, v14 ; 7E28670E v_mov_b32_e32 v21, 0x3c23d70a ; 7E2A02FF 3C23D70A v_mul_f32_e32 v22, v21, v20 ; 102C2915 v_add_f32_e64 v22, 0, v22 clamp ; D2060816 00022C80 v_mul_f32_e32 v6, v19, v6 ; 100C0D13 v_rcp_f32_e32 v6, v6 ; 7E0C5506 v_mov_b32_e32 v23, 0x41000000 ; 7E2E02FF 41000000 v_mul_f32_e32 v26, v23, v24 ; 10343117 v_mac_f32_e32 v12, v22, v26 ; 3E183516 v_mul_f32_e32 v23, v23, v18 ; 102E2517 v_mac_f32_e32 v10, v22, v23 ; 3E142F16 v_mul_f32_e32 v12, v6, v12 ; 10181906 v_mul_f32_e32 v6, v6, v10 ; 100C1506 v_mul_f32_e32 v22, v12, v19 ; 102C270C v_mul_f32_e32 v23, v6, v19 ; 102E2706 v_mov_b32_e32 v6, 0x41a00000 ; 7E0C02FF 41A00000 v_mad_f32 v10, s12, v6, v7 ; D282000A 041E0C0C v_mul_f32_e32 v26, s19, v10 ; 10341413 v_mul_f32_e32 v27, s19, v0 ; 10360013 v_mad_f32 v10, s12, v6, -v7 ; D282000A 841E0C0C image_sample v12, 1, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[36:43], s[24:27] ; F0800100 00C90C1A v_mul_f32_e32 v26, s19, v10 ; 10341413 image_sample v10, 1, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[36:43], s[24:27] ; F0800100 00C90A1A s_buffer_load_dword s4, s[8:11], 0x5c ; C202095C v_mul_f32_e64 v19, s13, s13 ; D2100013 00001A0D v_mac_f32_e64 v19, s14, s14 ; D23E0013 00001C0E s_buffer_load_dword s5, s[8:11], 0x48 ; C2028948 v_mac_f32_e64 v19, s15, s15 ; D23E0013 00001E0F v_mac_f32_e64 v19, s18, s18 ; D23E0013 00002412 v_mul_f32_e32 v25, v15, v25 ; 1032330F v_mul_f32_e32 v26, v24, v24 ; 10343118 v_mac_f32_e32 v26, v25, v25 ; 3E343319 v_mac_f32_e32 v26, v18, v18 ; 3E342512 v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A v_rsq_clamp_f32_e32 v19, v19 ; 7E265913 image_sample v[27:29], 7, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[28:35], s[20:23] ; F0800700 00A71B16 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_sub_f32_e64 v22, 1.0, s5 ; D2080016 00000AF2 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v23, v27, v22 ; 102E2D1B v_mul_f32_e32 v27, v28, v22 ; 10362D1C v_mul_f32_e32 v22, v29, v22 ; 102C2D1D v_mul_f32_e32 v28, v26, v24 ; 1038311A v_mul_f32_e32 v29, s14, v19 ; 103A260E v_mul_f32_e32 v29, v29, v28 ; 103A391D v_mul_f32_e32 v30, v26, v25 ; 103C331A v_mul_f32_e32 v31, s13, v19 ; 103E260D v_mac_f32_e32 v29, v31, v30 ; 3E3A3D1F v_mul_f32_e32 v26, v26, v18 ; 1034251A v_mul_f32_e32 v31, s15, v19 ; 103E260F v_mac_f32_e32 v29, v31, v26 ; 3E3A351F v_mul_f32_e32 v31, v28, v29 ; 103E3B1C v_mac_f32_e32 v31, v28, v29 ; 3E3E3B1C v_rsq_clamp_f32_e32 v14, v14 ; 7E1C590E v_mul_f32_e32 v28, v30, v29 ; 10383B1E v_mac_f32_e32 v28, v30, v29 ; 3E383B1E v_mul_f32_e32 v30, v26, v29 ; 103C3B1A v_mac_f32_e32 v30, v26, v29 ; 3E3C3B1A v_mul_f32_e32 v8, v14, v8 ; 1010110E v_mul_f32_e32 v9, v14, v9 ; 1012130E v_rcp_f32_e32 v26, s17 ; 7E345411 v_mul_f32_e32 v1, v14, v1 ; 1002030E v_mad_f32 v14, v15, v17, v16 ; D282000E 0442230F v_add_f32_e32 v0, v0, v7 ; 06000F00 v_mul_f32_e32 v7, v26, v14 ; 100E1D1A v_mul_f32_e32 v15, s12, v6 ; 101E0C0C v_mac_f32_e32 v7, s16, v15 ; 3E0E1E10 v_madmk_f32_e32 v0, v0, v7, 0x3ba3d70a ; 40000F00 3BA3D70A v_mul_f32_e32 v7, v24, v8 ; 100E1118 v_mad_f32 v7, -v9, v25, -v7 ; D2820007 A41E3309 s_buffer_load_dword s6, s[8:11], 0x38 ; C2030938 s_buffer_load_dword s7, s[8:11], 0x39 ; C2038939 v_mad_f32 v15, s14, v19, -v31 ; D282000F 847E260E v_mad_f32 v17, s15, v19, -v30 ; D2820011 847A260F v_mad_f32 v24, s13, v19, -v28 ; D2820018 8472260D v_mov_b32_e32 v25, 0x3d4ccccd ; 7E3202FF 3D4CCCCD v_mad_f32 v19, -s13, v19, v25 ; D2820013 2466260D s_buffer_load_dword s12, s[8:11], 0x3a ; C206093A s_buffer_load_dword s13, s[8:11], 0x3c ; C206893C s_buffer_load_dword s14, s[8:11], 0x3d ; C207093D s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v11, s6, v4 ; 3E160806 v_mac_f32_e32 v13, s7, v4 ; 3E1A0807 v_mov_b32_e32 v25, 0x40400000 ; 7E3202FF 40400000 v_mul_f32_e32 v28, v25, v0 ; 10380119 v_floor_f32_e32 v28, v28 ; 7E38491C v_mad_f32 v0, v0, v25, -v28 ; D2820000 84723300 v_log_f32_e32 v0, v0 ; 7E004F00 v_mac_f32_e32 v5, s12, v4 ; 3E0A080C v_mad_f32 v4, -v14, v26, 1.0 ; D2820004 23CA350E v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A v_mul_legacy_f32_e32 v0, v14, v0 ; 0E00010E v_exp_f32_e32 v0, v0 ; 7E004B00 v_add_f32_e32 v0, v0, v28 ; 06003900 v_mul_f32_e32 v0, 0x40490fdc, v0 ; 100000FF 40490FDC v_mul_f32_e32 v0, 0x3e22f983, v0 ; 100000FF 3E22F983 v_fract_f32_e32 v0, v0 ; 7E004100 v_sin_f32_e32 v0, v0 ; 7E006B00 v_mov_b32_e32 v26, 0xbe99999a ; 7E3402FF BE99999A v_add_f32_e32 v0, v0, v26 ; 06003500 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_log_f32_e32 v4, v4 ; 7E084F04 v_mul_f32_e32 v0, 0x3fc00000, v0 ; 100000FF 3FC00000 v_log_f32_e32 v0, v0 ; 7E004F00 v_add_f32_e32 v10, v10, v12 ; 0614190A v_mul_legacy_f32_e32 v4, v14, v4 ; 0E08090E v_exp_f32_e32 v4, v4 ; 7E084B04 v_mul_legacy_f32_e32 v0, v25, v0 ; 0E000119 v_exp_f32_e32 v0, v0 ; 7E004B00 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_mul_f32_e32 v4, 0.5, v10 ; 100814F0 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_mul_f32_e32 v0, s3, v0 ; 10000003 v_mad_f32 v4, -v20, v21, 1.0 ; D2820004 23CA2B14 v_max_f32_e32 v4, 0x3f19999a, v4 ; 200808FF 3F19999A v_mul_f32_e32 v0, v4, v0 ; 10000104 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 s_buffer_load_dword s3, s[8:11], 0x3e ; C201893E v_sub_f32_e32 v4, 1.0, v0 ; 080800F2 v_mul_f32_e32 v10, v11, v4 ; 1014090B v_mul_f32_e32 v11, v13, v4 ; 1016090D v_mul_f32_e32 v4, v5, v4 ; 10080905 v_mac_f32_e32 v10, s13, v0 ; 3E14000D v_mac_f32_e32 v11, s14, v0 ; 3E16000E s_buffer_load_dword s6, s[8:11], 0x40 ; C2030940 s_buffer_load_dword s7, s[8:11], 0x41 ; C2038941 s_buffer_load_dword s12, s[8:11], 0x42 ; C2060942 s_buffer_load_dword s13, s[8:11], 0x44 ; C2068944 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v4, s3, v0 ; 3E080003 s_buffer_load_dword s3, s[8:11], 0x45 ; C2018945 v_mad_f32 v0, -v1, v18, v7 ; D2820000 241E2501 s_buffer_load_dword s14, s[8:11], 0x46 ; C2070946 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v0, 1.0, v0 ; 080000F2 v_mul_f32_e32 v5, v0, v0 ; 100A0100 v_mad_f32 v0, -v0, v0, 1.0 ; D2820000 23CA0100 v_mul_f32_e32 v7, s6, v0 ; 100E0006 v_mac_f32_e32 v7, s13, v5 ; 3E0E0A0D v_mul_f32_e32 v12, s7, v0 ; 10180007 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v12, s3, v5 ; 3E180A03 v_mul_f32_e32 v0, s12, v0 ; 1000000C v_mac_f32_e32 v0, s14, v5 ; 3E000A0E v_mac_f32_e32 v23, s5, v7 ; 3E2E0E05 s_buffer_load_dword s3, s[8:11], 0x16 ; C2018916 s_buffer_load_dword s6, s[8:11], 0x17 ; C2030917 s_buffer_load_dword s7, s[8:11], 0x5d ; C203895D s_buffer_load_dword s12, s[8:11], 0x5e ; C206095E s_buffer_load_dword s8, s[8:11], 0x60 ; C2040960 v_mac_f32_e32 v27, s5, v12 ; 3E361805 v_mac_f32_e32 v22, s5, v0 ; 3E2C0005 v_mul_f32_e32 v0, v15, v8 ; 1000110F v_mad_f32 v0, -v9, v24, -v0 ; D2820000 A4023109 v_mad_f32 v0, -v1, v17, v0 ; D2820000 24022301 v_mov_b32_e32 v1, 0x3e4ccccd ; 7E0202FF 3E4CCCCD v_mov_b32_e32 v5, 0x40c00000 ; 7E0A02FF 40C00000 v_mad_f32 v5, s14, v5, v1 ; D2820005 04060A0E v_add_f32_e64 v7, 0, -v9 clamp ; D2060807 40021280 v_sub_f32_e32 v7, 1.0, v7 ; 080E0EF2 v_mul_f32_e32 v7, v7, v7 ; 100E0F07 v_mul_f32_e32 v7, v7, v7 ; 100E0F07 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mul_f32_e32 v3, v3, v7 ; 10060F03 v_mac_f32_e32 v1, v33, v3 ; 3E020721 v_add_f32_e64 v3, 0, v5 clamp ; D2060803 00020A80 v_mul_f32_e32 v5, v3, v23 ; 100A2F03 v_mul_f32_e32 v7, v3, v27 ; 100E3703 v_mul_f32_e32 v3, v3, v22 ; 10062D03 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mul_f32_e32 v8, 0x3dcccccd, v20 ; 101028FF 3DCCCCCD v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080 v_mul_f32_e32 v9, v8, v1 ; 10120308 v_mad_f32 v1, -v1, v8, 1.0 ; D2820001 23CA1101 v_mul_f32_e32 v8, v10, v1 ; 1010030A v_mac_f32_e32 v8, v5, v9 ; 3E101305 v_max_f32_e32 v0, 0, v0 ; 20000080 v_log_f32_e32 v0, v0 ; 7E004F00 v_mul_f32_e32 v5, v11, v1 ; 100A030B v_mac_f32_e32 v5, v7, v9 ; 3E0A1307 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mac_f32_e32 v1, v3, v9 ; 3E021303 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_legacy_f32_e32 v0, s8, v0 ; 0E000008 v_exp_f32_e32 v0, v0 ; 7E004B00 v_mul_f32_e32 v3, s4, v0 ; 10060004 v_mul_f32_e32 v4, v6, v19 ; 10082706 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_mul_f32_e32 v4, v25, v4 ; 10080919 v_mac_f32_e32 v8, v4, v3 ; 3E100704 v_mul_f32_e32 v3, s7, v0 ; 10060007 v_mac_f32_e32 v5, v4, v3 ; 3E0A0704 v_mul_f32_e32 v0, s12, v0 ; 1000000C v_mac_f32_e32 v1, v4, v0 ; 3E020104 v_mov_b32_e32 v0, s6 ; 7E000206 v_mac_f32_e32 v0, s3, v2 ; 3E000403 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v2, 1.0, v0 ; 080400F2 v_mul_f32_e32 v3, s2, v2 ; 10060402 v_mac_f32_e32 v3, v8, v0 ; 3E060108 v_mul_f32_e32 v4, s1, v2 ; 10080401 v_mac_f32_e32 v4, v5, v0 ; 3E080105 v_mul_f32_e32 v2, s0, v2 ; 10040400 v_mac_f32_e32 v2, v1, v0 ; 3E040101 v_cvt_pkrtz_f16_f32_e32 v0, v3, v4 ; 5E000903 v_add_f32_e32 v1, v16, v16 ; 06022110 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 88 VGPRS: 36 Code Size: 2064 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xy, IN[1].xyxx 5: MOV OUT[1], TEMP[1] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = fmul float %13, %33 %44 = fmul float %14, %33 %45 = fmul float %15, %33 %46 = fmul float %16, %33 %47 = fmul float %17, %34 %48 = fadd float %47, %43 %49 = fmul float %18, %34 %50 = fadd float %49, %44 %51 = fmul float %19, %34 %52 = fadd float %51, %45 %53 = fmul float %20, %34 %54 = fadd float %53, %46 %55 = fmul float %21, %35 %56 = fadd float %55, %48 %57 = fmul float %22, %35 %58 = fadd float %57, %50 %59 = fmul float %23, %35 %60 = fadd float %59, %52 %61 = fmul float %24, %35 %62 = fadd float %61, %54 %63 = fmul float %25, %36 %64 = fadd float %63, %56 %65 = fmul float %26, %36 %66 = fadd float %65, %58 %67 = fmul float %27, %36 %68 = fadd float %67, %60 %69 = fmul float %28, %36 %70 = fadd float %69, %62 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v0, s12, v2 ; 1000040C s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, s6, v3 ; 3E000606 v_mul_f32_e32 v8, s13, v2 ; 1010040D v_mac_f32_e32 v8, s7, v3 ; 3E100607 v_mul_f32_e32 v9, s4, v2 ; 10120404 v_mac_f32_e32 v9, s8, v3 ; 3E120608 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mac_f32_e32 v2, s9, v3 ; 3E040609 v_mac_f32_e32 v0, s10, v4 ; 3E00080A v_mac_f32_e32 v8, s11, v4 ; 3E10080B v_mac_f32_e32 v9, s14, v4 ; 3E12080E v_mac_f32_e32 v2, s15, v4 ; 3E04080F v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mac_f32_e32 v8, s17, v5 ; 3E100A11 v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706 exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 196 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL CONST[0..1] DCL CONST[6..10] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { 0.0000, 0.0156, 0.3000, 4.0000} IMM[1] FLT32 { 64.0000, 10.0000, 0.1000, 3.1416} IMM[2] INT32 {64, 0, 0, 0} IMM[3] FLT32 { 0.2000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].x, IN[0].xxxx 1: MOV TEMP[1].y, IMM[0].xxxx 2: MUL TEMP[2].x, CONST[9].xxxx, IN[0].xxxx 3: MOV TEMP[1].x, TEMP[2].xxxx 4: MUL TEMP[1].xy, TEMP[1].xyyy, IMM[0].yyyy 5: MOV TEMP[1].xy, TEMP[1].xyyy 6: TEX TEMP[1].y, TEMP[1], SAMP[2], 2D 7: ADD TEMP[1].x, TEMP[1].yyyy, IMM[0].zzzz 8: MUL TEMP[1].x, CONST[0].yyyy, TEMP[1].xxxx 9: MAD TEMP[1].x, TEMP[1].xxxx, CONST[7].xxxx, IN[0].yyyy 10: MOV TEMP[0].y, TEMP[1].xxxx 11: MOV TEMP[1].y, IMM[0].xxxx 12: MOV TEMP[1].x, TEMP[2].xxxx 13: MUL TEMP[1].xy, TEMP[1].xyyy, IMM[0].yyyy 14: MOV TEMP[1].xy, TEMP[1].xyyy 15: TEX TEMP[1].y, TEMP[1], SAMP[2], 2D 16: ADD TEMP[1].x, TEMP[1].yyyy, IMM[0].zzzz 17: MUL TEMP[1].x, CONST[0].yyyy, TEMP[1].xxxx 18: MAD TEMP[1].x, TEMP[1].xxxx, CONST[7].xxxx, IN[0].yyyy 19: MOV TEMP[1].y, TEMP[1].xxxx 20: MUL TEMP[3].x, CONST[0].yyyy, IMM[0].wwww 21: MUL TEMP[4].x, TEMP[3].xxxx, IMM[1].xxxx 22: F2I TEMP[4].x, TEMP[4].xxxx 23: IDIV TEMP[4].x, TEMP[4].xxxx, IMM[2].xxxx 24: I2F TEMP[4].x, TEMP[4].xxxx 25: ADD TEMP[1].x, IN[0].xxxx, TEMP[4].xxxx 26: MOV TEMP[4].x, IN[0].xxxx 27: MOV TEMP[5].y, IMM[0].xxxx 28: MUL TEMP[1].xy, TEMP[1].xyyy, CONST[9].xyyy 29: MUL TEMP[1].xy, TEMP[1].xyyy, IMM[0].yyyy 30: MOV TEMP[1].xy, TEMP[1].xyyy 31: TEX TEMP[1].x, TEMP[1], SAMP[2], 2D 32: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy 33: F2I TEMP[1].x, TEMP[1].xxxx 34: I2F TEMP[1].x, TEMP[1].xxxx 35: MUL TEMP[5].x, TEMP[1].xxxx, IMM[1].zzzz 36: MOV TEMP[1].y, IMM[0].xxxx 37: MOV TEMP[1].x, TEMP[2].xxxx 38: MUL TEMP[1].xy, TEMP[1].xyyy, IMM[0].yyyy 39: MOV TEMP[1].xy, TEMP[1].xyyy 40: TEX TEMP[1].y, TEMP[1], SAMP[2], 2D 41: ADD TEMP[1].x, TEMP[1].yyyy, IMM[0].zzzz 42: MUL TEMP[1].x, CONST[0].yyyy, TEMP[1].xxxx 43: MAD TEMP[1].x, TEMP[1].xxxx, CONST[7].xxxx, IN[0].yyyy 44: MOV TEMP[4].y, TEMP[1].xxxx 45: MUL TEMP[1].x, CONST[9].xxxx, IMM[1].zzzz 46: MOV TEMP[1].y, CONST[9].yyyy 47: MAD TEMP[1].xy, TEMP[4].xyyy, TEMP[1].xyyy, TEMP[5].xyyy 48: MUL TEMP[2].x, IN[0].yyyy, CONST[8].xxxx 49: MUL TEMP[2].x, TEMP[2].xxxx, IMM[1].wwww 50: SIN TEMP[2].x, TEMP[2].xxxx 51: MUL TEMP[4].xy, CONST[0].xyyy, IMM[1].yyyy 52: MOV TEMP[5].y, IMM[0].xxxx 53: MUL TEMP[5].x, CONST[9].xxxx, TEMP[4].xxxx 54: MUL TEMP[5].xy, TEMP[5].xyyy, IMM[0].yyyy 55: MOV TEMP[5].xy, TEMP[5].xyyy 56: TEX TEMP[5].y, TEMP[5], SAMP[2], 2D 57: ADD TEMP[5].x, TEMP[5].yyyy, IMM[0].zzzz 58: MUL TEMP[5].x, CONST[0].yyyy, TEMP[5].xxxx 59: MAD TEMP[5].x, TEMP[5].xxxx, CONST[7].xxxx, TEMP[4].yyyy 60: MOV TEMP[4].y, TEMP[5].xxxx 61: MUL TEMP[3].x, TEMP[3].xxxx, IMM[1].xxxx 62: F2I TEMP[3].x, TEMP[3].xxxx 63: IDIV TEMP[3].x, TEMP[3].xxxx, IMM[2].xxxx 64: I2F TEMP[3].x, TEMP[3].xxxx 65: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[3].xxxx 66: MUL TEMP[0].xy, TEMP[0].xyyy, CONST[9].xyyy 67: MUL TEMP[0].xy, TEMP[0].xyyy, IMM[0].yyyy 68: MOV TEMP[0].xy, TEMP[0].xyyy 69: TEX TEMP[0].x, TEMP[0], SAMP[1], 2D 70: MOV TEMP[3].xy, IN[0].xyyy 71: TEX TEMP[3], TEMP[3], SAMP[0], 2D 72: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[0].xxxx 73: MOV TEMP[1].xy, TEMP[1].xyyy 74: MOV TEMP[1].w, IMM[0].xxxx 75: TXL TEMP[1], TEMP[1], SAMP[3], 2D 76: MUL TEMP[0], TEMP[0].xxxx, TEMP[1] 77: MAD TEMP[0], TEMP[2].xxxx, IMM[3].xxxx, TEMP[0] 78: MUL TEMP[0], TEMP[3], TEMP[0] 79: MUL TEMP[0], TEMP[0], CONST[6].xxxx 80: MUL TEMP[0], TEMP[0], CONST[1] 81: MAD TEMP[1].x, TEMP[2].xxxx, IMM[0].zzzz, IMM[3].yyyy 82: MUL TEMP[0], TEMP[0], TEMP[1].xxxx 83: MUL TEMP[1].xy, TEMP[4].xyyy, CONST[9].xyyy 84: MUL TEMP[1].xy, TEMP[1].xyyy, IMM[0].yyyy 85: MOV TEMP[1].xy, TEMP[1].xyyy 86: TEX TEMP[1].x, TEMP[1], SAMP[2], 2D 87: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy 88: F2I TEMP[1].x, TEMP[1].xxxx 89: I2F TEMP[1].x, TEMP[1].xxxx 90: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].zzzz 91: MAD TEMP[1].x, TEMP[1].xxxx, CONST[10].xxxx, IMM[3].yyyy 92: MUL TEMP[0], TEMP[0], TEMP[1].xxxx 93: MOV OUT[0], TEMP[0] 94: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %36 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %37 = load <32 x i8>, <32 x i8> addrspace(2)* %36, align 32, !tbaa !0 %38 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %41 = bitcast <8 x i32> addrspace(2)* %40 to <32 x i8> addrspace(2)* %42 = load <32 x i8>, <32 x i8> addrspace(2)* %41, align 32, !tbaa !0 %43 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %44 = bitcast <4 x i32> addrspace(2)* %43 to <16 x i8> addrspace(2)* %45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0 %46 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %47 = load <8 x i32>, <8 x i32> addrspace(2)* %46, align 32, !tbaa !0 %48 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0 %50 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %51 = bitcast <8 x i32> addrspace(2)* %50 to <32 x i8> addrspace(2)* %52 = load <32 x i8>, <32 x i8> addrspace(2)* %51, align 32, !tbaa !0 %53 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %54 = bitcast <4 x i32> addrspace(2)* %53 to <16 x i8> addrspace(2)* %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %57 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %58 = fmul float %33, %56 %59 = fmul float %58, 1.562500e-02 %60 = bitcast float %59 to i32 %61 = insertelement <2 x i32> undef, i32 %60, i32 0 %62 = insertelement <2 x i32> %61, i32 0, i32 1 %63 = bitcast <8 x i32> %47 to <32 x i8> %64 = bitcast <4 x i32> %49 to <16 x i8> %65 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %62, <32 x i8> %63, <16 x i8> %64, i32 2) %66 = extractelement <4 x float> %65, i32 1 %67 = fadd float %66, 0x3FD3333340000000 %68 = fmul float %25, %67 %69 = fmul float %68, %31 %70 = fadd float %69, %57 %71 = fmul float %58, 1.562500e-02 %72 = bitcast float %71 to i32 %73 = insertelement <2 x i32> undef, i32 %72, i32 0 %74 = insertelement <2 x i32> %73, i32 0, i32 1 %75 = bitcast <8 x i32> %47 to <32 x i8> %76 = bitcast <4 x i32> %49 to <16 x i8> %77 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %74, <32 x i8> %75, <16 x i8> %76, i32 2) %78 = extractelement <4 x float> %77, i32 1 %79 = fadd float %78, 0x3FD3333340000000 %80 = fmul float %25, %79 %81 = fmul float %80, %31 %82 = fadd float %81, %57 %83 = fmul float %25, 4.000000e+00 %84 = fmul float %83, 6.400000e+01 %85 = fptosi float %84 to i32 %86 = sdiv i32 %85, 64 %87 = sitofp i32 %86 to float %88 = fadd float %56, %87 %89 = fmul float %88, %33 %90 = fmul float %82, %34 %91 = fmul float %89, 1.562500e-02 %92 = fmul float %90, 1.562500e-02 %93 = bitcast float %91 to i32 %94 = bitcast float %92 to i32 %95 = insertelement <2 x i32> undef, i32 %93, i32 0 %96 = insertelement <2 x i32> %95, i32 %94, i32 1 %97 = bitcast <8 x i32> %47 to <32 x i8> %98 = bitcast <4 x i32> %49 to <16 x i8> %99 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %97, <16 x i8> %98, i32 2) %100 = extractelement <4 x float> %99, i32 0 %101 = fmul float %100, 1.000000e+01 %102 = fptosi float %101 to i32 %103 = sitofp i32 %102 to float %104 = fmul float %103, 0x3FB99999A0000000 %105 = fmul float %58, 1.562500e-02 %106 = bitcast float %105 to i32 %107 = insertelement <2 x i32> undef, i32 %106, i32 0 %108 = insertelement <2 x i32> %107, i32 0, i32 1 %109 = bitcast <8 x i32> %47 to <32 x i8> %110 = bitcast <4 x i32> %49 to <16 x i8> %111 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %108, <32 x i8> %109, <16 x i8> %110, i32 2) %112 = extractelement <4 x float> %111, i32 1 %113 = fadd float %112, 0x3FD3333340000000 %114 = fmul float %25, %113 %115 = fmul float %114, %31 %116 = fadd float %115, %57 %117 = fmul float %33, 0x3FB99999A0000000 %118 = fmul float %56, %117 %119 = fadd float %118, %104 %120 = fmul float %116, %34 %121 = fadd float %120, 0.000000e+00 %122 = fmul float %57, %32 %123 = fmul float %122, 0x400921FA00000000 %124 = call float @llvm.sin.f32(float %123) %125 = fmul float %24, 1.000000e+01 %126 = fmul float %25, 1.000000e+01 %127 = fmul float %33, %125 %128 = fmul float %127, 1.562500e-02 %129 = bitcast float %128 to i32 %130 = insertelement <2 x i32> undef, i32 %129, i32 0 %131 = insertelement <2 x i32> %130, i32 0, i32 1 %132 = bitcast <8 x i32> %47 to <32 x i8> %133 = bitcast <4 x i32> %49 to <16 x i8> %134 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %131, <32 x i8> %132, <16 x i8> %133, i32 2) %135 = extractelement <4 x float> %134, i32 1 %136 = fadd float %135, 0x3FD3333340000000 %137 = fmul float %25, %136 %138 = fmul float %137, %31 %139 = fadd float %138, %126 %140 = fmul float %83, 6.400000e+01 %141 = fptosi float %140 to i32 %142 = sdiv i32 %141, 64 %143 = sitofp i32 %142 to float %144 = fadd float %125, %143 %145 = fmul float %56, %33 %146 = fmul float %70, %34 %147 = fmul float %145, 1.562500e-02 %148 = fmul float %146, 1.562500e-02 %149 = bitcast float %147 to i32 %150 = bitcast float %148 to i32 %151 = insertelement <2 x i32> undef, i32 %149, i32 0 %152 = insertelement <2 x i32> %151, i32 %150, i32 1 %153 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %152, <32 x i8> %42, <16 x i8> %45, i32 2) %154 = extractelement <4 x float> %153, i32 0 %155 = bitcast float %56 to i32 %156 = bitcast float %57 to i32 %157 = insertelement <2 x i32> undef, i32 %155, i32 0 %158 = insertelement <2 x i32> %157, i32 %156, i32 1 %159 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %37, <16 x i8> %39, i32 2) %160 = extractelement <4 x float> %159, i32 0 %161 = extractelement <4 x float> %159, i32 1 %162 = extractelement <4 x float> %159, i32 2 %163 = extractelement <4 x float> %159, i32 3 %164 = fmul float %154, %154 %165 = bitcast float %119 to i32 %166 = bitcast float %121 to i32 %167 = insertelement <4 x i32> undef, i32 %165, i32 0 %168 = insertelement <4 x i32> %167, i32 %166, i32 1 %169 = insertelement <4 x i32> %168, i32 0, i32 2 %170 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %169, <32 x i8> %52, <16 x i8> %55, i32 2) %171 = extractelement <4 x float> %170, i32 0 %172 = extractelement <4 x float> %170, i32 1 %173 = extractelement <4 x float> %170, i32 2 %174 = extractelement <4 x float> %170, i32 3 %175 = fmul float %164, %171 %176 = fmul float %164, %172 %177 = fmul float %164, %173 %178 = fmul float %164, %174 %179 = fmul float %124, 0x3FC99999A0000000 %180 = fadd float %179, %175 %181 = fmul float %124, 0x3FC99999A0000000 %182 = fadd float %181, %176 %183 = fmul float %124, 0x3FC99999A0000000 %184 = fadd float %183, %177 %185 = fmul float %124, 0x3FC99999A0000000 %186 = fadd float %185, %178 %187 = fmul float %160, %180 %188 = fmul float %161, %182 %189 = fmul float %162, %184 %190 = fmul float %163, %186 %191 = fmul float %187, %30 %192 = fmul float %188, %30 %193 = fmul float %189, %30 %194 = fmul float %190, %30 %195 = fmul float %191, %26 %196 = fmul float %192, %27 %197 = fmul float %193, %28 %198 = fmul float %194, %29 %199 = fmul float %124, 0x3FD3333340000000 %200 = fadd float %199, 1.000000e+00 %201 = fmul float %195, %200 %202 = fmul float %196, %200 %203 = fmul float %197, %200 %204 = fmul float %198, %200 %205 = fmul float %144, %33 %206 = fmul float %139, %34 %207 = fmul float %205, 1.562500e-02 %208 = fmul float %206, 1.562500e-02 %209 = bitcast float %207 to i32 %210 = bitcast float %208 to i32 %211 = insertelement <2 x i32> undef, i32 %209, i32 0 %212 = insertelement <2 x i32> %211, i32 %210, i32 1 %213 = bitcast <8 x i32> %47 to <32 x i8> %214 = bitcast <4 x i32> %49 to <16 x i8> %215 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %212, <32 x i8> %213, <16 x i8> %214, i32 2) %216 = extractelement <4 x float> %215, i32 0 %217 = fmul float %216, 1.000000e+01 %218 = fptosi float %217 to i32 %219 = sitofp i32 %218 to float %220 = fmul float %219, 0x3FB99999A0000000 %221 = fmul float %220, %35 %222 = fadd float %221, 1.000000e+00 %223 = fmul float %201, %222 %224 = fmul float %202, %222 %225 = fmul float %203, %222 %226 = fmul float %204, %222 %227 = call i32 @llvm.SI.packf16(float %223, float %224) %228 = bitcast i32 %227 to float %229 = call i32 @llvm.SI.packf16(float %225, float %226) %230 = bitcast i32 %229 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %228, float %230, float %228, float %230) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v2, 0x3c800000 ; 7E0402FF 3C800000 v_mov_b32_e32 v3, 0x3e99999a ; 7E0602FF 3E99999A v_mov_b32_e32 v4, 0x41200000 ; 7E0802FF 41200000 v_mov_b32_e32 v5, 0x3dcccccd ; 7E0A02FF 3DCCCCCD v_mov_b32_e32 v6, 0x3e4ccccd ; 7E0C02FF 3E4CCCCD v_interp_p1_f32 v7, v0, 0, 0, [m0] ; C81C0000 v_interp_p2_f32 v7, [v7], v1, 0, 0, [m0] ; C81D0001 v_interp_p1_f32 v8, v0, 1, 0, [m0] ; C8200100 v_interp_p2_f32 v8, [v8], v1, 1, 0, [m0] ; C8210101 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx4 s[44:47], s[4:5], 0x4 ; C0960504 s_load_dwordx4 s[8:11], s[4:5], 0x8 ; C0840508 s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 s_load_dwordx8 s[48:55], s[6:7], 0x8 ; C0D80708 s_load_dwordx8 s[12:19], s[6:7], 0x10 ; C0C60710 s_load_dwordx8 s[24:31], s[6:7], 0x18 ; C0CC0718 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s56, s[0:3], 0x6 ; C21C0106 s_buffer_load_dword s57, s[0:3], 0x25 ; C21C8125 s_buffer_load_dword s58, s[0:3], 0x28 ; C21D0128 s_buffer_load_dword s59, s[0:3], 0x24 ; C21D8124 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e64 v0, 4.0, s5 ; D2100000 00000AF6 v_mul_f32_e32 v0, 0x42800000, v0 ; 100000FF 42800000 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_ashrrev_i32_e32 v1, 31, v0 ; 3002009F v_lshrrev_b32_e32 v1, 26, v1 ; 2C02029A v_add_i32_e32 v0, v0, v1 ; 4A000300 s_buffer_load_dword s60, s[0:3], 0x1c ; C21E011C v_mul_f32_e32 v1, s59, v7 ; 10020E3B v_mul_f32_e32 v9, v2, v1 ; 10120302 v_mov_b32_e32 v10, 0 ; 7E140280 image_sample v1, 2, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[12:19], s[8:11] ; F0800200 00430109 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_add_f32_e32 v1, v3, v1 ; 06020303 v_ashrrev_i32_e32 v0, 6, v0 ; 30000086 v_cvt_f32_i32_e32 v0, v0 ; 7E000B00 v_mul_f32_e32 v1, s5, v1 ; 10020205 s_buffer_load_dword s61, s[0:3], 0x20 ; C21E8120 v_mad_f32 v1, s60, v1, v8 ; D2820001 0422023C v_add_f32_e32 v11, v0, v7 ; 06160F00 v_mul_f32_e32 v11, s59, v11 ; 1016163B v_mul_f32_e32 v12, s57, v1 ; 10180239 v_mul_f32_e32 v13, v2, v11 ; 101A1702 v_mul_f32_e32 v14, v2, v12 ; 101C1902 image_sample v11, 1, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[12:19], s[8:11] ; F0800100 00430B0D s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v11, v4, v11 ; 10161704 v_cvt_i32_f32_e32 v11, v11 ; 7E16110B v_mul_f32_e32 v12, s4, v4 ; 10180804 v_mul_f32_e32 v12, s59, v12 ; 1018183B v_mul_f32_e32 v12, v2, v12 ; 10181902 v_cvt_f32_i32_e32 v11, v11 ; 7E160B0B v_mov_b32_e32 v13, v10 ; 7E1A030A image_sample v12, 2, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[12:19], s[8:11] ; F0800200 00430C0C v_mov_b32_e32 v13, v9 ; 7E1A0309 image_sample v9, 1, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[48:55], s[44:47] ; F0800100 016C090D v_mul_f32_e32 v13, s59, v5 ; 101A0A3B v_mul_f32_e32 v14, v5, v11 ; 101C1705 v_mac_f32_e32 v14, v13, v7 ; 3E1C0F0D v_mad_f32 v15, v1, s57, 0 ; D282000F 02007301 image_sample v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[36:43], s[32:35] ; F0800F00 01091107 v_mov_b32_e32 v16, v10 ; 7E20030A s_waitcnt vmcnt(0) ; BF8C0770 image_sample_l v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[24:31], s[20:23] ; F0900F00 00A60D0E v_add_f32_e32 v1, v3, v12 ; 06021903 v_mul_f32_e32 v1, s5, v1 ; 10020205 v_mul_f32_e32 v7, s5, v4 ; 100E0805 v_mac_f32_e32 v7, s60, v1 ; 3E0E023C v_mac_f32_e32 v0, s4, v4 ; 3E000804 v_mul_f32_e32 v0, s59, v0 ; 1000003B v_mul_f32_e32 v1, s57, v7 ; 10020E39 v_mul_f32_e32 v10, v2, v0 ; 10140102 v_mul_f32_e32 v11, v2, v1 ; 10160302 image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[12:19], s[8:11] ; F0800100 0043000A v_mul_f32_e32 v1, v9, v9 ; 10021309 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v2, v13, v1 ; 1004030D v_mul_f32_e32 v7, v14, v1 ; 100E030E v_mul_f32_e32 v9, v15, v1 ; 1012030F v_mul_f32_e32 v1, v16, v1 ; 10020310 v_mul_f32_e32 v8, s61, v8 ; 1010103D v_mul_f32_e32 v8, 0x40490fd0, v8 ; 101010FF 40490FD0 v_mul_f32_e32 v8, 0x3e22f983, v8 ; 101010FF 3E22F983 v_fract_f32_e32 v8, v8 ; 7E104108 v_sin_f32_e32 v8, v8 ; 7E106B08 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 v_mac_f32_e32 v2, v6, v8 ; 3E041106 v_mac_f32_e32 v7, v6, v8 ; 3E0E1106 v_mac_f32_e32 v9, v6, v8 ; 3E121106 v_mac_f32_e32 v1, v6, v8 ; 3E021106 v_mul_f32_e32 v2, v2, v17 ; 10042302 v_mul_f32_e32 v6, v7, v18 ; 100C2507 v_mul_f32_e32 v7, v9, v19 ; 100E2709 v_mul_f32_e32 v1, v1, v20 ; 10022901 s_buffer_load_dword s0, s[0:3], 0x7 ; C2000107 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v2, s4, v2 ; 10040404 v_mul_f32_e32 v6, s4, v6 ; 100C0C04 v_mul_f32_e32 v7, s4, v7 ; 100E0E04 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mul_f32_e32 v2, s6, v2 ; 10040406 v_mul_f32_e32 v6, s7, v6 ; 100C0C07 v_mul_f32_e32 v7, s56, v7 ; 100E0E38 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_mad_f32 v3, v8, v3, 1.0 ; D2820003 03CA0708 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_cvt_f32_i32_e32 v0, v0 ; 7E000B00 v_mul_f32_e32 v4, v3, v6 ; 10080D03 v_mul_f32_e32 v6, v3, v7 ; 100C0F03 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_mul_f32_e32 v0, v5, v0 ; 10000105 v_mad_f32 v0, v0, s58, 1.0 ; D2820000 03C87500 v_mul_f32_e32 v2, v0, v2 ; 10040500 v_mul_f32_e32 v3, v0, v4 ; 10060900 v_mul_f32_e32 v4, v0, v6 ; 10080D00 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 24 Code Size: 608 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..4] DCL TEMP[0..2], LOCAL 0: MUL TEMP[0], CONST[1], IN[0].xxxx 1: MAD TEMP[0], CONST[2], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[3], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[4], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xy, IN[2].xyxx 5: MAD TEMP[2].xy, IN[0].xyyy, CONST[0].xyyy, CONST[0].zwww 6: MOV TEMP[1].zw, TEMP[2].yyxy 7: MOV OUT[2], TEMP[1] 8: MOV OUT[0], TEMP[0] 9: MOV OUT[1], IN[1] 10: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = add i32 %5, %7 %36 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %34, i32 0, i32 %35) %37 = extractelement <4 x float> %36, i32 0 %38 = extractelement <4 x float> %36, i32 1 %39 = extractelement <4 x float> %36, i32 2 %40 = extractelement <4 x float> %36, i32 3 %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = extractelement <4 x float> %44, i32 3 %49 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 %51 = add i32 %5, %7 %52 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %51) %53 = extractelement <4 x float> %52, i32 0 %54 = extractelement <4 x float> %52, i32 1 %55 = fmul float %17, %37 %56 = fmul float %18, %37 %57 = fmul float %19, %37 %58 = fmul float %20, %37 %59 = fmul float %21, %38 %60 = fadd float %59, %55 %61 = fmul float %22, %38 %62 = fadd float %61, %56 %63 = fmul float %23, %38 %64 = fadd float %63, %57 %65 = fmul float %24, %38 %66 = fadd float %65, %58 %67 = fmul float %25, %39 %68 = fadd float %67, %60 %69 = fmul float %26, %39 %70 = fadd float %69, %62 %71 = fmul float %27, %39 %72 = fadd float %71, %64 %73 = fmul float %28, %39 %74 = fadd float %73, %66 %75 = fmul float %29, %40 %76 = fadd float %75, %68 %77 = fmul float %30, %40 %78 = fadd float %77, %70 %79 = fmul float %31, %40 %80 = fadd float %79, %72 %81 = fmul float %32, %40 %82 = fadd float %81, %74 %83 = fmul float %37, %13 %84 = fadd float %83, %15 %85 = fmul float %38, %14 %86 = fadd float %85, %16 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %45, float %46, float %47, float %48) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %53, float %54, float %84, float %86) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %76, float %78, float %80, float %82) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 buffer_load_format_xyzw v[9:12], v0, s[16:19], 0 idxen ; E00C2000 80040900 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101 exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s4 ; 7E000204 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v5, s5 ; 7E0A0205 v_mac_f32_e32 v0, s6, v1 ; 3E000206 v_mac_f32_e32 v5, s7, v2 ; 3E0A0407 exp 15, 33, 0, 0, 0, v9, v10, v0, v5 ; F800021F 05000A09 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x7 ; C2030107 s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108 s_buffer_load_dword s9, s[0:3], 0x9 ; C2048109 s_buffer_load_dword s10, s[0:3], 0xa ; C205010A s_buffer_load_dword s11, s[0:3], 0xb ; C205810B s_buffer_load_dword s12, s[0:3], 0xc ; C206010C s_buffer_load_dword s13, s[0:3], 0xd ; C206810D s_buffer_load_dword s14, s[0:3], 0xe ; C207010E s_buffer_load_dword s15, s[0:3], 0xf ; C207810F s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110 s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111 s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112 s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s8, v1 ; 10000208 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s7, v2 ; 3E000407 v_mul_f32_e32 v5, s4, v1 ; 100A0204 v_mac_f32_e32 v5, s9, v2 ; 3E0A0409 v_mul_f32_e32 v6, s5, v1 ; 100C0205 v_mac_f32_e32 v6, s10, v2 ; 3E0C040A v_mul_f32_e32 v1, s6, v1 ; 10020206 v_mac_f32_e32 v1, s11, v2 ; 3E02040B v_mac_f32_e32 v0, s12, v3 ; 3E00060C v_mac_f32_e32 v5, s13, v3 ; 3E0A060D v_mac_f32_e32 v6, s14, v3 ; 3E0C060E v_mac_f32_e32 v1, s15, v3 ; 3E02060F v_mac_f32_e32 v0, s16, v4 ; 3E000810 v_mac_f32_e32 v5, s17, v4 ; 3E0A0811 v_mac_f32_e32 v6, s18, v4 ; 3E0C0812 v_mac_f32_e32 v1, s0, v4 ; 3E020800 exp 15, 12, 0, 1, 0, v0, v5, v6, v1 ; F80008CF 01060500 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 256 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: ABS TEMP[0].xy, IN[1].zwww 1: ADD TEMP[0].xy, IMM[0].xxxx, -TEMP[0].xyyy 2: MUL TEMP[0].xy, TEMP[0].xyyy, CONST[1].xyyy 3: MOV TEMP[1].xy, IN[1].xyyy 4: TEX TEMP[1], TEMP[1], SAMP[0], 2D 5: MUL TEMP[1], TEMP[1], IN[0] 6: MOV TEMP[2].xyz, TEMP[1].xyzx 7: MIN TEMP[0].x, TEMP[0].xxxx, TEMP[0].yyyy 8: MOV_SAT TEMP[0].x, TEMP[0].xxxx 9: MUL TEMP[0].x, TEMP[1].wwww, TEMP[0].xxxx 10: MOV TEMP[2].w, TEMP[0].xxxx 11: MOV OUT[0], TEMP[2] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %26 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %27 = load <32 x i8>, <32 x i8> addrspace(2)* %26, align 32, !tbaa !0 %28 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %38 = call float @llvm.fabs.f32(float %36) %39 = call float @llvm.fabs.f32(float %37) %40 = fsub float 1.000000e+00, %38 %41 = fsub float 1.000000e+00, %39 %42 = fmul float %40, %24 %43 = fmul float %41, %25 %44 = bitcast float %34 to i32 %45 = bitcast float %35 to i32 %46 = insertelement <2 x i32> undef, i32 %44, i32 0 %47 = insertelement <2 x i32> %46, i32 %45, i32 1 %48 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %47, <32 x i8> %27, <16 x i8> %29, i32 2) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = fmul float %49, %30 %54 = fmul float %50, %31 %55 = fmul float %51, %32 %56 = fmul float %52, %33 %57 = call float @llvm.minnum.f32(float %42, float %43) %58 = call float @llvm.AMDIL.clamp.(float %57, float 0.000000e+00, float 1.000000e+00) %59 = fmul float %56, %58 %60 = call i32 @llvm.SI.packf16(float %53, float %54) %61 = bitcast i32 %60 to float %62 = call i32 @llvm.SI.packf16(float %55, float %59) %63 = bitcast i32 %62 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %61, float %63, float %61, float %63) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s0, s[0:3], 0x5 ; C2000105 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[12:19], s[8:11] ; F0800F00 00430906 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v2, v9 ; 10021302 v_mul_f32_e32 v2, v3, v10 ; 10041503 v_mul_f32_e32 v3, v4, v11 ; 10061704 v_mul_f32_e32 v4, v5, v12 ; 10081905 v_sub_f32_e64 v5, 1.0, |v8| ; D2080205 000210F2 v_mul_f32_e32 v5, s4, v5 ; 100A0A04 v_sub_f32_e64 v0, 1.0, |v0| ; D2080200 000200F2 v_mul_f32_e32 v0, s0, v0 ; 10000000 v_min_f32_e32 v0, v0, v5 ; 1E000B00 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v0, v3, v0 ; 5E000103 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 188 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xyz, IN[1].xyzx 5: MOV OUT[1], TEMP[1] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = fmul float %13, %33 %45 = fmul float %14, %33 %46 = fmul float %15, %33 %47 = fmul float %16, %33 %48 = fmul float %17, %34 %49 = fadd float %48, %44 %50 = fmul float %18, %34 %51 = fadd float %50, %45 %52 = fmul float %19, %34 %53 = fadd float %52, %46 %54 = fmul float %20, %34 %55 = fadd float %54, %47 %56 = fmul float %21, %35 %57 = fadd float %56, %49 %58 = fmul float %22, %35 %59 = fadd float %58, %51 %60 = fmul float %23, %35 %61 = fadd float %60, %53 %62 = fmul float %24, %35 %63 = fadd float %62, %55 %64 = fmul float %25, %36 %65 = fadd float %64, %57 %66 = fmul float %26, %36 %67 = fadd float %66, %59 %68 = fmul float %27, %36 %69 = fadd float %68, %61 %70 = fmul float %28, %36 %71 = fadd float %70, %63 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %65, float %67, float %69, float %71) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v0, s12, v2 ; 1000040C s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, s6, v3 ; 3E000606 v_mul_f32_e32 v9, s13, v2 ; 1012040D v_mac_f32_e32 v9, s7, v3 ; 3E120607 v_mul_f32_e32 v10, s4, v2 ; 10140404 v_mac_f32_e32 v10, s8, v3 ; 3E140608 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mac_f32_e32 v2, s9, v3 ; 3E040609 v_mac_f32_e32 v0, s10, v4 ; 3E00080A v_mac_f32_e32 v9, s11, v4 ; 3E12080B v_mac_f32_e32 v10, s14, v4 ; 3E14080E v_mac_f32_e32 v2, s15, v4 ; 3E04080F v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mac_f32_e32 v9, s17, v5 ; 3E120A11 v_mac_f32_e32 v10, s18, v5 ; 3E140A12 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 exp 15, 32, 0, 0, 0, v6, v7, v8, v1 ; F800020F 01080706 exp 15, 12, 0, 1, 0, v0, v9, v10, v2 ; F80008CF 020A0900 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 196 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { -0.5000, 2.0000, 0.0000, 1.0000} 0: ADD TEMP[0].xy, IN[0].xyyy, IMM[0].xxxx 1: MUL TEMP[0].xy, TEMP[0].xyyy, IMM[0].yyyy 2: MOV TEMP[1].zw, IMM[0].zzzz 3: DP2 TEMP[2].x, TEMP[0].xyyy, TEMP[0].xyyy 4: SQRT TEMP[2].x, TEMP[2].xxxx 5: ADD TEMP[2].xy, IMM[0].wwww, -TEMP[2].xxxx 6: MOV_SAT TEMP[2].xy, TEMP[2].xyyy 7: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[0].xyyy 8: MUL TEMP[0], TEMP[1], CONST[0].xxxx 9: MOV OUT[0], TEMP[0] 10: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %26 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %27 = fadd float %25, -5.000000e-01 %28 = fadd float %26, -5.000000e-01 %29 = fmul float %27, 2.000000e+00 %30 = fmul float %28, 2.000000e+00 %31 = fmul float %29, %29 %32 = fmul float %30, %30 %33 = fadd float %31, %32 %34 = call float @llvm.sqrt.f32(float %33) %35 = fsub float 1.000000e+00, %34 %36 = fsub float 1.000000e+00, %34 %37 = call float @llvm.AMDIL.clamp.(float %35, float 0.000000e+00, float 1.000000e+00) %38 = call float @llvm.AMDIL.clamp.(float %36, float 0.000000e+00, float 1.000000e+00) %39 = fmul float %37, %29 %40 = fmul float %38, %30 %41 = fmul float %39, %24 %42 = fmul float %40, %24 %43 = fmul float %24, 0.000000e+00 %44 = fmul float %24, 0.000000e+00 %45 = call i32 @llvm.SI.packf16(float %41, float %42) %46 = bitcast i32 %45 to float %47 = call i32 @llvm.SI.packf16(float %43, float %44) %48 = bitcast i32 %47 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %46, float %48, float %46, float %48) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v0, v0, 1, 0, [m0] ; C8000100 v_interp_p2_f32 v0, [v0], v1, 1, 0, [m0] ; C8010101 v_add_f32_e32 v1, -0.5, v2 ; 060204F1 v_add_f32_e32 v0, -0.5, v0 ; 060000F1 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 v_add_f32_e32 v1, v1, v1 ; 06020301 v_add_f32_e32 v0, v0, v0 ; 06000100 v_mul_f32_e32 v2, v0, v0 ; 10040100 v_mac_f32_e32 v2, v1, v1 ; 3E040301 v_sqrt_f32_e32 v2, v2 ; 7E046702 v_sub_f32_e32 v2, 1.0, v2 ; 080404F2 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mul_f32_e32 v1, v1, v2 ; 10020501 v_mul_f32_e32 v0, v0, v2 ; 10000500 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s0, v1 ; 10020200 v_mul_f32_e32 v0, s0, v0 ; 10000000 v_mul_f32_e64 v2, 0, s0 ; D2100002 00000080 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_cvt_pkrtz_f16_f32_e32 v1, v2, v2 ; 5E020502 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 4 Code Size: 120 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xy, IN[1].xyxx 5: MOV OUT[1], TEMP[1] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = fmul float %13, %33 %44 = fmul float %14, %33 %45 = fmul float %15, %33 %46 = fmul float %16, %33 %47 = fmul float %17, %34 %48 = fadd float %47, %43 %49 = fmul float %18, %34 %50 = fadd float %49, %44 %51 = fmul float %19, %34 %52 = fadd float %51, %45 %53 = fmul float %20, %34 %54 = fadd float %53, %46 %55 = fmul float %21, %35 %56 = fadd float %55, %48 %57 = fmul float %22, %35 %58 = fadd float %57, %50 %59 = fmul float %23, %35 %60 = fadd float %59, %52 %61 = fmul float %24, %35 %62 = fadd float %61, %54 %63 = fmul float %25, %36 %64 = fadd float %63, %56 %65 = fmul float %26, %36 %66 = fadd float %65, %58 %67 = fmul float %27, %36 %68 = fadd float %67, %60 %69 = fmul float %28, %36 %70 = fadd float %69, %62 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v0, s12, v2 ; 1000040C s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, s6, v3 ; 3E000606 v_mul_f32_e32 v8, s13, v2 ; 1010040D v_mac_f32_e32 v8, s7, v3 ; 3E100607 v_mul_f32_e32 v9, s4, v2 ; 10120404 v_mac_f32_e32 v9, s8, v3 ; 3E120608 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mac_f32_e32 v2, s9, v3 ; 3E040609 v_mac_f32_e32 v0, s10, v4 ; 3E00080A v_mac_f32_e32 v8, s11, v4 ; 3E10080B v_mac_f32_e32 v9, s14, v4 ; 3E12080E v_mac_f32_e32 v2, s15, v4 ; 3E04080F v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mac_f32_e32 v8, s17, v5 ; 3E100A11 v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706 exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 196 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL CONST[0..6] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { -0.2500, -0.5000, 2.0000, 0.0000} IMM[1] FLT32 { 20.0000, 1.0000, 0.5000, -3.7600} IMM[2] FLT32 { 0.1000, -0.0500, 0.0000, 1.0000} 0: ADD TEMP[0].xy, IN[0].xyyy, IMM[0].xyyy 1: MUL TEMP[0].xy, TEMP[0].xyyy, IMM[0].zzzz 2: MUL TEMP[1].x, CONST[2].xxxx, CONST[4].xxxx 3: MOV TEMP[2].y, IMM[0].wwww 4: ABS TEMP[3].x, TEMP[0].yyyy 5: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[3].xxxx 6: ADD TEMP[3].x, TEMP[0].xxxx, -TEMP[3].xxxx 7: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].wwww 8: MIN TEMP[3].x, TEMP[3].xxxx, CONST[5].xxxx 9: MOV TEMP[2].x, TEMP[3].xxxx 10: ADD TEMP[4].xy, TEMP[0].xyyy, -TEMP[2].xyyy 11: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[1].xxxx 12: MAD TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx, CONST[5].xxxx 13: RSQ TEMP[1].x, TEMP[1].xxxx 14: DP2 TEMP[4].x, TEMP[4].xyyy, TEMP[4].xyyy 15: SQRT TEMP[4].x, TEMP[4].xxxx 16: MAD TEMP[1].x, TEMP[3].xxxx, TEMP[1].xxxx, TEMP[4].xxxx 17: ADD TEMP[0].xy, TEMP[0].xyyy, -TEMP[2].xyyy 18: MUL TEMP[2].xy, TEMP[0].xyyy, IMM[1].xxxx 19: MOV TEMP[3].xy, TEMP[2].xyxx 20: MUL TEMP[0].xy, TEMP[0].xyyy, IMM[1].xxxx 21: DP2 TEMP[0].x, TEMP[0].xyyy, TEMP[0].xyyy 22: SQRT TEMP[0].x, TEMP[0].xxxx 23: FSLT TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 24: UIF TEMP[0].xxxx :0 25: DP2 TEMP[0].x, TEMP[2].xyyy, TEMP[2].xyyy 26: RSQ TEMP[0].x, TEMP[0].xxxx 27: MUL TEMP[3].xy, TEMP[2].xyyy, TEMP[0].xxxx 28: ENDIF 29: SQRT TEMP[0].x, TEMP[1].xxxx 30: MAD TEMP[0].x, CONST[3].xxxx, CONST[2].xxxx, -TEMP[0].xxxx 31: MAX TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww 32: MUL TEMP[1].x, CONST[0].xxxx, TEMP[0].xxxx 33: MAD TEMP[1].x, TEMP[1].xxxx, IMM[1].zzzz, IMM[1].wwww 34: ADD TEMP[2].xy, IN[0].xyyy, IMM[0].yyyy 35: MUL TEMP[0].x, CONST[0].xxxx, TEMP[0].xxxx 36: SIN TEMP[0].x, TEMP[0].xxxx 37: MUL TEMP[0].x, TEMP[0].xxxx, CONST[1].xxxx 38: MAX TEMP[4].x, CONST[3].xxxx, IMM[2].xxxx 39: MUL TEMP[4].x, CONST[6].xxxx, TEMP[4].xxxx 40: RSQ TEMP[4].x, TEMP[4].xxxx 41: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx 42: MAD TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx, IMM[0].zzzz 43: MUL TEMP[1].x, CONST[6].yyyy, TEMP[1].xxxx 44: RCP TEMP[1].x, TEMP[1].xxxx 45: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[1].xxxx 46: DP2 TEMP[1].x, TEMP[2].xyyy, TEMP[2].xyyy 47: SQRT TEMP[1].x, TEMP[1].xxxx 48: ADD TEMP[1].x, TEMP[1].xxxx, IMM[2].yyyy 49: MOV_SAT TEMP[1].x, TEMP[1].xxxx 50: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz 51: ADD TEMP[1].x, IMM[1].yyyy, -TEMP[1].xxxx 52: MOV_SAT TEMP[1].x, TEMP[1].xxxx 53: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[1].xxxx 54: MUL TEMP[0].xy, TEMP[3].xyyy, TEMP[0].xxxx 55: MOV TEMP[1].zw, IMM[2].wwzw 56: MOV TEMP[1].xy, TEMP[0].xyxx 57: MOV OUT[0], TEMP[1] 58: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %34 = fadd float %32, -2.500000e-01 %35 = fadd float %33, -5.000000e-01 %36 = fmul float %34, 2.000000e+00 %37 = fmul float %35, 2.000000e+00 %38 = fmul float %26, %28 %39 = call float @llvm.fabs.f32(float %37) %40 = fmul float %38, %39 %41 = fsub float %36, %40 %42 = call float @llvm.maxnum.f32(float %41, float 0.000000e+00) %43 = call float @llvm.minnum.f32(float %42, float %29) %44 = fsub float %36, %43 %45 = fmul float %43, %38 %46 = fmul float %38, %38 %47 = fadd float %46, %29 %48 = call float @llvm.AMDGPU.rsq.clamped.f32(float %47) %49 = fmul float %44, %44 %50 = fmul float %37, %37 %51 = fadd float %49, %50 %52 = call float @llvm.sqrt.f32(float %51) %53 = fmul float %45, %48 %54 = fadd float %53, %52 %55 = fsub float %36, %43 %56 = fmul float %55, 2.000000e+01 %57 = fmul float %37, 2.000000e+01 %58 = fmul float %55, 2.000000e+01 %59 = fmul float %37, 2.000000e+01 %60 = fmul float %58, %58 %61 = fmul float %59, %59 %62 = fadd float %60, %61 %63 = call float @llvm.sqrt.f32(float %62) %64 = fcmp ogt float %63, 1.000000e+00 br i1 %64, label %IF, label %ENDIF IF: ; preds = %main_body %65 = fmul float %56, %56 %66 = fmul float %57, %57 %67 = fadd float %65, %66 %68 = call float @llvm.AMDGPU.rsq.clamped.f32(float %67) %69 = fmul float %56, %68 %70 = fmul float %57, %68 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp12.0 = phi float [ %69, %IF ], [ %56, %main_body ] %temp13.0 = phi float [ %70, %IF ], [ %57, %main_body ] %71 = call float @llvm.sqrt.f32(float %54) %72 = fmul float %27, %26 %73 = fsub float %72, %71 %74 = call float @llvm.maxnum.f32(float %73, float 0.000000e+00) %75 = fmul float %24, %74 %76 = fmul float %75, 5.000000e-01 %77 = fadd float %76, 0xC00E147AE0000000 %78 = fadd float %32, -5.000000e-01 %79 = fadd float %33, -5.000000e-01 %80 = fmul float %24, %74 %81 = call float @llvm.sin.f32(float %80) %82 = fmul float %81, %25 %83 = call float @llvm.maxnum.f32(float %27, float 0x3FB99999A0000000) %84 = fmul float %30, %83 %85 = call float @llvm.AMDGPU.rsq.clamped.f32(float %84) %86 = fmul float %82, %85 %87 = fmul float %77, %77 %88 = fadd float %87, 2.000000e+00 %89 = fmul float %31, %88 %90 = fdiv float 1.000000e+00, %89 %91 = fmul float %86, %90 %92 = fmul float %78, %78 %93 = fmul float %79, %79 %94 = fadd float %92, %93 %95 = call float @llvm.sqrt.f32(float %94) %96 = fadd float %95, 0xBFA99999A0000000 %97 = call float @llvm.AMDIL.clamp.(float %96, float 0.000000e+00, float 1.000000e+00) %98 = fmul float %97, 2.000000e+00 %99 = fsub float 1.000000e+00, %98 %100 = call float @llvm.AMDIL.clamp.(float %99, float 0.000000e+00, float 1.000000e+00) %101 = fmul float %91, %100 %102 = fmul float %temp12.0, %101 %103 = fmul float %temp13.0, %101 %104 = call i32 @llvm.SI.packf16(float %102, float %103) %105 = bitcast i32 %104 to float %106 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 1.000000e+00) %107 = bitcast i32 %106 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %105, float %107, float %105, float %107) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v3, 0xbe800000 ; 7E0602FF BE800000 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s3, s[12:15], 0x0 ; C2018D00 s_buffer_load_dword s0, s[12:15], 0x4 ; C2000D04 s_buffer_load_dword s4, s[12:15], 0x8 ; C2020D08 s_buffer_load_dword s5, s[12:15], 0xc ; C2028D0C s_buffer_load_dword s6, s[12:15], 0x10 ; C2030D10 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 s_buffer_load_dword s7, s[12:15], 0x14 ; C2038D14 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 v_add_f32_e32 v0, v2, v3 ; 06000702 v_add_f32_e32 v1, -0.5, v4 ; 060208F1 v_add_f32_e32 v7, v0, v0 ; 060E0100 v_add_f32_e32 v5, v1, v1 ; 060A0301 s_buffer_load_dword s2, s[12:15], 0x18 ; C2010D18 s_buffer_load_dword s1, s[12:15], 0x19 ; C2008D19 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v1, s6 ; 7E020206 v_mul_f32_e32 v8, s4, v1 ; 10100204 v_mad_f32 v1, -v8, |v5|, v7 ; D2820201 241E0B08 v_mad_f32 v6, v8, v8, s7 ; D2820006 001E1108 v_max_f32_e32 v1, 0, v1 ; 20020280 v_min_f32_e32 v9, s7, v1 ; 1E120207 v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_mad_f32 v0, 2.0, v0, -v9 ; D2820000 842600F4 v_mov_b32_e32 v10, 0x41a00000 ; 7E1402FF 41A00000 v_mul_f32_e32 v1, v10, v0 ; 1002010A v_mul_f32_e32 v0, v10, v5 ; 10000B0A v_mul_f32_e32 v10, v0, v0 ; 10140100 v_mac_f32_e32 v10, v1, v1 ; 3E140301 v_sqrt_f32_e32 v10, v10 ; 7E14670A v_cmp_lt_f32_e32 vcc, 1.0, v10 ; 7C0214F2 s_and_saveexec_b64 s[6:7], vcc ; BE86246A s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E v_mul_f32_e32 v10, v0, v0 ; 10140100 v_mac_f32_e32 v10, v1, v1 ; 3E140301 v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A v_mul_f32_e32 v1, v10, v1 ; 1002030A v_mul_f32_e32 v0, v10, v0 ; 1000010A s_or_b64 exec, exec, s[6:7] ; 88FE067E v_subrev_f32_e32 v7, v9, v7 ; 0A0E0F09 v_mul_f32_e32 v8, v8, v9 ; 10101308 v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_mac_f32_e32 v5, v7, v7 ; 3E0A0F07 v_sqrt_f32_e32 v5, v5 ; 7E0A6705 v_mac_f32_e32 v5, v6, v8 ; 3E0A1106 v_sqrt_f32_e32 v5, v5 ; 7E0A6705 v_mad_f32 v5, s4, v3, -v5 ; D2820005 84160604 v_max_f32_e32 v5, 0, v5 ; 200A0A80 v_mul_f32_e32 v5, s3, v5 ; 100A0A03 v_madak_f32_e32 v6, 0.5, v5, 0xc070a3d7 ; 420C0AF0 C070A3D7 v_add_f32_e32 v2, -0.5, v2 ; 060404F1 v_add_f32_e32 v4, -0.5, v4 ; 060808F1 v_mul_f32_e32 v5, 0x3e22f983, v5 ; 100A0AFF 3E22F983 v_fract_f32_e32 v5, v5 ; 7E0A4105 v_max_f32_e32 v3, 0x3dcccccd, v3 ; 200606FF 3DCCCCCD v_mul_f32_e32 v3, s2, v3 ; 10060602 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_mad_f32 v6, v6, v6, 2.0 ; D2820006 03D20D06 v_mul_f32_e32 v6, s1, v6 ; 100C0C01 v_rcp_f32_e32 v6, v6 ; 7E0C5506 v_sin_f32_e32 v5, v5 ; 7E0A6B05 v_mul_f32_e32 v5, s0, v5 ; 100A0A00 v_mul_f32_e32 v3, v3, v5 ; 10060B03 v_mul_f32_e32 v3, v6, v3 ; 10060706 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_mac_f32_e32 v4, v2, v2 ; 3E080502 v_sqrt_f32_e32 v2, v4 ; 7E046704 v_mov_b32_e32 v4, 0xbd4ccccd ; 7E0802FF BD4CCCCD v_add_f32_e32 v2, v2, v4 ; 06040902 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mad_f32 v2, -2.0, v2, 1.0 ; D2820002 03CA04F5 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mul_f32_e32 v2, v2, v3 ; 10040702 v_mul_f32_e32 v1, v2, v1 ; 10020302 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_cvt_pkrtz_f16_f32_e64 v1, 0, 1.0 ; D25E0001 0001E480 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 404 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..12] DCL TEMP[0..5], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0].xyz, CONST[3], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].x, CONST[4].xxxx 5: MOV TEMP[1].y, CONST[5].xxxx 6: MOV TEMP[1].z, CONST[6].xxxx 7: MOV TEMP[2].x, CONST[4].yyyy 8: MOV TEMP[2].y, CONST[5].yyyy 9: MOV TEMP[2].z, CONST[6].yyyy 10: MOV TEMP[3].x, CONST[4].zzzz 11: MOV TEMP[3].y, CONST[5].zzzz 12: MOV TEMP[3].z, CONST[6].zzzz 13: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[1].xxxx 14: MAD TEMP[1].xyz, TEMP[2].xyzz, IN[1].yyyy, TEMP[1].xyzz 15: MAD TEMP[1].xyz, TEMP[3].xyzz, IN[1].zzzz, TEMP[1].xyzz 16: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 17: RSQ TEMP[2].x, TEMP[2].xxxx 18: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 19: MUL TEMP[2].xyz, CONST[0].xyzz, IN[3].xxxx 20: MAD TEMP[2].xyz, CONST[1].xyzz, IN[3].yyyy, TEMP[2].xyzz 21: MAD TEMP[2].xyz, CONST[2].xyzz, IN[3].zzzz, TEMP[2].xyzz 22: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 23: RSQ TEMP[3].x, TEMP[3].xxxx 24: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 25: MUL TEMP[3].xyz, TEMP[1].zxyy, TEMP[2].yzxx 26: MAD TEMP[3].xyz, TEMP[1].yzxx, TEMP[2].zxyy, -TEMP[3].xyzz 27: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[3].wwww 28: MOV TEMP[4].x, TEMP[2].xxxx 29: MOV TEMP[4].y, TEMP[3].xxxx 30: MOV TEMP[4].z, TEMP[1].xxxx 31: MOV TEMP[4].w, TEMP[0].xxxx 32: MOV TEMP[5].x, TEMP[2].yyyy 33: MOV TEMP[5].y, TEMP[3].yyyy 34: MOV TEMP[5].z, TEMP[1].yyyy 35: MOV TEMP[5].w, TEMP[0].yyyy 36: MOV TEMP[2].x, TEMP[2].zzzz 37: MOV TEMP[2].y, TEMP[3].zzzz 38: MOV TEMP[2].z, TEMP[1].zzzz 39: MOV TEMP[2].w, TEMP[0].zzzz 40: MUL TEMP[0], CONST[9], IN[0].xxxx 41: MAD TEMP[0], CONST[10], IN[0].yyyy, TEMP[0] 42: MAD TEMP[0], CONST[11], IN[0].zzzz, TEMP[0] 43: MAD TEMP[0], CONST[12], IN[0].wwww, TEMP[0] 44: MAD TEMP[1].xy, IN[2].xyyy, CONST[8].xyyy, CONST[8].zwww 45: MOV OUT[1], TEMP[4] 46: MOV OUT[4], TEMP[1] 47: MOV OUT[2], TEMP[5] 48: MOV OUT[3], TEMP[2] 49: MOV OUT[0], TEMP[0] 50: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = add i32 %5, %7 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = extractelement <4 x float> %57, i32 3 %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 %64 = add i32 %5, %7 %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %64) %66 = extractelement <4 x float> %65, i32 0 %67 = extractelement <4 x float> %65, i32 1 %68 = extractelement <4 x float> %65, i32 2 %69 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = add i32 %5, %7 %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %71) %73 = extractelement <4 x float> %72, i32 0 %74 = extractelement <4 x float> %72, i32 1 %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 %77 = add i32 %5, %7 %78 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %77) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 1 %81 = extractelement <4 x float> %78, i32 2 %82 = extractelement <4 x float> %78, i32 3 %83 = fmul float %13, %58 %84 = fmul float %14, %58 %85 = fmul float %15, %58 %86 = fmul float %16, %59 %87 = fadd float %86, %83 %88 = fmul float %17, %59 %89 = fadd float %88, %84 %90 = fmul float %18, %59 %91 = fadd float %90, %85 %92 = fmul float %19, %60 %93 = fadd float %92, %87 %94 = fmul float %20, %60 %95 = fadd float %94, %89 %96 = fmul float %21, %60 %97 = fadd float %96, %91 %98 = fmul float %22, %61 %99 = fadd float %98, %93 %100 = fmul float %23, %61 %101 = fadd float %100, %95 %102 = fmul float %24, %61 %103 = fadd float %102, %97 %104 = fmul float %25, %66 %105 = fmul float %28, %66 %106 = fmul float %31, %66 %107 = fmul float %26, %67 %108 = fadd float %107, %104 %109 = fmul float %29, %67 %110 = fadd float %109, %105 %111 = fmul float %32, %67 %112 = fadd float %111, %106 %113 = fmul float %27, %68 %114 = fadd float %113, %108 %115 = fmul float %30, %68 %116 = fadd float %115, %110 %117 = fmul float %33, %68 %118 = fadd float %117, %112 %119 = fmul float %114, %114 %120 = fmul float %116, %116 %121 = fadd float %120, %119 %122 = fmul float %118, %118 %123 = fadd float %121, %122 %124 = call float @llvm.AMDGPU.rsq.clamped.f32(float %123) %125 = fmul float %114, %124 %126 = fmul float %116, %124 %127 = fmul float %118, %124 %128 = fmul float %13, %79 %129 = fmul float %14, %79 %130 = fmul float %15, %79 %131 = fmul float %16, %80 %132 = fadd float %131, %128 %133 = fmul float %17, %80 %134 = fadd float %133, %129 %135 = fmul float %18, %80 %136 = fadd float %135, %130 %137 = fmul float %19, %81 %138 = fadd float %137, %132 %139 = fmul float %20, %81 %140 = fadd float %139, %134 %141 = fmul float %21, %81 %142 = fadd float %141, %136 %143 = fmul float %138, %138 %144 = fmul float %140, %140 %145 = fadd float %144, %143 %146 = fmul float %142, %142 %147 = fadd float %145, %146 %148 = call float @llvm.AMDGPU.rsq.clamped.f32(float %147) %149 = fmul float %138, %148 %150 = fmul float %140, %148 %151 = fmul float %142, %148 %152 = fmul float %127, %150 %153 = fmul float %125, %151 %154 = fmul float %126, %149 %155 = fmul float %126, %151 %156 = fsub float %155, %152 %157 = fmul float %127, %149 %158 = fsub float %157, %153 %159 = fmul float %125, %150 %160 = fsub float %159, %154 %161 = fmul float %156, %82 %162 = fmul float %158, %82 %163 = fmul float %160, %82 %164 = fmul float %38, %58 %165 = fmul float %39, %58 %166 = fmul float %40, %58 %167 = fmul float %41, %58 %168 = fmul float %42, %59 %169 = fadd float %168, %164 %170 = fmul float %43, %59 %171 = fadd float %170, %165 %172 = fmul float %44, %59 %173 = fadd float %172, %166 %174 = fmul float %45, %59 %175 = fadd float %174, %167 %176 = fmul float %46, %60 %177 = fadd float %176, %169 %178 = fmul float %47, %60 %179 = fadd float %178, %171 %180 = fmul float %48, %60 %181 = fadd float %180, %173 %182 = fmul float %49, %60 %183 = fadd float %182, %175 %184 = fmul float %50, %61 %185 = fadd float %184, %177 %186 = fmul float %51, %61 %187 = fadd float %186, %179 %188 = fmul float %52, %61 %189 = fadd float %188, %181 %190 = fmul float %53, %61 %191 = fadd float %190, %183 %192 = fmul float %73, %34 %193 = fadd float %192, %36 %194 = fmul float %74, %35 %195 = fadd float %194, %37 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %149, float %161, float %125, float %99) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %150, float %162, float %126, float %101) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %151, float %163, float %127, float %103) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %193, float %195, float %127, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %185, float %187, float %189, float %191) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s20, s[0:3], 0x0 ; C20A0100 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[9:12], v0, s[16:19], 0 idxen ; E00C2000 80040900 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0xd ; C204010D s_buffer_load_dword s9, s[0:3], 0xe ; C204810E s_buffer_load_dword s10, s[0:3], 0x10 ; C2050110 s_buffer_load_dword s11, s[0:3], 0x11 ; C2058111 s_buffer_load_dword s12, s[0:3], 0x12 ; C2060112 s_buffer_load_dword s13, s[0:3], 0x14 ; C2068114 s_buffer_load_dword s14, s[0:3], 0x15 ; C2070115 s_buffer_load_dword s15, s[0:3], 0x16 ; C2078116 s_buffer_load_dword s16, s[0:3], 0x18 ; C2080118 s_buffer_load_dword s17, s[0:3], 0x19 ; C2088119 s_buffer_load_dword s18, s[0:3], 0x6 ; C2090106 s_buffer_load_dword s19, s[0:3], 0x8 ; C2098108 s_buffer_load_dword s21, s[0:3], 0x9 ; C20A8109 s_buffer_load_dword s22, s[0:3], 0xa ; C20B010A s_buffer_load_dword s23, s[0:3], 0xc ; C20B810C s_buffer_load_dword s24, s[0:3], 0x1a ; C20C011A s_buffer_load_dword s25, s[0:3], 0x20 ; C20C8120 s_buffer_load_dword s26, s[0:3], 0x21 ; C20D0121 s_buffer_load_dword s27, s[0:3], 0x22 ; C20D8122 s_buffer_load_dword s28, s[0:3], 0x23 ; C20E0123 s_buffer_load_dword s29, s[0:3], 0x24 ; C20E8124 s_buffer_load_dword s30, s[0:3], 0x25 ; C20F0125 s_buffer_load_dword s31, s[0:3], 0x26 ; C20F8126 s_buffer_load_dword s32, s[0:3], 0x27 ; C2100127 s_buffer_load_dword s33, s[0:3], 0x28 ; C2108128 s_buffer_load_dword s34, s[0:3], 0x29 ; C2110129 s_buffer_load_dword s35, s[0:3], 0x2a ; C211812A s_buffer_load_dword s36, s[0:3], 0x2b ; C212012B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s27 ; 7E00021B s_buffer_load_dword s27, s[0:3], 0x2c ; C20D812C v_mov_b32_e32 v15, s28 ; 7E1E021C s_buffer_load_dword s28, s[0:3], 0x2d ; C20E012D s_buffer_load_dword s37, s[0:3], 0x2e ; C212812E s_buffer_load_dword s38, s[0:3], 0x2f ; C213012F s_buffer_load_dword s39, s[0:3], 0x30 ; C2138130 s_buffer_load_dword s40, s[0:3], 0x31 ; C2140131 s_buffer_load_dword s41, s[0:3], 0x32 ; C2148132 s_buffer_load_dword s0, s[0:3], 0x33 ; C2000133 v_mul_f32_e32 v16, s29, v2 ; 1020041D v_mac_f32_e32 v16, s33, v3 ; 3E200621 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v16, s27, v4 ; 3E20081B v_mac_f32_e32 v16, s39, v5 ; 3E200A27 v_mac_f32_e32 v0, s25, v9 ; 3E001219 v_mac_f32_e32 v15, s26, v10 ; 3E1E141A v_mul_f32_e32 v9, s10, v6 ; 10120C0A v_mac_f32_e32 v9, s11, v7 ; 3E120E0B v_mac_f32_e32 v9, s12, v8 ; 3E12100C v_mul_f32_e32 v10, s13, v6 ; 10140C0D v_mac_f32_e32 v10, s14, v7 ; 3E140E0E v_mac_f32_e32 v10, s15, v8 ; 3E14100F v_mul_f32_e32 v6, s16, v6 ; 100C0C10 v_mac_f32_e32 v6, s17, v7 ; 3E0C0E11 v_mac_f32_e32 v6, s24, v8 ; 3E0C1018 v_mul_f32_e32 v7, s30, v2 ; 100E041E v_mac_f32_e32 v7, s34, v3 ; 3E0E0622 v_mac_f32_e32 v7, s28, v4 ; 3E0E081C v_mac_f32_e32 v7, s40, v5 ; 3E0E0A28 v_mul_f32_e32 v8, s31, v2 ; 1010041F v_mac_f32_e32 v8, s35, v3 ; 3E100623 v_mac_f32_e32 v8, s37, v4 ; 3E100825 v_mac_f32_e32 v8, s41, v5 ; 3E100A29 v_mul_f32_e32 v17, s32, v2 ; 10220420 v_mac_f32_e32 v17, s36, v3 ; 3E220624 v_mac_f32_e32 v17, s38, v4 ; 3E220826 v_mac_f32_e32 v17, s0, v5 ; 3E220A00 v_mul_f32_e32 v18, s20, v2 ; 10240414 v_mac_f32_e32 v18, s6, v3 ; 3E240606 v_mac_f32_e32 v18, s19, v4 ; 3E240813 v_mac_f32_e32 v18, s23, v5 ; 3E240A17 v_mul_f32_e32 v19, s4, v2 ; 10260404 v_mac_f32_e32 v19, s7, v3 ; 3E260607 v_mac_f32_e32 v19, s21, v4 ; 3E260815 v_mac_f32_e32 v19, s8, v5 ; 3E260A08 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mac_f32_e32 v2, s18, v3 ; 3E040612 v_mac_f32_e32 v2, s22, v4 ; 3E040816 v_mac_f32_e32 v2, s9, v5 ; 3E040A09 v_mul_f32_e32 v3, s20, v11 ; 10061614 v_mac_f32_e32 v3, s6, v12 ; 3E061806 v_mul_f32_e32 v4, s4, v11 ; 10081604 v_mac_f32_e32 v4, s7, v12 ; 3E081807 v_mul_f32_e32 v5, s5, v11 ; 100A1605 v_mac_f32_e32 v5, s18, v12 ; 3E0A1812 v_mac_f32_e32 v3, s19, v13 ; 3E061A13 v_mac_f32_e32 v4, s21, v13 ; 3E081A15 v_mac_f32_e32 v5, s22, v13 ; 3E0A1A16 v_mul_f32_e32 v11, v9, v9 ; 10161309 v_mac_f32_e32 v11, v10, v10 ; 3E16150A v_mac_f32_e32 v11, v6, v6 ; 3E160D06 v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B v_mul_f32_e32 v12, v3, v3 ; 10180703 v_mac_f32_e32 v12, v4, v4 ; 3E180904 v_mac_f32_e32 v12, v5, v5 ; 3E180B05 v_rsq_clamp_f32_e32 v12, v12 ; 7E18590C v_mul_f32_e32 v9, v11, v9 ; 1012130B v_mul_f32_e32 v10, v11, v10 ; 1014150B v_mul_f32_e32 v6, v11, v6 ; 100C0D0B v_mul_f32_e32 v3, v12, v3 ; 1006070C v_mul_f32_e32 v4, v12, v4 ; 1008090C v_mul_f32_e32 v5, v12, v5 ; 100A0B0C v_mul_f32_e32 v11, v4, v6 ; 10160D04 v_mad_f32 v11, v10, v5, -v11 ; D282000B 842E0B0A v_mul_f32_e32 v12, v5, v9 ; 10181305 v_mad_f32 v12, v6, v3, -v12 ; D282000C 84320706 v_mul_f32_e32 v13, v3, v10 ; 101A1503 v_mad_f32 v13, v9, v4, -v13 ; D282000D 84360909 v_mul_f32_e32 v11, v14, v11 ; 1016170E v_mul_f32_e32 v12, v14, v12 ; 1018190E v_mul_f32_e32 v13, v14, v13 ; 101A1B0E exp 15, 32, 0, 0, 0, v3, v11, v9, v18 ; F800020F 12090B03 exp 15, 33, 0, 0, 0, v4, v12, v10, v19 ; F800021F 130A0C04 exp 15, 34, 0, 0, 0, v5, v13, v6, v2 ; F800022F 02060D05 exp 15, 35, 0, 0, 0, v0, v15, v6, v1 ; F800023F 01060F00 exp 15, 12, 0, 1, 0, v16, v7, v8, v17 ; F80008CF 11080710 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 20 Code Size: 592 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000} IMM[1] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[3].xyyy 1: TEX TEMP[0].yw, TEMP[0], SAMP[0], 2D 2: MAD TEMP[0].xy, TEMP[0].wyyy, IMM[0].xxxx, IMM[0].yyyy 3: DP2 TEMP[1].x, TEMP[0].xyyy, TEMP[0].xyyy 4: MOV_SAT TEMP[1].x, TEMP[1].xxxx 5: ADD TEMP[1].x, IMM[0].zzzz, -TEMP[1].xxxx 6: SQRT TEMP[1].x, TEMP[1].xxxx 7: MOV TEMP[0].z, TEMP[1].xxxx 8: DP3 TEMP[1].x, IN[0].xyzz, TEMP[0].xyzz 9: DP3 TEMP[2].x, IN[1].xyzz, TEMP[0].xyzz 10: MOV TEMP[1].y, TEMP[2].xxxx 11: DP3 TEMP[0].x, IN[2].xyzz, TEMP[0].xyzz 12: MOV TEMP[1].z, TEMP[0].xxxx 13: MAD TEMP[0].xyz, TEMP[1].xyzz, IMM[0].wwww, IMM[0].wwww 14: MOV TEMP[0].w, IMM[1].xxxx 15: MOV OUT[0], TEMP[0] 16: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %37 = bitcast float %35 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %23, <16 x i8> %25, i32 2) %42 = extractelement <4 x float> %41, i32 1 %43 = extractelement <4 x float> %41, i32 3 %44 = fmul float %43, 2.000000e+00 %45 = fadd float %44, -1.000000e+00 %46 = fmul float %42, 2.000000e+00 %47 = fadd float %46, -1.000000e+00 %48 = fmul float %45, %45 %49 = fmul float %47, %47 %50 = fadd float %48, %49 %51 = call float @llvm.AMDIL.clamp.(float %50, float 0.000000e+00, float 1.000000e+00) %52 = fsub float 1.000000e+00, %51 %53 = call float @llvm.sqrt.f32(float %52) %54 = fmul float %26, %45 %55 = fmul float %27, %47 %56 = fadd float %55, %54 %57 = fmul float %28, %53 %58 = fadd float %56, %57 %59 = fmul float %29, %45 %60 = fmul float %30, %47 %61 = fadd float %60, %59 %62 = fmul float %31, %53 %63 = fadd float %61, %62 %64 = fmul float %32, %45 %65 = fmul float %33, %47 %66 = fadd float %65, %64 %67 = fmul float %34, %53 %68 = fadd float %66, %67 %69 = fmul float %58, 5.000000e-01 %70 = fadd float %69, 5.000000e-01 %71 = fmul float %63, 5.000000e-01 %72 = fadd float %71, 5.000000e-01 %73 = fmul float %68, 5.000000e-01 %74 = fadd float %73, 5.000000e-01 %75 = call i32 @llvm.SI.packf16(float %70, float %72) %76 = bitcast i32 %75 to float %77 = call i32 @llvm.SI.packf16(float %74, float 0.000000e+00) %78 = bitcast i32 %77 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %76, float %78, float %76, float %78) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600 v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601 v_interp_p1_f32 v8, v0, 0, 2, [m0] ; C8200800 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_interp_p2_f32 v8, [v8], v1, 0, 2, [m0] ; C8210801 v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900 v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901 v_interp_p1_f32 v10, v0, 2, 2, [m0] ; C8280A00 v_interp_p2_f32 v10, [v10], v1, 2, 2, [m0] ; C8290A01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[4:11], s[0:3] ; F0800A00 0001000B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4 v_mul_f32_e32 v2, v1, v2 ; 10040501 v_mac_f32_e32 v2, v0, v3 ; 3E040700 v_mul_f32_e32 v3, v1, v5 ; 10060B01 v_mac_f32_e32 v3, v0, v6 ; 3E060D00 v_mul_f32_e32 v5, v1, v8 ; 100A1101 v_mac_f32_e32 v5, v0, v9 ; 3E0A1300 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mac_f32_e32 v0, v1, v1 ; 3E000301 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v0, 1.0, v0 ; 080000F2 v_sqrt_f32_e32 v0, v0 ; 7E006700 v_mac_f32_e32 v2, v0, v4 ; 3E040900 v_mac_f32_e32 v3, v0, v7 ; 3E060F00 v_mac_f32_e32 v5, v0, v10 ; 3E0A1500 v_mad_f32 v0, 0.5, v2, 0.5 ; D2820000 03C204F0 v_mad_f32 v1, 0.5, v3, 0.5 ; D2820001 03C206F0 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_mad_f32 v1, 0.5, v5, 0.5 ; D2820001 03C20AF0 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 244 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0..7] DCL CONST[9..12] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 0.2500, -1.0000, 10.0000, 0.4999} IMM[1] INT32 {256, 0, 1, 2} IMM[2] FLT32 { 1.0000, 0.0000, 0.1000, 0.0039} IMM[3] FLT32 { 16.0000, -8.0000, 4.0000, -2.0000} IMM[4] INT32 {4, 0, 0, 0} 0: MUL TEMP[0].x, IN[2].xxxx, IMM[0].xxxx 1: F2I TEMP[0].x, TEMP[0].xxxx 2: F2I TEMP[1].x, IN[2].yyyy 3: IDIV TEMP[2].x, TEMP[1].xxxx, IMM[1].xxxx 4: I2F TEMP[3].x, TEMP[0].xxxx 5: I2F TEMP[4].x, TEMP[2].xxxx 6: MOV TEMP[3].y, TEMP[4].xxxx 7: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx 8: INEG TEMP[2].x, TEMP[2].xxxx 9: UADD TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx 10: I2F TEMP[2].x, TEMP[2].xxxx 11: MOV TEMP[3].z, TEMP[2].xxxx 12: ADD TEMP[2].xyz, TEMP[3].xyzz, IMM[0].yyyy 13: I2F TEMP[1].x, TEMP[1].xxxx 14: ADD TEMP[1].x, IN[2].yyyy, -TEMP[1].xxxx 15: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].wwww 16: F2I TEMP[1].x, TEMP[1].xxxx 17: USEQ TEMP[4].x, TEMP[1].xxxx, IMM[1].yyyy 18: AND TEMP[4].x, TEMP[4].xxxx, IMM[2].xxxx 19: USEQ TEMP[5].x, TEMP[1].xxxx, IMM[1].zzzz 20: AND TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx 21: MOV TEMP[4].y, TEMP[5].xxxx 22: USEQ TEMP[1].x, TEMP[1].xxxx, IMM[1].wwww 23: AND TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx 24: MOV TEMP[4].z, TEMP[1].xxxx 25: MOV TEMP[1].xyz, TEMP[4].xyzx 26: MOV TEMP[4].w, IMM[2].yyyy 27: MOV TEMP[4].xyz, TEMP[3].xyzx 28: MOV TEMP[3].y, IMM[2].yzyy 29: DP4 TEMP[4].x, TEMP[1], TEMP[4] 30: MUL TEMP[3].x, TEMP[4].xxxx, IMM[2].wwww 31: MOV TEMP[3].xy, TEMP[3].xyyy 32: MOV TEMP[3].w, IMM[2].yyyy 33: TXL TEMP[3].xy, TEMP[3], SAMP[0], 2D 34: MAD TEMP[4].x, TEMP[3].xxxx, IMM[3].xxxx, IMM[3].yyyy 35: MOV TEMP[2].w, TEMP[4].xxxx 36: MUL TEMP[3].x, TEMP[3].yyyy, IMM[3].zzzz 37: MOV TEMP[1].w, TEMP[3].xxxx 38: UMUL TEMP[0].x, IMM[4].xxxx, TEMP[0].xxxx 39: I2F TEMP[0].x, TEMP[0].xxxx 40: ADD TEMP[0].x, IN[2].xxxx, -TEMP[0].xxxx 41: ADD TEMP[0].x, TEMP[0].xxxx, IMM[3].wwww 42: MOV TEMP[3].x, CONST[4].xxxx 43: MOV TEMP[3].y, CONST[5].xxxx 44: MOV TEMP[3].z, CONST[6].xxxx 45: MOV TEMP[4].x, CONST[4].yyyy 46: MOV TEMP[4].y, CONST[5].yyyy 47: MOV TEMP[4].z, CONST[6].yyyy 48: MOV TEMP[5].x, CONST[4].zzzz 49: MOV TEMP[5].y, CONST[5].zzzz 50: MOV TEMP[5].z, CONST[6].zzzz 51: MUL TEMP[6], CONST[9], IN[0].xxxx 52: MAD TEMP[6], CONST[10], IN[0].yyyy, TEMP[6] 53: MAD TEMP[6], CONST[11], IN[0].zzzz, TEMP[6] 54: MAD TEMP[6], CONST[12], IN[0].wwww, TEMP[6] 55: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 56: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 57: MAD TEMP[0].xyz, TEMP[5].xyzz, TEMP[0].xxxx, TEMP[3].xyzz 58: MUL TEMP[3], CONST[0], IN[0].xxxx 59: MAD TEMP[3], CONST[1], IN[0].yyyy, TEMP[3] 60: MAD TEMP[3], CONST[2], IN[0].zzzz, TEMP[3] 61: MAD TEMP[3].xyz, CONST[3], IN[0].wwww, TEMP[3] 62: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[0].xyzz 63: RSQ TEMP[4].x, TEMP[4].xxxx 64: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xxxx 65: MOV TEMP[0].w, TEMP[3].xxxx 66: MOV TEMP[3].xy, TEMP[3].yzyy 67: MOV OUT[1], TEMP[2] 68: MOV OUT[2], TEMP[1] 69: MOV OUT[4], TEMP[3] 70: MOV OUT[3], TEMP[0] 71: MOV OUT[0], TEMP[6] 72: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %53 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %54 = load <32 x i8>, <32 x i8> addrspace(2)* %53, align 32, !tbaa !0 %55 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0 %57 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = add i32 %5, %7 %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59) %61 = extractelement <4 x float> %60, i32 0 %62 = extractelement <4 x float> %60, i32 1 %63 = extractelement <4 x float> %60, i32 2 %64 = extractelement <4 x float> %60, i32 3 %65 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = add i32 %5, %7 %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %67) %69 = extractelement <4 x float> %68, i32 0 %70 = extractelement <4 x float> %68, i32 1 %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = add i32 %5, %7 %74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %73) %75 = extractelement <4 x float> %74, i32 0 %76 = extractelement <4 x float> %74, i32 1 %77 = fmul float %75, 2.500000e-01 %78 = fptosi float %77 to i32 %79 = fptosi float %76 to i32 %80 = sdiv i32 %79, 256 %81 = sitofp i32 %78 to float %82 = sitofp i32 %80 to float %83 = shl nsw i32 %80, 8 %84 = sub i32 %79, %83 %85 = sitofp i32 %84 to float %86 = fadd float %81, -1.000000e+00 %87 = fadd float %82, -1.000000e+00 %88 = fadd float %85, -1.000000e+00 %89 = sitofp i32 %79 to float %90 = fsub float %76, %89 %91 = fmul float %90, 1.000000e+01 %92 = fadd float %91, 0x3FDFFE5CA0000000 %93 = fptosi float %92 to i32 %94 = icmp eq i32 %93, 0 %95 = select i1 %94, float 1.000000e+00, float 0.000000e+00 %96 = icmp eq i32 %93, 1 %97 = select i1 %96, float 1.000000e+00, float 0.000000e+00 %98 = icmp eq i32 %93, 2 %99 = select i1 %98, float 1.000000e+00, float 0.000000e+00 %100 = fmul float %95, %81 %101 = fmul float %97, %82 %102 = fadd float %100, %101 %103 = fmul float %99, %85 %104 = fadd float %102, %103 %105 = fadd float %104, 0.000000e+00 %106 = fmul float %105, 0x3F70101020000000 %107 = bitcast float %106 to i32 %108 = insertelement <4 x i32> undef, i32 %107, i32 0 %109 = insertelement <4 x i32> %108, i32 1036831949, i32 1 %110 = insertelement <4 x i32> %109, i32 0, i32 2 %111 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %110, <32 x i8> %54, <16 x i8> %56, i32 2) %112 = extractelement <4 x float> %111, i32 0 %113 = extractelement <4 x float> %111, i32 1 %114 = fmul float %112, 1.600000e+01 %115 = fadd float %114, -8.000000e+00 %116 = fmul float %113, 4.000000e+00 %117 = shl i32 %78, 2 %118 = sitofp i32 %117 to float %119 = fsub float %75, %118 %120 = fadd float %119, -2.000000e+00 %121 = fmul float %37, %61 %122 = fmul float %38, %61 %123 = fmul float %39, %61 %124 = fmul float %40, %61 %125 = fmul float %41, %62 %126 = fadd float %125, %121 %127 = fmul float %42, %62 %128 = fadd float %127, %122 %129 = fmul float %43, %62 %130 = fadd float %129, %123 %131 = fmul float %44, %62 %132 = fadd float %131, %124 %133 = fmul float %45, %63 %134 = fadd float %133, %126 %135 = fmul float %46, %63 %136 = fadd float %135, %128 %137 = fmul float %47, %63 %138 = fadd float %137, %130 %139 = fmul float %48, %63 %140 = fadd float %139, %132 %141 = fmul float %49, %64 %142 = fadd float %141, %134 %143 = fmul float %50, %64 %144 = fadd float %143, %136 %145 = fmul float %51, %64 %146 = fadd float %145, %138 %147 = fmul float %52, %64 %148 = fadd float %147, %140 %149 = fmul float %28, %69 %150 = fmul float %31, %69 %151 = fmul float %34, %69 %152 = fmul float %29, %70 %153 = fadd float %152, %149 %154 = fmul float %32, %70 %155 = fadd float %154, %150 %156 = fmul float %35, %70 %157 = fadd float %156, %151 %158 = fmul float %30, %120 %159 = fadd float %158, %153 %160 = fmul float %33, %120 %161 = fadd float %160, %155 %162 = fmul float %36, %120 %163 = fadd float %162, %157 %164 = fmul float %13, %61 %165 = fmul float %14, %61 %166 = fmul float %15, %61 %167 = fmul float %16, %61 %168 = fmul float %17, %62 %169 = fadd float %168, %164 %170 = fmul float %18, %62 %171 = fadd float %170, %165 %172 = fmul float %19, %62 %173 = fadd float %172, %166 %174 = fmul float %20, %62 %175 = fadd float %174, %167 %176 = fmul float %21, %63 %177 = fadd float %176, %169 %178 = fmul float %22, %63 %179 = fadd float %178, %171 %180 = fmul float %23, %63 %181 = fadd float %180, %173 %182 = fmul float %24, %63 %183 = fadd float %182, %175 %184 = fmul float %25, %64 %185 = fadd float %184, %177 %186 = fmul float %26, %64 %187 = fadd float %186, %179 %188 = fmul float %27, %64 %189 = fadd float %188, %181 %190 = fmul float %159, %159 %191 = fmul float %161, %161 %192 = fadd float %191, %190 %193 = fmul float %163, %163 %194 = fadd float %192, %193 %195 = call float @llvm.AMDGPU.rsq.clamped.f32(float %194) %196 = fmul float %159, %195 %197 = fmul float %161, %195 %198 = fmul float %163, %195 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %86, float %87, float %88, float %115) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %95, float %97, float %99, float %116) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %196, float %197, float %198, float %185) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %187, float %189, float %189, float %183) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %142, float %144, float %146, float %148) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0x3efff2e5 ; 7E0202FF 3EFFF2E5 v_mov_b32_e32 v2, 0xc1000000 ; 7E0402FF C1000000 v_mov_b32_e32 v5, 0 ; 7E0A0280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx4 s[16:19], s[8:9], 0x0 ; C0880900 s_load_dwordx4 s[20:23], s[8:9], 0x4 ; C08A0904 s_load_dwordx4 s[24:27], s[8:9], 0x8 ; C08C0908 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s28, s[0:3], 0x10 ; C20E0110 s_buffer_load_dword s29, s[0:3], 0x11 ; C20E8111 buffer_load_format_xyzw v[6:9], v0, s[16:19], 0 idxen ; E00C2000 80040600 buffer_load_format_xyzw v[10:13], v0, s[20:23], 0 idxen ; E00C2000 80050A00 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[12:15], v0, s[24:27], 0 idxen ; E00C2000 80060C00 s_buffer_load_dword s16, s[0:3], 0x12 ; C2080112 s_buffer_load_dword s17, s[0:3], 0x14 ; C2088114 s_buffer_load_dword s18, s[0:3], 0x15 ; C2090115 s_buffer_load_dword s19, s[0:3], 0x16 ; C2098116 s_buffer_load_dword s20, s[0:3], 0x18 ; C20A0118 s_buffer_load_dword s21, s[0:3], 0x19 ; C20A8119 s_buffer_load_dword s22, s[0:3], 0x1a ; C20B011A s_buffer_load_dword s23, s[0:3], 0x24 ; C20B8124 s_buffer_load_dword s24, s[0:3], 0x0 ; C20C0100 s_buffer_load_dword s25, s[0:3], 0x1 ; C20C8101 s_buffer_load_dword s26, s[0:3], 0x2 ; C20D0102 s_buffer_load_dword s27, s[0:3], 0x3 ; C20D8103 s_buffer_load_dword s30, s[0:3], 0x4 ; C20F0104 s_buffer_load_dword s31, s[0:3], 0x5 ; C20F8105 s_buffer_load_dword s32, s[0:3], 0x6 ; C2100106 s_buffer_load_dword s33, s[0:3], 0x7 ; C2108107 s_buffer_load_dword s34, s[0:3], 0x8 ; C2110108 s_buffer_load_dword s35, s[0:3], 0x9 ; C2118109 s_buffer_load_dword s36, s[0:3], 0x25 ; C2120125 s_buffer_load_dword s37, s[0:3], 0x26 ; C2128126 s_buffer_load_dword s38, s[0:3], 0x27 ; C2130127 s_buffer_load_dword s39, s[0:3], 0x28 ; C2138128 s_buffer_load_dword s40, s[0:3], 0x29 ; C2140129 s_buffer_load_dword s41, s[0:3], 0xa ; C214810A s_buffer_load_dword s42, s[0:3], 0xb ; C215010B s_buffer_load_dword s43, s[0:3], 0xc ; C215810C s_buffer_load_dword s44, s[0:3], 0xd ; C216010D s_buffer_load_dword s45, s[0:3], 0xe ; C216810E s_buffer_load_dword s46, s[0:3], 0x2a ; C217012A s_buffer_load_dword s47, s[0:3], 0x2b ; C217812B s_buffer_load_dword s48, s[0:3], 0x2c ; C218012C s_buffer_load_dword s49, s[0:3], 0x2d ; C218812D s_buffer_load_dword s50, s[0:3], 0x2e ; C219012E s_buffer_load_dword s51, s[0:3], 0x2f ; C219812F s_buffer_load_dword s52, s[0:3], 0x30 ; C21A0130 s_buffer_load_dword s53, s[0:3], 0x31 ; C21A8131 s_buffer_load_dword s54, s[0:3], 0x32 ; C21B0132 s_buffer_load_dword s0, s[0:3], 0x33 ; C2000133 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s23, v6 ; 10000C17 v_mul_f32_e32 v14, s28, v10 ; 101C141C v_mul_f32_e32 v15, s17, v10 ; 101E1411 v_mul_f32_e32 v10, s20, v10 ; 10141414 v_mac_f32_e32 v0, s39, v7 ; 3E000E27 v_mac_f32_e32 v14, s29, v11 ; 3E1C161D v_mac_f32_e32 v15, s18, v11 ; 3E1E1612 v_mac_f32_e32 v10, s21, v11 ; 3E141615 v_mul_f32_e32 v11, s36, v6 ; 10160C24 v_mac_f32_e32 v11, s40, v7 ; 3E160E28 v_mul_f32_e32 v16, s37, v6 ; 10200C25 v_mac_f32_e32 v16, s46, v7 ; 3E200E2E v_mul_f32_e32 v17, s38, v6 ; 10220C26 v_mac_f32_e32 v17, s47, v7 ; 3E220E2F v_mul_f32_e32 v18, s24, v6 ; 10240C18 v_mac_f32_e32 v18, s30, v7 ; 3E240E1E v_mul_f32_e32 v19, s25, v6 ; 10260C19 v_mac_f32_e32 v19, s31, v7 ; 3E260E1F v_mul_f32_e32 v20, s26, v6 ; 10280C1A v_mac_f32_e32 v20, s32, v7 ; 3E280E20 v_mul_f32_e32 v6, s27, v6 ; 100C0C1B v_mac_f32_e32 v6, s33, v7 ; 3E0C0E21 v_mac_f32_e32 v0, s48, v8 ; 3E001030 v_mac_f32_e32 v11, s49, v8 ; 3E161031 v_mac_f32_e32 v16, s50, v8 ; 3E201032 v_mac_f32_e32 v17, s51, v8 ; 3E221033 v_mac_f32_e32 v18, s34, v8 ; 3E241022 v_mac_f32_e32 v19, s35, v8 ; 3E261023 v_mac_f32_e32 v20, s41, v8 ; 3E281029 v_mac_f32_e32 v6, s42, v8 ; 3E0C102A v_mac_f32_e32 v0, s52, v9 ; 3E001234 v_mac_f32_e32 v11, s53, v9 ; 3E161235 v_mac_f32_e32 v16, s54, v9 ; 3E201236 v_mul_f32_e32 v3, 0x3e800000, v12 ; 100618FF 3E800000 v_cvt_i32_f32_e32 v4, v13 ; 7E08110D v_cvt_i32_f32_e32 v3, v3 ; 7E061103 v_mac_f32_e32 v17, s0, v9 ; 3E221200 v_mac_f32_e32 v18, s43, v9 ; 3E24122B v_cvt_f32_i32_e32 v7, v4 ; 7E0E0B04 v_lshlrev_b32_e32 v8, 2, v3 ; 34100682 v_cvt_f32_i32_e32 v8, v8 ; 7E100B08 v_mac_f32_e32 v19, s44, v9 ; 3E26122C v_mac_f32_e32 v20, s45, v9 ; 3E28122D v_subrev_f32_e32 v7, v7, v13 ; 0A0E1B07 v_subrev_f32_e32 v8, v8, v12 ; 0A101908 v_madmk_f32_e32 v1, v7, v1, 0x41200000 ; 40020307 41200000 v_ashrrev_i32_e32 v7, 31, v4 ; 300E089F v_lshrrev_b32_e32 v7, 24, v7 ; 2C0E0E98 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_add_i32_e32 v7, v4, v7 ; 4A0E0F04 v_and_b32_e32 v9, 0xffffff00, v7 ; 36120EFF FFFFFF00 v_sub_i32_e32 v4, v4, v9 ; 4C081304 v_cmp_eq_i32_e32 vcc, 0, v1 ; 7D040280 v_cndmask_b32_e64 v9, 0, 1.0, vcc ; D2000009 01A9E480 v_cvt_f32_i32_e32 v12, v3 ; 7E180B03 v_ashrrev_i32_e32 v3, 8, v7 ; 30060E88 v_cvt_f32_i32_e32 v7, v3 ; 7E0E0B03 v_cmp_eq_i32_e32 vcc, 1, v1 ; 7D040281 v_cmp_eq_i32_e64 s[0:1], 2, v1 ; D1040000 00020282 v_cndmask_b32_e64 v1, 0, 1.0, vcc ; D2000001 01A9E480 v_cvt_f32_i32_e32 v13, v4 ; 7E1A0B04 v_mul_f32_e32 v3, v7, v1 ; 10060307 v_mac_f32_e32 v3, v12, v9 ; 3E06130C v_cndmask_b32_e64 v21, 0, 1.0, s[0:1] ; D2000015 0001E480 v_mac_f32_e32 v3, v13, v21 ; 3E062B0D v_add_f32_e32 v3, 0, v3 ; 06060680 v_mul_f32_e32 v3, 0x3b808081, v3 ; 100606FF 3B808081 v_mov_b32_e32 v4, 0x3dcccccd ; 7E0802FF 3DCCCCCD image_sample_l v[3:4], 3, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[4:11], s[12:15] ; F0900300 00610303 v_add_f32_e32 v5, -2.0, v8 ; 060A10F5 v_mac_f32_e32 v14, s16, v5 ; 3E1C0A10 v_mac_f32_e32 v15, s19, v5 ; 3E1E0A13 v_mac_f32_e32 v10, s22, v5 ; 3E140A16 v_add_f32_e32 v5, -1.0, v12 ; 060A18F3 v_add_f32_e32 v7, -1.0, v7 ; 060E0EF3 v_mul_f32_e32 v8, v14, v14 ; 10101D0E v_mac_f32_e32 v8, v15, v15 ; 3E101F0F v_mac_f32_e32 v8, v10, v10 ; 3E10150A v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_add_f32_e32 v12, -1.0, v13 ; 06181AF3 s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v2, v3, v2, 0x41800000 ; 40040503 41800000 exp 15, 32, 0, 0, 0, v5, v7, v12, v2 ; F800020F 020C0705 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, v8, v14 ; 10041D08 v_mul_f32_e32 v3, v8, v15 ; 10061F08 v_mul_f32_e32 v4, 4.0, v4 ; 100808F6 v_mul_f32_e32 v5, v8, v10 ; 100A1508 exp 15, 33, 0, 0, 0, v9, v1, v21, v4 ; F800021F 04150109 exp 15, 34, 0, 0, 0, v2, v3, v5, v18 ; F800022F 12050302 exp 15, 35, 0, 0, 0, v19, v20, v20, v6 ; F800023F 06141413 exp 15, 12, 0, 1, 0, v0, v11, v16, v17 ; F80008CF 11100B00 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 24 Code Size: 680 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL CONST[0] DCL CONST[6..12] DCL TEMP[0..28], LOCAL IMM[0] FLT32 { -0.2000, 7.0000, 0.0100, 0.5000} IMM[1] FLT32 { 64.0000, -64.0000, 4.0000, 0.6931} IMM[2] FLT32 { 0.0039, 0.0020, 1.0000, 2.0000} IMM[3] FLT32 { 3.0000, 0.0000, -1.0000, 0.0000} 0: MOV TEMP[0].x, IN[2].wwww 1: MOV TEMP[0].yz, IN[3].yxyy 2: ABS TEMP[1].xyz, IN[2].xyzz 3: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 4: RSQ TEMP[2].x, TEMP[2].xxxx 5: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx, IMM[0].xxxx 6: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[0].yyyy 7: MAX TEMP[1].xyz, TEMP[1].xyzz, IMM[0].zzzz 8: ADD TEMP[2].x, TEMP[1].xxxx, TEMP[1].yyyy 9: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[1].zzzz 10: RCP TEMP[2].xyz, TEMP[2].xxxx 11: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz 12: ADD TEMP[2], IN[0], IMM[0].wwww 13: FLR TEMP[2].xyz, TEMP[2] 14: MOV TEMP[3].x, CONST[6].xxxx 15: MUL TEMP[4].x, TEMP[2].xxxx, CONST[6].xxxx 16: MOV TEMP[5].x, TEMP[4].xxxx 17: FLR TEMP[4].x, TEMP[4].xxxx 18: MUL TEMP[4].x, TEMP[4].xxxx, CONST[6].xxxx 19: FSGE TEMP[6].x, TEMP[2].xxxx, IMM[1].xxxx 20: UIF TEMP[6].xxxx :0 21: MOV TEMP[3].x, CONST[7].xxxx 22: ADD TEMP[6].x, TEMP[2].xxxx, IMM[1].yyyy 23: MUL TEMP[6].x, TEMP[6].xxxx, CONST[7].xxxx 24: MOV TEMP[5].x, TEMP[6].xxxx 25: FLR TEMP[7].x, TEMP[6].xxxx 26: MUL TEMP[7].x, TEMP[7].xxxx, CONST[7].xxxx 27: MOV TEMP[4].x, TEMP[7].xxxx 28: FRC TEMP[6].x, TEMP[6].xxxx 29: FRC TEMP[8].x, TEMP[7].xxxx 30: MOV TEMP[6].y, TEMP[8].xxxx 31: FLR TEMP[7].x, TEMP[7].xxxx 32: ADD TEMP[7].x, TEMP[7].xxxx, IMM[1].zzzz 33: MOV TEMP[6].z, TEMP[7].xxxx 34: MOV TEMP[6].xyz, TEMP[6].xyzx 35: ELSE :0 36: FRC TEMP[5].x, TEMP[5].xxxx 37: FRC TEMP[7].x, TEMP[4].xxxx 38: MOV TEMP[5].y, TEMP[7].xxxx 39: FLR TEMP[4].x, TEMP[4].xxxx 40: MOV TEMP[5].z, TEMP[4].xxxx 41: MOV TEMP[6].xyz, TEMP[5].xyzx 42: ENDIF 43: MOV TEMP[4].x, CONST[6].xxxx 44: MUL TEMP[5].x, TEMP[2].yyyy, CONST[6].xxxx 45: MOV TEMP[7].x, TEMP[5].xxxx 46: FLR TEMP[5].x, TEMP[5].xxxx 47: MUL TEMP[5].x, TEMP[5].xxxx, CONST[6].xxxx 48: FSGE TEMP[8].x, TEMP[2].yyyy, IMM[1].xxxx 49: UIF TEMP[8].xxxx :0 50: MOV TEMP[4].x, CONST[7].xxxx 51: ADD TEMP[8].x, TEMP[2].yyyy, IMM[1].yyyy 52: MUL TEMP[8].x, TEMP[8].xxxx, CONST[7].xxxx 53: MOV TEMP[7].x, TEMP[8].xxxx 54: FLR TEMP[9].x, TEMP[8].xxxx 55: MUL TEMP[9].x, TEMP[9].xxxx, CONST[7].xxxx 56: MOV TEMP[5].x, TEMP[9].xxxx 57: FRC TEMP[8].x, TEMP[8].xxxx 58: FRC TEMP[10].x, TEMP[9].xxxx 59: MOV TEMP[8].y, TEMP[10].xxxx 60: FLR TEMP[9].x, TEMP[9].xxxx 61: ADD TEMP[9].x, TEMP[9].xxxx, IMM[1].zzzz 62: MOV TEMP[8].z, TEMP[9].xxxx 63: MOV TEMP[8].xyz, TEMP[8].xyzx 64: ELSE :0 65: FRC TEMP[7].x, TEMP[7].xxxx 66: FRC TEMP[9].x, TEMP[5].xxxx 67: MOV TEMP[7].y, TEMP[9].xxxx 68: FLR TEMP[5].x, TEMP[5].xxxx 69: MOV TEMP[7].z, TEMP[5].xxxx 70: MOV TEMP[8].xyz, TEMP[7].xyzx 71: ENDIF 72: MOV TEMP[5].x, CONST[6].xxxx 73: MUL TEMP[7].x, TEMP[2].zzzz, CONST[6].xxxx 74: MOV TEMP[9].x, TEMP[7].xxxx 75: FLR TEMP[7].x, TEMP[7].xxxx 76: MUL TEMP[7].x, TEMP[7].xxxx, CONST[6].xxxx 77: FSGE TEMP[10].x, TEMP[2].zzzz, IMM[1].xxxx 78: UIF TEMP[10].xxxx :0 79: MOV TEMP[5].x, CONST[7].xxxx 80: ADD TEMP[2].x, TEMP[2].zzzz, IMM[1].yyyy 81: MUL TEMP[2].x, TEMP[2].xxxx, CONST[7].xxxx 82: MOV TEMP[9].x, TEMP[2].xxxx 83: FLR TEMP[10].x, TEMP[2].xxxx 84: MUL TEMP[10].x, TEMP[10].xxxx, CONST[7].xxxx 85: MOV TEMP[7].x, TEMP[10].xxxx 86: FRC TEMP[2].x, TEMP[2].xxxx 87: FRC TEMP[11].x, TEMP[10].xxxx 88: MOV TEMP[2].y, TEMP[11].xxxx 89: FLR TEMP[10].x, TEMP[10].xxxx 90: ADD TEMP[10].x, TEMP[10].xxxx, IMM[1].zzzz 91: MOV TEMP[2].z, TEMP[10].xxxx 92: MOV TEMP[2].xyz, TEMP[2].xyzx 93: ELSE :0 94: FRC TEMP[9].x, TEMP[9].xxxx 95: FRC TEMP[10].x, TEMP[7].xxxx 96: MOV TEMP[9].y, TEMP[10].xxxx 97: FLR TEMP[7].x, TEMP[7].xxxx 98: MOV TEMP[9].z, TEMP[7].xxxx 99: MOV TEMP[2].xyz, TEMP[9].xyzx 100: ENDIF 101: ADD TEMP[7].xyz, TEMP[0].xyzz, -CONST[0].xyzz 102: DP3 TEMP[7].x, TEMP[7].xyzz, TEMP[7].xyzz 103: MUL TEMP[7].x, CONST[12].xxxx, TEMP[7].xxxx 104: LG2 TEMP[7].x, TEMP[7].xxxx 105: MUL TEMP[7].x, TEMP[7].xxxx, IMM[1].wwww 106: MUL TEMP[7].x, TEMP[7].xxxx, CONST[11].xxxx 107: MOV TEMP[9].xy, IN[3].yxyy 108: MOV TEMP[10].x, IMM[2].xxxx 109: FSNE TEMP[11].x, CONST[6].xxxx, TEMP[3].xxxx 110: UIF TEMP[11].xxxx :0 111: MOV TEMP[10].x, IMM[2].yyyy 112: RCP TEMP[11].x, CONST[9].xxxx 113: MUL TEMP[9].xy, IN[3].yxxx, TEMP[11].xxxx 114: ELSE :0 115: RCP TEMP[11].x, CONST[8].xxxx 116: MUL TEMP[9].xy, TEMP[9].xyyy, TEMP[11].xxxx 117: ENDIF 118: FRC TEMP[9].xy, TEMP[9].xyyy 119: MUL TEMP[11].x, CONST[10].xxxx, IMM[2].wwww 120: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[10].xxxx 121: ADD TEMP[11].x, IMM[2].zzzz, -TEMP[11].xxxx 122: MUL TEMP[10].x, TEMP[10].xxxx, CONST[10].xxxx 123: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[11].xxxx, TEMP[10].xxxx 124: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[3].xxxx, TEMP[6].xyyy 125: MOV TEMP[10].xy, TEMP[9].xyyy 126: MOV TEMP[10].w, TEMP[7].xxxx 127: TXL TEMP[10], TEMP[10], SAMP[4], 2D 128: FSEQ TEMP[11].x, TEMP[6].zzzz, IMM[1].zzzz 129: AND TEMP[11].x, TEMP[11].xxxx, IMM[2].zzzz 130: MOV TEMP[12].xy, TEMP[9].xyyy 131: MOV TEMP[12].w, TEMP[7].xxxx 132: TXL TEMP[12], TEMP[12], SAMP[3], 2D 133: FSEQ TEMP[13].x, TEMP[6].zzzz, IMM[3].xxxx 134: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 135: MOV TEMP[14].xy, TEMP[9].xyyy 136: MOV TEMP[14].w, TEMP[7].xxxx 137: TXL TEMP[14], TEMP[14], SAMP[2], 2D 138: FSEQ TEMP[15].x, TEMP[6].zzzz, IMM[2].wwww 139: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 140: MOV TEMP[16].xy, TEMP[9].xyyy 141: MOV TEMP[16].w, TEMP[7].xxxx 142: TXL TEMP[16], TEMP[16], SAMP[1], 2D 143: FSEQ TEMP[17].x, TEMP[6].zzzz, IMM[2].zzzz 144: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 145: MOV TEMP[9].xy, TEMP[9].xyyy 146: MOV TEMP[9].w, TEMP[7].xxxx 147: TXL TEMP[9], TEMP[9], SAMP[0], 2D 148: FSEQ TEMP[18].x, TEMP[6].zzzz, IMM[3].yyyy 149: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 150: MUL TEMP[9], TEMP[9], TEMP[18].xxxx 151: MAD TEMP[9], TEMP[16], TEMP[17].xxxx, TEMP[9] 152: MAD TEMP[9], TEMP[14], TEMP[15].xxxx, TEMP[9] 153: MAD TEMP[9], TEMP[12], TEMP[13].xxxx, TEMP[9] 154: MAD TEMP[9].yw, TEMP[10], TEMP[11].xxxx, TEMP[9] 155: MAD TEMP[9].xy, TEMP[9].wyyy, IMM[2].wwww, IMM[3].zzzz 156: DP2 TEMP[10].x, TEMP[9].xyyy, TEMP[9].xyyy 157: MOV_SAT TEMP[19].x, TEMP[10].xxxx 158: MOV TEMP[10].xy, TEMP[0].zxzz 159: MOV TEMP[11].x, IMM[2].xxxx 160: FSNE TEMP[12].x, CONST[6].xxxx, TEMP[3].xxxx 161: UIF TEMP[12].xxxx :0 162: MOV TEMP[11].x, IMM[2].yyyy 163: RCP TEMP[12].x, CONST[9].xxxx 164: MUL TEMP[10].xy, TEMP[0].zxxx, TEMP[12].xxxx 165: ELSE :0 166: RCP TEMP[12].x, CONST[8].xxxx 167: MUL TEMP[10].xy, TEMP[10].xyyy, TEMP[12].xxxx 168: ENDIF 169: FRC TEMP[10].xy, TEMP[10].xyyy 170: MUL TEMP[12].x, CONST[10].xxxx, IMM[2].wwww 171: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx 172: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx 173: MUL TEMP[11].x, TEMP[11].xxxx, CONST[10].xxxx 174: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[12].xxxx, TEMP[11].xxxx 175: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[3].xxxx, TEMP[6].xyyy 176: MOV TEMP[11].xy, TEMP[10].xyyy 177: MOV TEMP[11].w, TEMP[7].xxxx 178: TXL TEMP[11], TEMP[11], SAMP[4], 2D 179: FSEQ TEMP[12].x, TEMP[6].zzzz, IMM[1].zzzz 180: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz 181: MOV TEMP[13].xy, TEMP[10].xyyy 182: MOV TEMP[13].w, TEMP[7].xxxx 183: TXL TEMP[13], TEMP[13], SAMP[3], 2D 184: FSEQ TEMP[14].x, TEMP[6].zzzz, IMM[3].xxxx 185: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 186: MOV TEMP[15].xy, TEMP[10].xyyy 187: MOV TEMP[15].w, TEMP[7].xxxx 188: TXL TEMP[15], TEMP[15], SAMP[2], 2D 189: FSEQ TEMP[16].x, TEMP[6].zzzz, IMM[2].wwww 190: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 191: MOV TEMP[17].xy, TEMP[10].xyyy 192: MOV TEMP[17].w, TEMP[7].xxxx 193: TXL TEMP[17], TEMP[17], SAMP[1], 2D 194: FSEQ TEMP[18].x, TEMP[6].zzzz, IMM[2].zzzz 195: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 196: MOV TEMP[10].xy, TEMP[10].xyyy 197: MOV TEMP[10].w, TEMP[7].xxxx 198: TXL TEMP[10], TEMP[10], SAMP[0], 2D 199: FSEQ TEMP[20].x, TEMP[6].zzzz, IMM[3].yyyy 200: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 201: MUL TEMP[10], TEMP[10], TEMP[20].xxxx 202: MAD TEMP[10], TEMP[17], TEMP[18].xxxx, TEMP[10] 203: MAD TEMP[10], TEMP[15], TEMP[16].xxxx, TEMP[10] 204: MAD TEMP[10], TEMP[13], TEMP[14].xxxx, TEMP[10] 205: MAD TEMP[10].yw, TEMP[11], TEMP[12].xxxx, TEMP[10] 206: MAD TEMP[10].xy, TEMP[10].wyyy, IMM[2].wwww, IMM[3].zzzz 207: DP2 TEMP[11].x, TEMP[10].xyyy, TEMP[10].xyyy 208: MOV_SAT TEMP[21].x, TEMP[11].xxxx 209: MOV TEMP[11].xy, TEMP[0].xyxx 210: MOV TEMP[12].x, IMM[2].xxxx 211: FSNE TEMP[13].x, CONST[6].xxxx, TEMP[3].xxxx 212: UIF TEMP[13].xxxx :0 213: MOV TEMP[12].x, IMM[2].yyyy 214: RCP TEMP[13].x, CONST[9].xxxx 215: MUL TEMP[11].xy, TEMP[0].xyyy, TEMP[13].xxxx 216: ELSE :0 217: RCP TEMP[13].x, CONST[8].xxxx 218: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx 219: ENDIF 220: FRC TEMP[11].xy, TEMP[11].xyyy 221: MUL TEMP[13].x, CONST[10].xxxx, IMM[2].wwww 222: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx 223: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx 224: MUL TEMP[12].x, TEMP[12].xxxx, CONST[10].xxxx 225: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx 226: MAD TEMP[3].xy, TEMP[11].xyyy, TEMP[3].xxxx, TEMP[6].xyyy 227: MOV TEMP[11].xy, TEMP[3].xyyy 228: MOV TEMP[11].w, TEMP[7].xxxx 229: TXL TEMP[11], TEMP[11], SAMP[4], 2D 230: FSEQ TEMP[12].x, TEMP[6].zzzz, IMM[1].zzzz 231: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz 232: MOV TEMP[13].xy, TEMP[3].xyyy 233: MOV TEMP[13].w, TEMP[7].xxxx 234: TXL TEMP[13], TEMP[13], SAMP[3], 2D 235: FSEQ TEMP[14].x, TEMP[6].zzzz, IMM[3].xxxx 236: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 237: MOV TEMP[15].xy, TEMP[3].xyyy 238: MOV TEMP[15].w, TEMP[7].xxxx 239: TXL TEMP[15], TEMP[15], SAMP[2], 2D 240: FSEQ TEMP[16].x, TEMP[6].zzzz, IMM[2].wwww 241: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 242: MOV TEMP[17].xy, TEMP[3].xyyy 243: MOV TEMP[17].w, TEMP[7].xxxx 244: TXL TEMP[17], TEMP[17], SAMP[1], 2D 245: FSEQ TEMP[18].x, TEMP[6].zzzz, IMM[2].zzzz 246: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 247: MOV TEMP[3].xy, TEMP[3].xyyy 248: MOV TEMP[3].w, TEMP[7].xxxx 249: TXL TEMP[3], TEMP[3], SAMP[0], 2D 250: FSEQ TEMP[6].x, TEMP[6].zzzz, IMM[3].yyyy 251: AND TEMP[6].x, TEMP[6].xxxx, IMM[2].zzzz 252: MUL TEMP[3], TEMP[3], TEMP[6].xxxx 253: MAD TEMP[3], TEMP[17], TEMP[18].xxxx, TEMP[3] 254: MAD TEMP[3], TEMP[15], TEMP[16].xxxx, TEMP[3] 255: MAD TEMP[3], TEMP[13], TEMP[14].xxxx, TEMP[3] 256: MAD TEMP[3].yw, TEMP[11], TEMP[12].xxxx, TEMP[3] 257: MAD TEMP[3].xy, TEMP[3].wyyy, IMM[2].wwww, IMM[3].zzzz 258: DP2 TEMP[6].x, TEMP[3].xyyy, TEMP[3].xyyy 259: MOV_SAT TEMP[22].x, TEMP[6].xxxx 260: MOV TEMP[6].x, IMM[3].yyyy 261: MOV TEMP[6].y, TEMP[9].xxxx 262: MOV TEMP[6].z, TEMP[9].yyyy 263: MOV TEMP[9].y, IMM[3].yyyy 264: MOV TEMP[9].x, TEMP[10].yyyy 265: MOV TEMP[9].z, TEMP[10].xxxx 266: MOV TEMP[10].z, IMM[3].yyyy 267: MOV TEMP[10].xy, TEMP[3].xyxx 268: MUL TEMP[3].xyz, TEMP[6].xyzz, TEMP[1].xxxx 269: MAD TEMP[3].xyz, TEMP[9].xyzz, TEMP[1].yyyy, TEMP[3].xyzz 270: MAD TEMP[3].xyz, TEMP[10].xyzz, TEMP[1].zzzz, TEMP[3].xyzz 271: MOV TEMP[6].xy, IN[3].yxyy 272: MOV TEMP[9].x, IMM[2].xxxx 273: FSNE TEMP[10].x, CONST[6].xxxx, TEMP[4].xxxx 274: UIF TEMP[10].xxxx :0 275: MOV TEMP[9].x, IMM[2].yyyy 276: RCP TEMP[10].x, CONST[9].xxxx 277: MUL TEMP[6].xy, IN[3].yxxx, TEMP[10].xxxx 278: ELSE :0 279: RCP TEMP[10].x, CONST[8].xxxx 280: MUL TEMP[6].xy, TEMP[6].xyyy, TEMP[10].xxxx 281: ENDIF 282: FRC TEMP[6].xy, TEMP[6].xyyy 283: MUL TEMP[10].x, CONST[10].xxxx, IMM[2].wwww 284: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[9].xxxx 285: ADD TEMP[10].x, IMM[2].zzzz, -TEMP[10].xxxx 286: MUL TEMP[9].x, TEMP[9].xxxx, CONST[10].xxxx 287: MAD TEMP[6].xy, TEMP[6].xyyy, TEMP[10].xxxx, TEMP[9].xxxx 288: MAD TEMP[6].xy, TEMP[6].xyyy, TEMP[4].xxxx, TEMP[8].xyyy 289: MOV TEMP[9].xy, TEMP[6].xyyy 290: MOV TEMP[9].w, TEMP[7].xxxx 291: TXL TEMP[9], TEMP[9], SAMP[4], 2D 292: FSEQ TEMP[10].x, TEMP[8].zzzz, IMM[1].zzzz 293: AND TEMP[10].x, TEMP[10].xxxx, IMM[2].zzzz 294: MOV TEMP[11].xy, TEMP[6].xyyy 295: MOV TEMP[11].w, TEMP[7].xxxx 296: TXL TEMP[11], TEMP[11], SAMP[3], 2D 297: FSEQ TEMP[12].x, TEMP[8].zzzz, IMM[3].xxxx 298: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz 299: MOV TEMP[13].xy, TEMP[6].xyyy 300: MOV TEMP[13].w, TEMP[7].xxxx 301: TXL TEMP[13], TEMP[13], SAMP[2], 2D 302: FSEQ TEMP[14].x, TEMP[8].zzzz, IMM[2].wwww 303: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 304: MOV TEMP[15].xy, TEMP[6].xyyy 305: MOV TEMP[15].w, TEMP[7].xxxx 306: TXL TEMP[15], TEMP[15], SAMP[1], 2D 307: FSEQ TEMP[16].x, TEMP[8].zzzz, IMM[2].zzzz 308: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 309: MOV TEMP[6].xy, TEMP[6].xyyy 310: MOV TEMP[6].w, TEMP[7].xxxx 311: TXL TEMP[6], TEMP[6], SAMP[0], 2D 312: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[3].yyyy 313: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 314: MUL TEMP[6], TEMP[6], TEMP[17].xxxx 315: MAD TEMP[6], TEMP[15], TEMP[16].xxxx, TEMP[6] 316: MAD TEMP[6], TEMP[13], TEMP[14].xxxx, TEMP[6] 317: MAD TEMP[6], TEMP[11], TEMP[12].xxxx, TEMP[6] 318: MAD TEMP[6].yw, TEMP[9], TEMP[10].xxxx, TEMP[6] 319: MAD TEMP[6].xy, TEMP[6].wyyy, IMM[2].wwww, IMM[3].zzzz 320: DP2 TEMP[9].x, TEMP[6].xyyy, TEMP[6].xyyy 321: MOV_SAT TEMP[23].x, TEMP[9].xxxx 322: MOV TEMP[9].xy, TEMP[0].zxzz 323: MOV TEMP[10].x, IMM[2].xxxx 324: FSNE TEMP[11].x, CONST[6].xxxx, TEMP[4].xxxx 325: UIF TEMP[11].xxxx :0 326: MOV TEMP[10].x, IMM[2].yyyy 327: RCP TEMP[11].x, CONST[9].xxxx 328: MUL TEMP[9].xy, TEMP[0].zxxx, TEMP[11].xxxx 329: ELSE :0 330: RCP TEMP[11].x, CONST[8].xxxx 331: MUL TEMP[9].xy, TEMP[9].xyyy, TEMP[11].xxxx 332: ENDIF 333: FRC TEMP[9].xy, TEMP[9].xyyy 334: MUL TEMP[11].x, CONST[10].xxxx, IMM[2].wwww 335: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[10].xxxx 336: ADD TEMP[11].x, IMM[2].zzzz, -TEMP[11].xxxx 337: MUL TEMP[10].x, TEMP[10].xxxx, CONST[10].xxxx 338: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[11].xxxx, TEMP[10].xxxx 339: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[4].xxxx, TEMP[8].xyyy 340: MOV TEMP[10].xy, TEMP[9].xyyy 341: MOV TEMP[10].w, TEMP[7].xxxx 342: TXL TEMP[10], TEMP[10], SAMP[4], 2D 343: FSEQ TEMP[11].x, TEMP[8].zzzz, IMM[1].zzzz 344: AND TEMP[11].x, TEMP[11].xxxx, IMM[2].zzzz 345: MOV TEMP[12].xy, TEMP[9].xyyy 346: MOV TEMP[12].w, TEMP[7].xxxx 347: TXL TEMP[12], TEMP[12], SAMP[3], 2D 348: FSEQ TEMP[13].x, TEMP[8].zzzz, IMM[3].xxxx 349: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 350: MOV TEMP[14].xy, TEMP[9].xyyy 351: MOV TEMP[14].w, TEMP[7].xxxx 352: TXL TEMP[14], TEMP[14], SAMP[2], 2D 353: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[2].wwww 354: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 355: MOV TEMP[16].xy, TEMP[9].xyyy 356: MOV TEMP[16].w, TEMP[7].xxxx 357: TXL TEMP[16], TEMP[16], SAMP[1], 2D 358: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[2].zzzz 359: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 360: MOV TEMP[9].xy, TEMP[9].xyyy 361: MOV TEMP[9].w, TEMP[7].xxxx 362: TXL TEMP[9], TEMP[9], SAMP[0], 2D 363: FSEQ TEMP[18].x, TEMP[8].zzzz, IMM[3].yyyy 364: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 365: MUL TEMP[9], TEMP[9], TEMP[18].xxxx 366: MAD TEMP[9], TEMP[16], TEMP[17].xxxx, TEMP[9] 367: MAD TEMP[9], TEMP[14], TEMP[15].xxxx, TEMP[9] 368: MAD TEMP[9], TEMP[12], TEMP[13].xxxx, TEMP[9] 369: MAD TEMP[9].yw, TEMP[10], TEMP[11].xxxx, TEMP[9] 370: MAD TEMP[9].xy, TEMP[9].wyyy, IMM[2].wwww, IMM[3].zzzz 371: DP2 TEMP[10].x, TEMP[9].xyyy, TEMP[9].xyyy 372: MOV_SAT TEMP[24].x, TEMP[10].xxxx 373: MOV TEMP[10].xy, TEMP[0].xyxx 374: MOV TEMP[11].x, IMM[2].xxxx 375: FSNE TEMP[12].x, CONST[6].xxxx, TEMP[4].xxxx 376: UIF TEMP[12].xxxx :0 377: MOV TEMP[11].x, IMM[2].yyyy 378: RCP TEMP[12].x, CONST[9].xxxx 379: MUL TEMP[10].xy, TEMP[0].xyyy, TEMP[12].xxxx 380: ELSE :0 381: RCP TEMP[12].x, CONST[8].xxxx 382: MUL TEMP[10].xy, TEMP[10].xyyy, TEMP[12].xxxx 383: ENDIF 384: FRC TEMP[10].xy, TEMP[10].xyyy 385: MUL TEMP[12].x, CONST[10].xxxx, IMM[2].wwww 386: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx 387: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx 388: MUL TEMP[11].x, TEMP[11].xxxx, CONST[10].xxxx 389: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[12].xxxx, TEMP[11].xxxx 390: MAD TEMP[4].xy, TEMP[10].xyyy, TEMP[4].xxxx, TEMP[8].xyyy 391: MOV TEMP[10].xy, TEMP[4].xyyy 392: MOV TEMP[10].w, TEMP[7].xxxx 393: TXL TEMP[10], TEMP[10], SAMP[4], 2D 394: FSEQ TEMP[11].x, TEMP[8].zzzz, IMM[1].zzzz 395: AND TEMP[11].x, TEMP[11].xxxx, IMM[2].zzzz 396: MOV TEMP[12].xy, TEMP[4].xyyy 397: MOV TEMP[12].w, TEMP[7].xxxx 398: TXL TEMP[12], TEMP[12], SAMP[3], 2D 399: FSEQ TEMP[13].x, TEMP[8].zzzz, IMM[3].xxxx 400: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 401: MOV TEMP[14].xy, TEMP[4].xyyy 402: MOV TEMP[14].w, TEMP[7].xxxx 403: TXL TEMP[14], TEMP[14], SAMP[2], 2D 404: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[2].wwww 405: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 406: MOV TEMP[16].xy, TEMP[4].xyyy 407: MOV TEMP[16].w, TEMP[7].xxxx 408: TXL TEMP[16], TEMP[16], SAMP[1], 2D 409: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[2].zzzz 410: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 411: MOV TEMP[4].xy, TEMP[4].xyyy 412: MOV TEMP[4].w, TEMP[7].xxxx 413: TXL TEMP[4], TEMP[4], SAMP[0], 2D 414: FSEQ TEMP[8].x, TEMP[8].zzzz, IMM[3].yyyy 415: AND TEMP[8].x, TEMP[8].xxxx, IMM[2].zzzz 416: MUL TEMP[4], TEMP[4], TEMP[8].xxxx 417: MAD TEMP[4], TEMP[16], TEMP[17].xxxx, TEMP[4] 418: MAD TEMP[4], TEMP[14], TEMP[15].xxxx, TEMP[4] 419: MAD TEMP[4], TEMP[12], TEMP[13].xxxx, TEMP[4] 420: MAD TEMP[4].yw, TEMP[10], TEMP[11].xxxx, TEMP[4] 421: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[2].wwww, IMM[3].zzzz 422: DP2 TEMP[8].x, TEMP[4].xyyy, TEMP[4].xyyy 423: MOV_SAT TEMP[25].x, TEMP[8].xxxx 424: MOV TEMP[8].x, IMM[3].yyyy 425: MOV TEMP[8].y, TEMP[6].xxxx 426: MOV TEMP[8].z, TEMP[6].yyyy 427: MOV TEMP[6].y, IMM[3].yyyy 428: MOV TEMP[6].x, TEMP[9].yyyy 429: MOV TEMP[6].z, TEMP[9].xxxx 430: MOV TEMP[9].z, IMM[3].yyyy 431: MOV TEMP[9].xy, TEMP[4].xyxx 432: MUL TEMP[4].xyz, TEMP[8].xyzz, TEMP[1].xxxx 433: MAD TEMP[4].xyz, TEMP[6].xyzz, TEMP[1].yyyy, TEMP[4].xyzz 434: MAD TEMP[4].xyz, TEMP[9].xyzz, TEMP[1].zzzz, TEMP[4].xyzz 435: MOV TEMP[6].xy, IN[3].yxyy 436: MOV TEMP[8].x, IMM[2].xxxx 437: FSNE TEMP[9].x, CONST[6].xxxx, TEMP[5].xxxx 438: UIF TEMP[9].xxxx :0 439: MOV TEMP[8].x, IMM[2].yyyy 440: RCP TEMP[9].x, CONST[9].xxxx 441: MUL TEMP[6].xy, IN[3].yxxx, TEMP[9].xxxx 442: ELSE :0 443: RCP TEMP[9].x, CONST[8].xxxx 444: MUL TEMP[6].xy, TEMP[6].xyyy, TEMP[9].xxxx 445: ENDIF 446: FRC TEMP[6].xy, TEMP[6].xyyy 447: MUL TEMP[9].x, CONST[10].xxxx, IMM[2].wwww 448: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[8].xxxx 449: ADD TEMP[9].x, IMM[2].zzzz, -TEMP[9].xxxx 450: MUL TEMP[8].x, TEMP[8].xxxx, CONST[10].xxxx 451: MAD TEMP[6].xy, TEMP[6].xyyy, TEMP[9].xxxx, TEMP[8].xxxx 452: MAD TEMP[6].xy, TEMP[6].xyyy, TEMP[5].xxxx, TEMP[2].xyyy 453: MOV TEMP[8].xy, TEMP[6].xyyy 454: MOV TEMP[8].w, TEMP[7].xxxx 455: TXL TEMP[8], TEMP[8], SAMP[4], 2D 456: FSEQ TEMP[9].x, TEMP[2].zzzz, IMM[1].zzzz 457: AND TEMP[9].x, TEMP[9].xxxx, IMM[2].zzzz 458: MOV TEMP[10].xy, TEMP[6].xyyy 459: MOV TEMP[10].w, TEMP[7].xxxx 460: TXL TEMP[10], TEMP[10], SAMP[3], 2D 461: FSEQ TEMP[11].x, TEMP[2].zzzz, IMM[3].xxxx 462: AND TEMP[11].x, TEMP[11].xxxx, IMM[2].zzzz 463: MOV TEMP[12].xy, TEMP[6].xyyy 464: MOV TEMP[12].w, TEMP[7].xxxx 465: TXL TEMP[12], TEMP[12], SAMP[2], 2D 466: FSEQ TEMP[13].x, TEMP[2].zzzz, IMM[2].wwww 467: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 468: MOV TEMP[14].xy, TEMP[6].xyyy 469: MOV TEMP[14].w, TEMP[7].xxxx 470: TXL TEMP[14], TEMP[14], SAMP[1], 2D 471: FSEQ TEMP[15].x, TEMP[2].zzzz, IMM[2].zzzz 472: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 473: MOV TEMP[6].xy, TEMP[6].xyyy 474: MOV TEMP[6].w, TEMP[7].xxxx 475: TXL TEMP[6], TEMP[6], SAMP[0], 2D 476: FSEQ TEMP[16].x, TEMP[2].zzzz, IMM[3].yyyy 477: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 478: MUL TEMP[6], TEMP[6], TEMP[16].xxxx 479: MAD TEMP[6], TEMP[14], TEMP[15].xxxx, TEMP[6] 480: MAD TEMP[6], TEMP[12], TEMP[13].xxxx, TEMP[6] 481: MAD TEMP[6], TEMP[10], TEMP[11].xxxx, TEMP[6] 482: MAD TEMP[6].yw, TEMP[8], TEMP[9].xxxx, TEMP[6] 483: MAD TEMP[6].xy, TEMP[6].wyyy, IMM[2].wwww, IMM[3].zzzz 484: DP2 TEMP[8].x, TEMP[6].xyyy, TEMP[6].xyyy 485: MOV_SAT TEMP[26].x, TEMP[8].xxxx 486: MOV TEMP[8].xy, TEMP[0].zxzz 487: MOV TEMP[9].x, IMM[2].xxxx 488: FSNE TEMP[10].x, CONST[6].xxxx, TEMP[5].xxxx 489: UIF TEMP[10].xxxx :0 490: MOV TEMP[9].x, IMM[2].yyyy 491: RCP TEMP[10].x, CONST[9].xxxx 492: MUL TEMP[8].xy, TEMP[0].zxxx, TEMP[10].xxxx 493: ELSE :0 494: RCP TEMP[10].x, CONST[8].xxxx 495: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[10].xxxx 496: ENDIF 497: FRC TEMP[8].xy, TEMP[8].xyyy 498: MUL TEMP[10].x, CONST[10].xxxx, IMM[2].wwww 499: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[9].xxxx 500: ADD TEMP[10].x, IMM[2].zzzz, -TEMP[10].xxxx 501: MUL TEMP[9].x, TEMP[9].xxxx, CONST[10].xxxx 502: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[10].xxxx, TEMP[9].xxxx 503: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[5].xxxx, TEMP[2].xyyy 504: MOV TEMP[9].xy, TEMP[8].xyyy 505: MOV TEMP[9].w, TEMP[7].xxxx 506: TXL TEMP[9], TEMP[9], SAMP[4], 2D 507: FSEQ TEMP[10].x, TEMP[2].zzzz, IMM[1].zzzz 508: AND TEMP[10].x, TEMP[10].xxxx, IMM[2].zzzz 509: MOV TEMP[11].xy, TEMP[8].xyyy 510: MOV TEMP[11].w, TEMP[7].xxxx 511: TXL TEMP[11], TEMP[11], SAMP[3], 2D 512: FSEQ TEMP[12].x, TEMP[2].zzzz, IMM[3].xxxx 513: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz 514: MOV TEMP[13].xy, TEMP[8].xyyy 515: MOV TEMP[13].w, TEMP[7].xxxx 516: TXL TEMP[13], TEMP[13], SAMP[2], 2D 517: FSEQ TEMP[14].x, TEMP[2].zzzz, IMM[2].wwww 518: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 519: MOV TEMP[15].xy, TEMP[8].xyyy 520: MOV TEMP[15].w, TEMP[7].xxxx 521: TXL TEMP[15], TEMP[15], SAMP[1], 2D 522: FSEQ TEMP[16].x, TEMP[2].zzzz, IMM[2].zzzz 523: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 524: MOV TEMP[8].xy, TEMP[8].xyyy 525: MOV TEMP[8].w, TEMP[7].xxxx 526: TXL TEMP[8], TEMP[8], SAMP[0], 2D 527: FSEQ TEMP[17].x, TEMP[2].zzzz, IMM[3].yyyy 528: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 529: MUL TEMP[8], TEMP[8], TEMP[17].xxxx 530: MAD TEMP[8], TEMP[15], TEMP[16].xxxx, TEMP[8] 531: MAD TEMP[8], TEMP[13], TEMP[14].xxxx, TEMP[8] 532: MAD TEMP[8], TEMP[11], TEMP[12].xxxx, TEMP[8] 533: MAD TEMP[8].yw, TEMP[9], TEMP[10].xxxx, TEMP[8] 534: MAD TEMP[8].xy, TEMP[8].wyyy, IMM[2].wwww, IMM[3].zzzz 535: DP2 TEMP[9].x, TEMP[8].xyyy, TEMP[8].xyyy 536: MOV_SAT TEMP[27].x, TEMP[9].xxxx 537: MOV TEMP[9].xy, TEMP[0].xyxx 538: MOV TEMP[10].x, IMM[2].xxxx 539: FSNE TEMP[11].x, CONST[6].xxxx, TEMP[5].xxxx 540: UIF TEMP[11].xxxx :0 541: MOV TEMP[10].x, IMM[2].yyyy 542: RCP TEMP[11].x, CONST[9].xxxx 543: MUL TEMP[9].xy, TEMP[0].xyyy, TEMP[11].xxxx 544: ELSE :0 545: RCP TEMP[0].x, CONST[8].xxxx 546: MUL TEMP[9].xy, TEMP[9].xyyy, TEMP[0].xxxx 547: ENDIF 548: FRC TEMP[0].xy, TEMP[9].xyyy 549: MUL TEMP[9].x, CONST[10].xxxx, IMM[2].wwww 550: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[10].xxxx 551: ADD TEMP[9].x, IMM[2].zzzz, -TEMP[9].xxxx 552: MUL TEMP[10].x, TEMP[10].xxxx, CONST[10].xxxx 553: MAD TEMP[0].xy, TEMP[0].xyyy, TEMP[9].xxxx, TEMP[10].xxxx 554: MAD TEMP[0].xy, TEMP[0].xyyy, TEMP[5].xxxx, TEMP[2].xyyy 555: MOV TEMP[5].xy, TEMP[0].xyyy 556: MOV TEMP[5].w, TEMP[7].xxxx 557: TXL TEMP[5], TEMP[5], SAMP[4], 2D 558: FSEQ TEMP[9].x, TEMP[2].zzzz, IMM[1].zzzz 559: AND TEMP[9].x, TEMP[9].xxxx, IMM[2].zzzz 560: MOV TEMP[10].xy, TEMP[0].xyyy 561: MOV TEMP[10].w, TEMP[7].xxxx 562: TXL TEMP[10], TEMP[10], SAMP[3], 2D 563: FSEQ TEMP[11].x, TEMP[2].zzzz, IMM[3].xxxx 564: AND TEMP[11].x, TEMP[11].xxxx, IMM[2].zzzz 565: MOV TEMP[12].xy, TEMP[0].xyyy 566: MOV TEMP[12].w, TEMP[7].xxxx 567: TXL TEMP[12], TEMP[12], SAMP[2], 2D 568: FSEQ TEMP[13].x, TEMP[2].zzzz, IMM[2].wwww 569: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 570: MOV TEMP[14].xy, TEMP[0].xyyy 571: MOV TEMP[14].w, TEMP[7].xxxx 572: TXL TEMP[14], TEMP[14], SAMP[1], 2D 573: FSEQ TEMP[15].x, TEMP[2].zzzz, IMM[2].zzzz 574: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 575: MOV TEMP[0].xy, TEMP[0].xyyy 576: MOV TEMP[0].w, TEMP[7].xxxx 577: TXL TEMP[0], TEMP[0], SAMP[0], 2D 578: FSEQ TEMP[2].x, TEMP[2].zzzz, IMM[3].yyyy 579: AND TEMP[2].x, TEMP[2].xxxx, IMM[2].zzzz 580: MUL TEMP[0], TEMP[0], TEMP[2].xxxx 581: MAD TEMP[0], TEMP[14], TEMP[15].xxxx, TEMP[0] 582: MAD TEMP[0], TEMP[12], TEMP[13].xxxx, TEMP[0] 583: MAD TEMP[0], TEMP[10], TEMP[11].xxxx, TEMP[0] 584: MAD TEMP[0].yw, TEMP[5], TEMP[9].xxxx, TEMP[0] 585: MAD TEMP[0].xy, TEMP[0].wyyy, IMM[2].wwww, IMM[3].zzzz 586: DP2 TEMP[2].x, TEMP[0].xyyy, TEMP[0].xyyy 587: MOV_SAT TEMP[28].x, TEMP[2].xxxx 588: MOV TEMP[2].x, IMM[3].yyyy 589: MOV TEMP[2].y, TEMP[6].xxxx 590: MOV TEMP[2].z, TEMP[6].yyyy 591: MOV TEMP[5].y, IMM[3].yyyy 592: MOV TEMP[5].x, TEMP[8].yyyy 593: MOV TEMP[5].z, TEMP[8].xxxx 594: MOV TEMP[6].z, IMM[3].yyyy 595: MOV TEMP[6].xy, TEMP[0].xyxx 596: MOV TEMP[0].w, IMM[2].zzzz 597: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[1].xxxx 598: MAD TEMP[2].xyz, TEMP[5].xyzz, TEMP[1].yyyy, TEMP[2].xyzz 599: MAD TEMP[1].xyz, TEMP[6].xyzz, TEMP[1].zzzz, TEMP[2].xyzz 600: MUL TEMP[2].xyz, IN[1].xxxx, TEMP[3].xyzz 601: MAD TEMP[2].xyz, IN[1].yyyy, TEMP[4].xyzz, TEMP[2].xyzz 602: MAD TEMP[0].xyz, IN[1].zzzz, TEMP[1].xyzz, TEMP[2].xyzz 603: DP4 TEMP[1].x, TEMP[0], TEMP[0] 604: RSQ TEMP[1].x, TEMP[1].xxxx 605: MUL TEMP[0].xyz, TEMP[0], TEMP[1].xxxx 606: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[0].wwww 607: ADD TEMP[0].xyz, IN[2].xyzz, -TEMP[0].xyzz 608: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 609: RSQ TEMP[1].x, TEMP[1].xxxx 610: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 611: MAD TEMP[0].xyz, TEMP[0].xyzz, IMM[0].wwww, IMM[0].wwww 612: MOV TEMP[0].w, IN[1].wwww 613: MOV OUT[0], TEMP[0] 614: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %34 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %35 = load <8 x i32>, <8 x i32> addrspace(2)* %34, align 32, !tbaa !0 %36 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %37 = load <4 x i32>, <4 x i32> addrspace(2)* %36, align 16, !tbaa !0 %38 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %39 = load <8 x i32>, <8 x i32> addrspace(2)* %38, align 32, !tbaa !0 %40 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %41 = load <4 x i32>, <4 x i32> addrspace(2)* %40, align 16, !tbaa !0 %42 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %43 = load <8 x i32>, <8 x i32> addrspace(2)* %42, align 32, !tbaa !0 %44 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %45 = load <4 x i32>, <4 x i32> addrspace(2)* %44, align 16, !tbaa !0 %46 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %47 = load <8 x i32>, <8 x i32> addrspace(2)* %46, align 32, !tbaa !0 %48 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0 %50 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %51 = load <8 x i32>, <8 x i32> addrspace(2)* %50, align 32, !tbaa !0 %52 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %53 = load <4 x i32>, <4 x i32> addrspace(2)* %52, align 16, !tbaa !0 %54 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %55 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %56 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %57 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %58 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %59 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %60 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %61 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %62 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %63 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %64 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %65 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %66 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %67 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %68 = call float @llvm.fabs.f32(float %62) %69 = call float @llvm.fabs.f32(float %63) %70 = call float @llvm.fabs.f32(float %64) %71 = fmul float %68, %68 %72 = fmul float %69, %69 %73 = fadd float %72, %71 %74 = fmul float %70, %70 %75 = fadd float %73, %74 %76 = call float @llvm.AMDGPU.rsq.clamped.f32(float %75) %77 = fmul float %68, %76 %78 = fadd float %77, 0xBFC99999A0000000 %79 = fmul float %69, %76 %80 = fadd float %79, 0xBFC99999A0000000 %81 = fmul float %70, %76 %82 = fadd float %81, 0xBFC99999A0000000 %83 = fmul float %78, 7.000000e+00 %84 = fmul float %80, 7.000000e+00 %85 = fmul float %82, 7.000000e+00 %86 = call float @llvm.maxnum.f32(float %83, float 0x3F847AE140000000) %87 = call float @llvm.maxnum.f32(float %84, float 0x3F847AE140000000) %88 = call float @llvm.maxnum.f32(float %85, float 0x3F847AE140000000) %89 = fadd float %86, %87 %90 = fadd float %89, %88 %91 = fdiv float 1.000000e+00, %90 %92 = fmul float %86, %91 %93 = fmul float %87, %91 %94 = fmul float %88, %91 %95 = fadd float %54, 5.000000e-01 %96 = fadd float %55, 5.000000e-01 %97 = fadd float %56, 5.000000e-01 %98 = call float @llvm.floor.f32(float %95) %99 = call float @llvm.floor.f32(float %96) %100 = call float @llvm.floor.f32(float %97) %101 = fmul float %98, %27 %102 = call float @llvm.floor.f32(float %101) %103 = fmul float %102, %27 %104 = fcmp ult float %98, 6.400000e+01 br i1 %104, label %ELSE, label %IF IF: ; preds = %main_body %105 = fadd float %98, -6.400000e+01 %106 = fmul float %105, %28 %107 = call float @llvm.floor.f32(float %106) %108 = fmul float %107, %28 %109 = call float @llvm.floor.f32(float %106) %110 = fsub float %106, %109 %111 = call float @llvm.floor.f32(float %108) %112 = fsub float %108, %111 %113 = call float @llvm.floor.f32(float %108) %114 = fadd float %113, 4.000000e+00 br label %ENDIF ELSE: ; preds = %main_body %115 = call float @llvm.floor.f32(float %101) %116 = fsub float %101, %115 %117 = call float @llvm.floor.f32(float %103) %118 = fsub float %103, %117 %119 = call float @llvm.floor.f32(float %103) br label %ENDIF ENDIF: ; preds = %ELSE, %IF %temp26.0 = phi float [ %114, %IF ], [ %119, %ELSE ] %temp25.0 = phi float [ %112, %IF ], [ %118, %ELSE ] %temp24.0 = phi float [ %110, %IF ], [ %116, %ELSE ] %temp12.0 = phi float [ %28, %IF ], [ %27, %ELSE ] %120 = fmul float %99, %27 %121 = call float @llvm.floor.f32(float %120) %122 = fmul float %121, %27 %123 = fcmp ult float %99, 6.400000e+01 br i1 %123, label %ELSE118, label %IF117 IF117: ; preds = %ENDIF %124 = fadd float %99, -6.400000e+01 %125 = fmul float %124, %28 %126 = call float @llvm.floor.f32(float %125) %127 = fmul float %126, %28 %128 = call float @llvm.floor.f32(float %125) %129 = fsub float %125, %128 %130 = call float @llvm.floor.f32(float %127) %131 = fsub float %127, %130 %132 = call float @llvm.floor.f32(float %127) %133 = fadd float %132, 4.000000e+00 br label %ENDIF116 ELSE118: ; preds = %ENDIF %134 = call float @llvm.floor.f32(float %120) %135 = fsub float %120, %134 %136 = call float @llvm.floor.f32(float %122) %137 = fsub float %122, %136 %138 = call float @llvm.floor.f32(float %122) br label %ENDIF116 ENDIF116: ; preds = %ELSE118, %IF117 %temp32.0 = phi float [ %129, %IF117 ], [ %135, %ELSE118 ] %temp33.0 = phi float [ %131, %IF117 ], [ %137, %ELSE118 ] %temp34.0 = phi float [ %133, %IF117 ], [ %138, %ELSE118 ] %temp16.0 = phi float [ %28, %IF117 ], [ %27, %ELSE118 ] %139 = fmul float %100, %27 %140 = call float @llvm.floor.f32(float %139) %141 = fmul float %140, %27 %142 = fcmp ult float %100, 6.400000e+01 br i1 %142, label %ELSE121, label %IF120 IF120: ; preds = %ENDIF116 %143 = fadd float %100, -6.400000e+01 %144 = fmul float %143, %28 %145 = call float @llvm.floor.f32(float %144) %146 = fmul float %145, %28 %147 = call float @llvm.floor.f32(float %144) %148 = fsub float %144, %147 %149 = call float @llvm.floor.f32(float %146) %150 = fsub float %146, %149 %151 = call float @llvm.floor.f32(float %146) %152 = fadd float %151, 4.000000e+00 br label %ENDIF119 ELSE121: ; preds = %ENDIF116 %153 = call float @llvm.floor.f32(float %139) %154 = fsub float %139, %153 %155 = call float @llvm.floor.f32(float %141) %156 = fsub float %141, %155 %157 = call float @llvm.floor.f32(float %141) br label %ENDIF119 ENDIF119: ; preds = %ELSE121, %IF120 %temp20.0 = phi float [ %28, %IF120 ], [ %27, %ELSE121 ] %temp10.0 = phi float [ %152, %IF120 ], [ %157, %ELSE121 ] %temp9.0 = phi float [ %150, %IF120 ], [ %156, %ELSE121 ] %temp8.0 = phi float [ %148, %IF120 ], [ %154, %ELSE121 ] %158 = fsub float %65, %24 %159 = fsub float %66, %25 %160 = fsub float %67, %26 %161 = fmul float %158, %158 %162 = fmul float %159, %159 %163 = fadd float %162, %161 %164 = fmul float %160, %160 %165 = fadd float %163, %164 %166 = fmul float %33, %165 %167 = call float @llvm.log2.f32(float %166) %168 = fmul float %167, 0x3FE62E4300000000 %169 = fmul float %168, %32 %170 = fcmp une float %27, %temp12.0 %.sink149 = select i1 %170, float %30, float %29 %temp40.0 = select i1 %170, float 1.953125e-03, float 3.906250e-03 %171 = fdiv float 1.000000e+00, %.sink149 %172 = fmul float %67, %171 %173 = fmul float %66, %171 %174 = call float @llvm.floor.f32(float %172) %175 = fsub float %172, %174 %176 = call float @llvm.floor.f32(float %173) %177 = fsub float %173, %176 %178 = fmul float %31, 2.000000e+00 %179 = fmul float %178, %temp40.0 %180 = fsub float 1.000000e+00, %179 %181 = fmul float %temp40.0, %31 %182 = fmul float %175, %180 %183 = fadd float %182, %181 %184 = fmul float %177, %180 %185 = fadd float %184, %181 %186 = fmul float %183, %temp12.0 %187 = fadd float %186, %temp24.0 %188 = fmul float %185, %temp12.0 %189 = fadd float %188, %temp25.0 %190 = bitcast float %187 to i32 %191 = bitcast float %189 to i32 %192 = bitcast float %169 to i32 %193 = insertelement <4 x i32> undef, i32 %190, i32 0 %194 = insertelement <4 x i32> %193, i32 %191, i32 1 %195 = insertelement <4 x i32> %194, i32 %192, i32 2 %196 = bitcast <8 x i32> %51 to <32 x i8> %197 = bitcast <4 x i32> %53 to <16 x i8> %198 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %195, <32 x i8> %196, <16 x i8> %197, i32 2) %199 = extractelement <4 x float> %198, i32 1 %200 = extractelement <4 x float> %198, i32 3 %201 = fcmp oeq float %temp26.0, 4.000000e+00 %202 = select i1 %201, float 1.000000e+00, float 0.000000e+00 %203 = bitcast float %187 to i32 %204 = bitcast float %189 to i32 %205 = bitcast float %169 to i32 %206 = insertelement <4 x i32> undef, i32 %203, i32 0 %207 = insertelement <4 x i32> %206, i32 %204, i32 1 %208 = insertelement <4 x i32> %207, i32 %205, i32 2 %209 = bitcast <8 x i32> %47 to <32 x i8> %210 = bitcast <4 x i32> %49 to <16 x i8> %211 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %208, <32 x i8> %209, <16 x i8> %210, i32 2) %212 = extractelement <4 x float> %211, i32 1 %213 = extractelement <4 x float> %211, i32 3 %214 = fcmp oeq float %temp26.0, 3.000000e+00 %215 = select i1 %214, float 1.000000e+00, float 0.000000e+00 %216 = bitcast float %187 to i32 %217 = bitcast float %189 to i32 %218 = bitcast float %169 to i32 %219 = insertelement <4 x i32> undef, i32 %216, i32 0 %220 = insertelement <4 x i32> %219, i32 %217, i32 1 %221 = insertelement <4 x i32> %220, i32 %218, i32 2 %222 = bitcast <8 x i32> %43 to <32 x i8> %223 = bitcast <4 x i32> %45 to <16 x i8> %224 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %221, <32 x i8> %222, <16 x i8> %223, i32 2) %225 = extractelement <4 x float> %224, i32 1 %226 = extractelement <4 x float> %224, i32 3 %227 = fcmp oeq float %temp26.0, 2.000000e+00 %228 = select i1 %227, float 1.000000e+00, float 0.000000e+00 %229 = bitcast float %187 to i32 %230 = bitcast float %189 to i32 %231 = bitcast float %169 to i32 %232 = insertelement <4 x i32> undef, i32 %229, i32 0 %233 = insertelement <4 x i32> %232, i32 %230, i32 1 %234 = insertelement <4 x i32> %233, i32 %231, i32 2 %235 = bitcast <8 x i32> %39 to <32 x i8> %236 = bitcast <4 x i32> %41 to <16 x i8> %237 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %234, <32 x i8> %235, <16 x i8> %236, i32 2) %238 = extractelement <4 x float> %237, i32 1 %239 = extractelement <4 x float> %237, i32 3 %240 = fcmp oeq float %temp26.0, 1.000000e+00 %241 = select i1 %240, float 1.000000e+00, float 0.000000e+00 %242 = bitcast float %187 to i32 %243 = bitcast float %189 to i32 %244 = bitcast float %169 to i32 %245 = insertelement <4 x i32> undef, i32 %242, i32 0 %246 = insertelement <4 x i32> %245, i32 %243, i32 1 %247 = insertelement <4 x i32> %246, i32 %244, i32 2 %248 = bitcast <8 x i32> %35 to <32 x i8> %249 = bitcast <4 x i32> %37 to <16 x i8> %250 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %247, <32 x i8> %248, <16 x i8> %249, i32 2) %251 = extractelement <4 x float> %250, i32 1 %252 = extractelement <4 x float> %250, i32 3 %253 = fcmp oeq float %temp26.0, 0.000000e+00 %254 = select i1 %253, float 1.000000e+00, float 0.000000e+00 %255 = fmul float %251, %254 %256 = fmul float %252, %254 %257 = fmul float %238, %241 %258 = fadd float %257, %255 %259 = fmul float %239, %241 %260 = fadd float %259, %256 %261 = fmul float %225, %228 %262 = fadd float %261, %258 %263 = fmul float %226, %228 %264 = fadd float %263, %260 %265 = fmul float %212, %215 %266 = fadd float %265, %262 %267 = fmul float %213, %215 %268 = fadd float %267, %264 %269 = fmul float %199, %202 %270 = fadd float %269, %266 %271 = fmul float %200, %202 %272 = fadd float %271, %268 %273 = fmul float %272, 2.000000e+00 %274 = fadd float %273, -1.000000e+00 %275 = fmul float %270, 2.000000e+00 %276 = fadd float %275, -1.000000e+00 %277 = fmul float %274, %274 %278 = fmul float %276, %276 %279 = fadd float %277, %278 %280 = call float @llvm.AMDIL.clamp.(float %279, float 0.000000e+00, float 1.000000e+00) %281 = fcmp une float %27, %temp12.0 %.sink150 = select i1 %281, float %30, float %29 %temp44.0 = select i1 %281, float 1.953125e-03, float 3.906250e-03 %282 = fdiv float 1.000000e+00, %.sink150 %283 = fmul float %67, %282 %284 = fmul float %65, %282 %285 = call float @llvm.floor.f32(float %283) %286 = fsub float %283, %285 %287 = call float @llvm.floor.f32(float %284) %288 = fsub float %284, %287 %289 = fmul float %31, 2.000000e+00 %290 = fmul float %289, %temp44.0 %291 = fsub float 1.000000e+00, %290 %292 = fmul float %temp44.0, %31 %293 = fmul float %286, %291 %294 = fadd float %293, %292 %295 = fmul float %288, %291 %296 = fadd float %295, %292 %297 = fmul float %294, %temp12.0 %298 = fadd float %297, %temp24.0 %299 = fmul float %296, %temp12.0 %300 = fadd float %299, %temp25.0 %301 = bitcast float %298 to i32 %302 = bitcast float %300 to i32 %303 = bitcast float %169 to i32 %304 = insertelement <4 x i32> undef, i32 %301, i32 0 %305 = insertelement <4 x i32> %304, i32 %302, i32 1 %306 = insertelement <4 x i32> %305, i32 %303, i32 2 %307 = bitcast <8 x i32> %51 to <32 x i8> %308 = bitcast <4 x i32> %53 to <16 x i8> %309 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %306, <32 x i8> %307, <16 x i8> %308, i32 2) %310 = extractelement <4 x float> %309, i32 1 %311 = extractelement <4 x float> %309, i32 3 %312 = fcmp oeq float %temp26.0, 4.000000e+00 %313 = select i1 %312, float 1.000000e+00, float 0.000000e+00 %314 = bitcast float %298 to i32 %315 = bitcast float %300 to i32 %316 = bitcast float %169 to i32 %317 = insertelement <4 x i32> undef, i32 %314, i32 0 %318 = insertelement <4 x i32> %317, i32 %315, i32 1 %319 = insertelement <4 x i32> %318, i32 %316, i32 2 %320 = bitcast <8 x i32> %47 to <32 x i8> %321 = bitcast <4 x i32> %49 to <16 x i8> %322 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %319, <32 x i8> %320, <16 x i8> %321, i32 2) %323 = extractelement <4 x float> %322, i32 1 %324 = extractelement <4 x float> %322, i32 3 %325 = fcmp oeq float %temp26.0, 3.000000e+00 %326 = select i1 %325, float 1.000000e+00, float 0.000000e+00 %327 = bitcast float %298 to i32 %328 = bitcast float %300 to i32 %329 = bitcast float %169 to i32 %330 = insertelement <4 x i32> undef, i32 %327, i32 0 %331 = insertelement <4 x i32> %330, i32 %328, i32 1 %332 = insertelement <4 x i32> %331, i32 %329, i32 2 %333 = bitcast <8 x i32> %43 to <32 x i8> %334 = bitcast <4 x i32> %45 to <16 x i8> %335 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %332, <32 x i8> %333, <16 x i8> %334, i32 2) %336 = extractelement <4 x float> %335, i32 1 %337 = extractelement <4 x float> %335, i32 3 %338 = fcmp oeq float %temp26.0, 2.000000e+00 %339 = select i1 %338, float 1.000000e+00, float 0.000000e+00 %340 = bitcast float %298 to i32 %341 = bitcast float %300 to i32 %342 = bitcast float %169 to i32 %343 = insertelement <4 x i32> undef, i32 %340, i32 0 %344 = insertelement <4 x i32> %343, i32 %341, i32 1 %345 = insertelement <4 x i32> %344, i32 %342, i32 2 %346 = bitcast <8 x i32> %39 to <32 x i8> %347 = bitcast <4 x i32> %41 to <16 x i8> %348 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %345, <32 x i8> %346, <16 x i8> %347, i32 2) %349 = extractelement <4 x float> %348, i32 1 %350 = extractelement <4 x float> %348, i32 3 %351 = fcmp oeq float %temp26.0, 1.000000e+00 %352 = select i1 %351, float 1.000000e+00, float 0.000000e+00 %353 = bitcast float %298 to i32 %354 = bitcast float %300 to i32 %355 = bitcast float %169 to i32 %356 = insertelement <4 x i32> undef, i32 %353, i32 0 %357 = insertelement <4 x i32> %356, i32 %354, i32 1 %358 = insertelement <4 x i32> %357, i32 %355, i32 2 %359 = bitcast <8 x i32> %35 to <32 x i8> %360 = bitcast <4 x i32> %37 to <16 x i8> %361 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %358, <32 x i8> %359, <16 x i8> %360, i32 2) %362 = extractelement <4 x float> %361, i32 1 %363 = extractelement <4 x float> %361, i32 3 %364 = fcmp oeq float %temp26.0, 0.000000e+00 %365 = select i1 %364, float 1.000000e+00, float 0.000000e+00 %366 = fmul float %362, %365 %367 = fmul float %363, %365 %368 = fmul float %349, %352 %369 = fadd float %368, %366 %370 = fmul float %350, %352 %371 = fadd float %370, %367 %372 = fmul float %336, %339 %373 = fadd float %372, %369 %374 = fmul float %337, %339 %375 = fadd float %374, %371 %376 = fmul float %323, %326 %377 = fadd float %376, %373 %378 = fmul float %324, %326 %379 = fadd float %378, %375 %380 = fmul float %310, %313 %381 = fadd float %380, %377 %382 = fmul float %311, %313 %383 = fadd float %382, %379 %384 = fmul float %383, 2.000000e+00 %385 = fadd float %384, -1.000000e+00 %386 = fmul float %381, 2.000000e+00 %387 = fadd float %386, -1.000000e+00 %388 = fmul float %385, %385 %389 = fmul float %387, %387 %390 = fadd float %388, %389 %391 = call float @llvm.AMDIL.clamp.(float %390, float 0.000000e+00, float 1.000000e+00) %392 = fcmp une float %27, %temp12.0 %.sink151 = select i1 %392, float %30, float %29 %temp48.0 = select i1 %392, float 1.953125e-03, float 3.906250e-03 %393 = fdiv float 1.000000e+00, %.sink151 %394 = fmul float %65, %393 %395 = fmul float %66, %393 %396 = call float @llvm.floor.f32(float %394) %397 = fsub float %394, %396 %398 = call float @llvm.floor.f32(float %395) %399 = fsub float %395, %398 %400 = fmul float %31, 2.000000e+00 %401 = fmul float %400, %temp48.0 %402 = fsub float 1.000000e+00, %401 %403 = fmul float %temp48.0, %31 %404 = fmul float %397, %402 %405 = fadd float %404, %403 %406 = fmul float %399, %402 %407 = fadd float %406, %403 %408 = fmul float %405, %temp12.0 %409 = fadd float %408, %temp24.0 %410 = fmul float %407, %temp12.0 %411 = fadd float %410, %temp25.0 %412 = bitcast float %409 to i32 %413 = bitcast float %411 to i32 %414 = bitcast float %169 to i32 %415 = insertelement <4 x i32> undef, i32 %412, i32 0 %416 = insertelement <4 x i32> %415, i32 %413, i32 1 %417 = insertelement <4 x i32> %416, i32 %414, i32 2 %418 = bitcast <8 x i32> %51 to <32 x i8> %419 = bitcast <4 x i32> %53 to <16 x i8> %420 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %417, <32 x i8> %418, <16 x i8> %419, i32 2) %421 = extractelement <4 x float> %420, i32 1 %422 = extractelement <4 x float> %420, i32 3 %423 = fcmp oeq float %temp26.0, 4.000000e+00 %424 = select i1 %423, float 1.000000e+00, float 0.000000e+00 %425 = bitcast float %409 to i32 %426 = bitcast float %411 to i32 %427 = bitcast float %169 to i32 %428 = insertelement <4 x i32> undef, i32 %425, i32 0 %429 = insertelement <4 x i32> %428, i32 %426, i32 1 %430 = insertelement <4 x i32> %429, i32 %427, i32 2 %431 = bitcast <8 x i32> %47 to <32 x i8> %432 = bitcast <4 x i32> %49 to <16 x i8> %433 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %430, <32 x i8> %431, <16 x i8> %432, i32 2) %434 = extractelement <4 x float> %433, i32 1 %435 = extractelement <4 x float> %433, i32 3 %436 = fcmp oeq float %temp26.0, 3.000000e+00 %437 = select i1 %436, float 1.000000e+00, float 0.000000e+00 %438 = bitcast float %409 to i32 %439 = bitcast float %411 to i32 %440 = bitcast float %169 to i32 %441 = insertelement <4 x i32> undef, i32 %438, i32 0 %442 = insertelement <4 x i32> %441, i32 %439, i32 1 %443 = insertelement <4 x i32> %442, i32 %440, i32 2 %444 = bitcast <8 x i32> %43 to <32 x i8> %445 = bitcast <4 x i32> %45 to <16 x i8> %446 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %443, <32 x i8> %444, <16 x i8> %445, i32 2) %447 = extractelement <4 x float> %446, i32 1 %448 = extractelement <4 x float> %446, i32 3 %449 = fcmp oeq float %temp26.0, 2.000000e+00 %450 = select i1 %449, float 1.000000e+00, float 0.000000e+00 %451 = bitcast float %409 to i32 %452 = bitcast float %411 to i32 %453 = bitcast float %169 to i32 %454 = insertelement <4 x i32> undef, i32 %451, i32 0 %455 = insertelement <4 x i32> %454, i32 %452, i32 1 %456 = insertelement <4 x i32> %455, i32 %453, i32 2 %457 = bitcast <8 x i32> %39 to <32 x i8> %458 = bitcast <4 x i32> %41 to <16 x i8> %459 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %456, <32 x i8> %457, <16 x i8> %458, i32 2) %460 = extractelement <4 x float> %459, i32 1 %461 = extractelement <4 x float> %459, i32 3 %462 = fcmp oeq float %temp26.0, 1.000000e+00 %463 = select i1 %462, float 1.000000e+00, float 0.000000e+00 %464 = bitcast float %409 to i32 %465 = bitcast float %411 to i32 %466 = bitcast float %169 to i32 %467 = insertelement <4 x i32> undef, i32 %464, i32 0 %468 = insertelement <4 x i32> %467, i32 %465, i32 1 %469 = insertelement <4 x i32> %468, i32 %466, i32 2 %470 = bitcast <8 x i32> %35 to <32 x i8> %471 = bitcast <4 x i32> %37 to <16 x i8> %472 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %469, <32 x i8> %470, <16 x i8> %471, i32 2) %473 = extractelement <4 x float> %472, i32 1 %474 = extractelement <4 x float> %472, i32 3 %475 = fcmp oeq float %temp26.0, 0.000000e+00 %476 = select i1 %475, float 1.000000e+00, float 0.000000e+00 %477 = fmul float %473, %476 %478 = fmul float %474, %476 %479 = fmul float %460, %463 %480 = fadd float %479, %477 %481 = fmul float %461, %463 %482 = fadd float %481, %478 %483 = fmul float %447, %450 %484 = fadd float %483, %480 %485 = fmul float %448, %450 %486 = fadd float %485, %482 %487 = fmul float %434, %437 %488 = fadd float %487, %484 %489 = fmul float %435, %437 %490 = fadd float %489, %486 %491 = fmul float %421, %424 %492 = fadd float %491, %488 %493 = fmul float %422, %424 %494 = fadd float %493, %490 %495 = fmul float %494, 2.000000e+00 %496 = fadd float %495, -1.000000e+00 %497 = fmul float %492, 2.000000e+00 %498 = fadd float %497, -1.000000e+00 %499 = fmul float %496, %496 %500 = fmul float %498, %498 %501 = fadd float %499, %500 %502 = call float @llvm.AMDIL.clamp.(float %501, float 0.000000e+00, float 1.000000e+00) %503 = fmul float %92, 0.000000e+00 %504 = fmul float %274, %92 %505 = fmul float %276, %92 %506 = fmul float %387, %93 %507 = fadd float %506, %503 %508 = fmul float %93, 0.000000e+00 %509 = fadd float %508, %504 %510 = fmul float %385, %93 %511 = fadd float %510, %505 %512 = fmul float %496, %94 %513 = fadd float %512, %507 %514 = fmul float %498, %94 %515 = fadd float %514, %509 %516 = fmul float %94, 0.000000e+00 %517 = fadd float %516, %511 %518 = fcmp une float %27, %temp16.0 %.sink152 = select i1 %518, float %30, float %29 %temp36.1 = select i1 %518, float 1.953125e-03, float 3.906250e-03 %519 = fdiv float 1.000000e+00, %.sink152 %520 = fmul float %67, %519 %521 = fmul float %66, %519 %522 = call float @llvm.floor.f32(float %520) %523 = fsub float %520, %522 %524 = call float @llvm.floor.f32(float %521) %525 = fsub float %521, %524 %526 = fmul float %31, 2.000000e+00 %527 = fmul float %526, %temp36.1 %528 = fsub float 1.000000e+00, %527 %529 = fmul float %temp36.1, %31 %530 = fmul float %523, %528 %531 = fadd float %530, %529 %532 = fmul float %525, %528 %533 = fadd float %532, %529 %534 = fmul float %531, %temp16.0 %535 = fadd float %534, %temp32.0 %536 = fmul float %533, %temp16.0 %537 = fadd float %536, %temp33.0 %538 = bitcast float %535 to i32 %539 = bitcast float %537 to i32 %540 = bitcast float %169 to i32 %541 = insertelement <4 x i32> undef, i32 %538, i32 0 %542 = insertelement <4 x i32> %541, i32 %539, i32 1 %543 = insertelement <4 x i32> %542, i32 %540, i32 2 %544 = bitcast <8 x i32> %51 to <32 x i8> %545 = bitcast <4 x i32> %53 to <16 x i8> %546 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %543, <32 x i8> %544, <16 x i8> %545, i32 2) %547 = extractelement <4 x float> %546, i32 1 %548 = extractelement <4 x float> %546, i32 3 %549 = fcmp oeq float %temp34.0, 4.000000e+00 %550 = select i1 %549, float 1.000000e+00, float 0.000000e+00 %551 = bitcast float %535 to i32 %552 = bitcast float %537 to i32 %553 = bitcast float %169 to i32 %554 = insertelement <4 x i32> undef, i32 %551, i32 0 %555 = insertelement <4 x i32> %554, i32 %552, i32 1 %556 = insertelement <4 x i32> %555, i32 %553, i32 2 %557 = bitcast <8 x i32> %47 to <32 x i8> %558 = bitcast <4 x i32> %49 to <16 x i8> %559 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %556, <32 x i8> %557, <16 x i8> %558, i32 2) %560 = extractelement <4 x float> %559, i32 1 %561 = extractelement <4 x float> %559, i32 3 %562 = fcmp oeq float %temp34.0, 3.000000e+00 %563 = select i1 %562, float 1.000000e+00, float 0.000000e+00 %564 = bitcast float %535 to i32 %565 = bitcast float %537 to i32 %566 = bitcast float %169 to i32 %567 = insertelement <4 x i32> undef, i32 %564, i32 0 %568 = insertelement <4 x i32> %567, i32 %565, i32 1 %569 = insertelement <4 x i32> %568, i32 %566, i32 2 %570 = bitcast <8 x i32> %43 to <32 x i8> %571 = bitcast <4 x i32> %45 to <16 x i8> %572 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %569, <32 x i8> %570, <16 x i8> %571, i32 2) %573 = extractelement <4 x float> %572, i32 1 %574 = extractelement <4 x float> %572, i32 3 %575 = fcmp oeq float %temp34.0, 2.000000e+00 %576 = select i1 %575, float 1.000000e+00, float 0.000000e+00 %577 = bitcast float %535 to i32 %578 = bitcast float %537 to i32 %579 = bitcast float %169 to i32 %580 = insertelement <4 x i32> undef, i32 %577, i32 0 %581 = insertelement <4 x i32> %580, i32 %578, i32 1 %582 = insertelement <4 x i32> %581, i32 %579, i32 2 %583 = bitcast <8 x i32> %39 to <32 x i8> %584 = bitcast <4 x i32> %41 to <16 x i8> %585 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %582, <32 x i8> %583, <16 x i8> %584, i32 2) %586 = extractelement <4 x float> %585, i32 1 %587 = extractelement <4 x float> %585, i32 3 %588 = fcmp oeq float %temp34.0, 1.000000e+00 %589 = select i1 %588, float 1.000000e+00, float 0.000000e+00 %590 = bitcast float %535 to i32 %591 = bitcast float %537 to i32 %592 = bitcast float %169 to i32 %593 = insertelement <4 x i32> undef, i32 %590, i32 0 %594 = insertelement <4 x i32> %593, i32 %591, i32 1 %595 = insertelement <4 x i32> %594, i32 %592, i32 2 %596 = bitcast <8 x i32> %35 to <32 x i8> %597 = bitcast <4 x i32> %37 to <16 x i8> %598 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %595, <32 x i8> %596, <16 x i8> %597, i32 2) %599 = extractelement <4 x float> %598, i32 1 %600 = extractelement <4 x float> %598, i32 3 %601 = fcmp oeq float %temp34.0, 0.000000e+00 %602 = select i1 %601, float 1.000000e+00, float 0.000000e+00 %603 = fmul float %599, %602 %604 = fmul float %600, %602 %605 = fmul float %586, %589 %606 = fadd float %605, %603 %607 = fmul float %587, %589 %608 = fadd float %607, %604 %609 = fmul float %573, %576 %610 = fadd float %609, %606 %611 = fmul float %574, %576 %612 = fadd float %611, %608 %613 = fmul float %560, %563 %614 = fadd float %613, %610 %615 = fmul float %561, %563 %616 = fadd float %615, %612 %617 = fmul float %547, %550 %618 = fadd float %617, %614 %619 = fmul float %548, %550 %620 = fadd float %619, %616 %621 = fmul float %620, 2.000000e+00 %622 = fadd float %621, -1.000000e+00 %623 = fmul float %618, 2.000000e+00 %624 = fadd float %623, -1.000000e+00 %625 = fmul float %622, %622 %626 = fmul float %624, %624 %627 = fadd float %625, %626 %628 = call float @llvm.AMDIL.clamp.(float %627, float 0.000000e+00, float 1.000000e+00) %629 = fcmp une float %27, %temp16.0 %.sink153 = select i1 %629, float %30, float %29 %temp40.2 = select i1 %629, float 1.953125e-03, float 3.906250e-03 %630 = fdiv float 1.000000e+00, %.sink153 %631 = fmul float %67, %630 %632 = fmul float %65, %630 %633 = call float @llvm.floor.f32(float %631) %634 = fsub float %631, %633 %635 = call float @llvm.floor.f32(float %632) %636 = fsub float %632, %635 %637 = fmul float %31, 2.000000e+00 %638 = fmul float %637, %temp40.2 %639 = fsub float 1.000000e+00, %638 %640 = fmul float %temp40.2, %31 %641 = fmul float %634, %639 %642 = fadd float %641, %640 %643 = fmul float %636, %639 %644 = fadd float %643, %640 %645 = fmul float %642, %temp16.0 %646 = fadd float %645, %temp32.0 %647 = fmul float %644, %temp16.0 %648 = fadd float %647, %temp33.0 %649 = bitcast float %646 to i32 %650 = bitcast float %648 to i32 %651 = bitcast float %169 to i32 %652 = insertelement <4 x i32> undef, i32 %649, i32 0 %653 = insertelement <4 x i32> %652, i32 %650, i32 1 %654 = insertelement <4 x i32> %653, i32 %651, i32 2 %655 = bitcast <8 x i32> %51 to <32 x i8> %656 = bitcast <4 x i32> %53 to <16 x i8> %657 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %654, <32 x i8> %655, <16 x i8> %656, i32 2) %658 = extractelement <4 x float> %657, i32 1 %659 = extractelement <4 x float> %657, i32 3 %660 = fcmp oeq float %temp34.0, 4.000000e+00 %661 = select i1 %660, float 1.000000e+00, float 0.000000e+00 %662 = bitcast float %646 to i32 %663 = bitcast float %648 to i32 %664 = bitcast float %169 to i32 %665 = insertelement <4 x i32> undef, i32 %662, i32 0 %666 = insertelement <4 x i32> %665, i32 %663, i32 1 %667 = insertelement <4 x i32> %666, i32 %664, i32 2 %668 = bitcast <8 x i32> %47 to <32 x i8> %669 = bitcast <4 x i32> %49 to <16 x i8> %670 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %667, <32 x i8> %668, <16 x i8> %669, i32 2) %671 = extractelement <4 x float> %670, i32 1 %672 = extractelement <4 x float> %670, i32 3 %673 = fcmp oeq float %temp34.0, 3.000000e+00 %674 = select i1 %673, float 1.000000e+00, float 0.000000e+00 %675 = bitcast float %646 to i32 %676 = bitcast float %648 to i32 %677 = bitcast float %169 to i32 %678 = insertelement <4 x i32> undef, i32 %675, i32 0 %679 = insertelement <4 x i32> %678, i32 %676, i32 1 %680 = insertelement <4 x i32> %679, i32 %677, i32 2 %681 = bitcast <8 x i32> %43 to <32 x i8> %682 = bitcast <4 x i32> %45 to <16 x i8> %683 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %680, <32 x i8> %681, <16 x i8> %682, i32 2) %684 = extractelement <4 x float> %683, i32 1 %685 = extractelement <4 x float> %683, i32 3 %686 = fcmp oeq float %temp34.0, 2.000000e+00 %687 = select i1 %686, float 1.000000e+00, float 0.000000e+00 %688 = bitcast float %646 to i32 %689 = bitcast float %648 to i32 %690 = bitcast float %169 to i32 %691 = insertelement <4 x i32> undef, i32 %688, i32 0 %692 = insertelement <4 x i32> %691, i32 %689, i32 1 %693 = insertelement <4 x i32> %692, i32 %690, i32 2 %694 = bitcast <8 x i32> %39 to <32 x i8> %695 = bitcast <4 x i32> %41 to <16 x i8> %696 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %693, <32 x i8> %694, <16 x i8> %695, i32 2) %697 = extractelement <4 x float> %696, i32 1 %698 = extractelement <4 x float> %696, i32 3 %699 = fcmp oeq float %temp34.0, 1.000000e+00 %700 = select i1 %699, float 1.000000e+00, float 0.000000e+00 %701 = bitcast float %646 to i32 %702 = bitcast float %648 to i32 %703 = bitcast float %169 to i32 %704 = insertelement <4 x i32> undef, i32 %701, i32 0 %705 = insertelement <4 x i32> %704, i32 %702, i32 1 %706 = insertelement <4 x i32> %705, i32 %703, i32 2 %707 = bitcast <8 x i32> %35 to <32 x i8> %708 = bitcast <4 x i32> %37 to <16 x i8> %709 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %706, <32 x i8> %707, <16 x i8> %708, i32 2) %710 = extractelement <4 x float> %709, i32 1 %711 = extractelement <4 x float> %709, i32 3 %712 = fcmp oeq float %temp34.0, 0.000000e+00 %713 = select i1 %712, float 1.000000e+00, float 0.000000e+00 %714 = fmul float %710, %713 %715 = fmul float %711, %713 %716 = fmul float %697, %700 %717 = fadd float %716, %714 %718 = fmul float %698, %700 %719 = fadd float %718, %715 %720 = fmul float %684, %687 %721 = fadd float %720, %717 %722 = fmul float %685, %687 %723 = fadd float %722, %719 %724 = fmul float %671, %674 %725 = fadd float %724, %721 %726 = fmul float %672, %674 %727 = fadd float %726, %723 %728 = fmul float %658, %661 %729 = fadd float %728, %725 %730 = fmul float %659, %661 %731 = fadd float %730, %727 %732 = fmul float %731, 2.000000e+00 %733 = fadd float %732, -1.000000e+00 %734 = fmul float %729, 2.000000e+00 %735 = fadd float %734, -1.000000e+00 %736 = fmul float %733, %733 %737 = fmul float %735, %735 %738 = fadd float %736, %737 %739 = call float @llvm.AMDIL.clamp.(float %738, float 0.000000e+00, float 1.000000e+00) %740 = fcmp une float %27, %temp16.0 %.sink154 = select i1 %740, float %30, float %29 %temp44.2 = select i1 %740, float 1.953125e-03, float 3.906250e-03 %741 = fdiv float 1.000000e+00, %.sink154 %742 = fmul float %65, %741 %743 = fmul float %66, %741 %744 = call float @llvm.floor.f32(float %742) %745 = fsub float %742, %744 %746 = call float @llvm.floor.f32(float %743) %747 = fsub float %743, %746 %748 = fmul float %31, 2.000000e+00 %749 = fmul float %748, %temp44.2 %750 = fsub float 1.000000e+00, %749 %751 = fmul float %temp44.2, %31 %752 = fmul float %745, %750 %753 = fadd float %752, %751 %754 = fmul float %747, %750 %755 = fadd float %754, %751 %756 = fmul float %753, %temp16.0 %757 = fadd float %756, %temp32.0 %758 = fmul float %755, %temp16.0 %759 = fadd float %758, %temp33.0 %760 = bitcast float %757 to i32 %761 = bitcast float %759 to i32 %762 = bitcast float %169 to i32 %763 = insertelement <4 x i32> undef, i32 %760, i32 0 %764 = insertelement <4 x i32> %763, i32 %761, i32 1 %765 = insertelement <4 x i32> %764, i32 %762, i32 2 %766 = bitcast <8 x i32> %51 to <32 x i8> %767 = bitcast <4 x i32> %53 to <16 x i8> %768 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %765, <32 x i8> %766, <16 x i8> %767, i32 2) %769 = extractelement <4 x float> %768, i32 1 %770 = extractelement <4 x float> %768, i32 3 %771 = fcmp oeq float %temp34.0, 4.000000e+00 %772 = select i1 %771, float 1.000000e+00, float 0.000000e+00 %773 = bitcast float %757 to i32 %774 = bitcast float %759 to i32 %775 = bitcast float %169 to i32 %776 = insertelement <4 x i32> undef, i32 %773, i32 0 %777 = insertelement <4 x i32> %776, i32 %774, i32 1 %778 = insertelement <4 x i32> %777, i32 %775, i32 2 %779 = bitcast <8 x i32> %47 to <32 x i8> %780 = bitcast <4 x i32> %49 to <16 x i8> %781 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %778, <32 x i8> %779, <16 x i8> %780, i32 2) %782 = extractelement <4 x float> %781, i32 1 %783 = extractelement <4 x float> %781, i32 3 %784 = fcmp oeq float %temp34.0, 3.000000e+00 %785 = select i1 %784, float 1.000000e+00, float 0.000000e+00 %786 = bitcast float %757 to i32 %787 = bitcast float %759 to i32 %788 = bitcast float %169 to i32 %789 = insertelement <4 x i32> undef, i32 %786, i32 0 %790 = insertelement <4 x i32> %789, i32 %787, i32 1 %791 = insertelement <4 x i32> %790, i32 %788, i32 2 %792 = bitcast <8 x i32> %43 to <32 x i8> %793 = bitcast <4 x i32> %45 to <16 x i8> %794 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %791, <32 x i8> %792, <16 x i8> %793, i32 2) %795 = extractelement <4 x float> %794, i32 1 %796 = extractelement <4 x float> %794, i32 3 %797 = fcmp oeq float %temp34.0, 2.000000e+00 %798 = select i1 %797, float 1.000000e+00, float 0.000000e+00 %799 = bitcast float %757 to i32 %800 = bitcast float %759 to i32 %801 = bitcast float %169 to i32 %802 = insertelement <4 x i32> undef, i32 %799, i32 0 %803 = insertelement <4 x i32> %802, i32 %800, i32 1 %804 = insertelement <4 x i32> %803, i32 %801, i32 2 %805 = bitcast <8 x i32> %39 to <32 x i8> %806 = bitcast <4 x i32> %41 to <16 x i8> %807 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %804, <32 x i8> %805, <16 x i8> %806, i32 2) %808 = extractelement <4 x float> %807, i32 1 %809 = extractelement <4 x float> %807, i32 3 %810 = fcmp oeq float %temp34.0, 1.000000e+00 %811 = select i1 %810, float 1.000000e+00, float 0.000000e+00 %812 = bitcast float %757 to i32 %813 = bitcast float %759 to i32 %814 = bitcast float %169 to i32 %815 = insertelement <4 x i32> undef, i32 %812, i32 0 %816 = insertelement <4 x i32> %815, i32 %813, i32 1 %817 = insertelement <4 x i32> %816, i32 %814, i32 2 %818 = bitcast <8 x i32> %35 to <32 x i8> %819 = bitcast <4 x i32> %37 to <16 x i8> %820 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %817, <32 x i8> %818, <16 x i8> %819, i32 2) %821 = extractelement <4 x float> %820, i32 1 %822 = extractelement <4 x float> %820, i32 3 %823 = fcmp oeq float %temp34.0, 0.000000e+00 %824 = select i1 %823, float 1.000000e+00, float 0.000000e+00 %825 = fmul float %821, %824 %826 = fmul float %822, %824 %827 = fmul float %808, %811 %828 = fadd float %827, %825 %829 = fmul float %809, %811 %830 = fadd float %829, %826 %831 = fmul float %795, %798 %832 = fadd float %831, %828 %833 = fmul float %796, %798 %834 = fadd float %833, %830 %835 = fmul float %782, %785 %836 = fadd float %835, %832 %837 = fmul float %783, %785 %838 = fadd float %837, %834 %839 = fmul float %769, %772 %840 = fadd float %839, %836 %841 = fmul float %770, %772 %842 = fadd float %841, %838 %843 = fmul float %842, 2.000000e+00 %844 = fadd float %843, -1.000000e+00 %845 = fmul float %840, 2.000000e+00 %846 = fadd float %845, -1.000000e+00 %847 = fmul float %844, %844 %848 = fmul float %846, %846 %849 = fadd float %847, %848 %850 = call float @llvm.AMDIL.clamp.(float %849, float 0.000000e+00, float 1.000000e+00) %851 = fmul float %92, 0.000000e+00 %852 = fmul float %622, %92 %853 = fmul float %624, %92 %854 = fmul float %735, %93 %855 = fadd float %854, %851 %856 = fmul float %93, 0.000000e+00 %857 = fadd float %856, %852 %858 = fmul float %733, %93 %859 = fadd float %858, %853 %860 = fmul float %844, %94 %861 = fadd float %860, %855 %862 = fmul float %846, %94 %863 = fadd float %862, %857 %864 = fmul float %94, 0.000000e+00 %865 = fadd float %864, %859 %866 = fcmp une float %27, %temp20.0 %.sink155 = select i1 %866, float %30, float %29 %temp32.1 = select i1 %866, float 1.953125e-03, float 3.906250e-03 %867 = fdiv float 1.000000e+00, %.sink155 %868 = fmul float %67, %867 %869 = fmul float %66, %867 %870 = call float @llvm.floor.f32(float %868) %871 = fsub float %868, %870 %872 = call float @llvm.floor.f32(float %869) %873 = fsub float %869, %872 %874 = fmul float %31, 2.000000e+00 %875 = fmul float %874, %temp32.1 %876 = fsub float 1.000000e+00, %875 %877 = fmul float %temp32.1, %31 %878 = fmul float %871, %876 %879 = fadd float %878, %877 %880 = fmul float %873, %876 %881 = fadd float %880, %877 %882 = fmul float %879, %temp20.0 %883 = fadd float %882, %temp8.0 %884 = fmul float %881, %temp20.0 %885 = fadd float %884, %temp9.0 %886 = bitcast float %883 to i32 %887 = bitcast float %885 to i32 %888 = bitcast float %169 to i32 %889 = insertelement <4 x i32> undef, i32 %886, i32 0 %890 = insertelement <4 x i32> %889, i32 %887, i32 1 %891 = insertelement <4 x i32> %890, i32 %888, i32 2 %892 = bitcast <8 x i32> %51 to <32 x i8> %893 = bitcast <4 x i32> %53 to <16 x i8> %894 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %891, <32 x i8> %892, <16 x i8> %893, i32 2) %895 = extractelement <4 x float> %894, i32 1 %896 = extractelement <4 x float> %894, i32 3 %897 = fcmp oeq float %temp10.0, 4.000000e+00 %898 = select i1 %897, float 1.000000e+00, float 0.000000e+00 %899 = bitcast float %883 to i32 %900 = bitcast float %885 to i32 %901 = bitcast float %169 to i32 %902 = insertelement <4 x i32> undef, i32 %899, i32 0 %903 = insertelement <4 x i32> %902, i32 %900, i32 1 %904 = insertelement <4 x i32> %903, i32 %901, i32 2 %905 = bitcast <8 x i32> %47 to <32 x i8> %906 = bitcast <4 x i32> %49 to <16 x i8> %907 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %904, <32 x i8> %905, <16 x i8> %906, i32 2) %908 = extractelement <4 x float> %907, i32 1 %909 = extractelement <4 x float> %907, i32 3 %910 = fcmp oeq float %temp10.0, 3.000000e+00 %911 = select i1 %910, float 1.000000e+00, float 0.000000e+00 %912 = bitcast float %883 to i32 %913 = bitcast float %885 to i32 %914 = bitcast float %169 to i32 %915 = insertelement <4 x i32> undef, i32 %912, i32 0 %916 = insertelement <4 x i32> %915, i32 %913, i32 1 %917 = insertelement <4 x i32> %916, i32 %914, i32 2 %918 = bitcast <8 x i32> %43 to <32 x i8> %919 = bitcast <4 x i32> %45 to <16 x i8> %920 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %917, <32 x i8> %918, <16 x i8> %919, i32 2) %921 = extractelement <4 x float> %920, i32 1 %922 = extractelement <4 x float> %920, i32 3 %923 = fcmp oeq float %temp10.0, 2.000000e+00 %924 = select i1 %923, float 1.000000e+00, float 0.000000e+00 %925 = bitcast float %883 to i32 %926 = bitcast float %885 to i32 %927 = bitcast float %169 to i32 %928 = insertelement <4 x i32> undef, i32 %925, i32 0 %929 = insertelement <4 x i32> %928, i32 %926, i32 1 %930 = insertelement <4 x i32> %929, i32 %927, i32 2 %931 = bitcast <8 x i32> %39 to <32 x i8> %932 = bitcast <4 x i32> %41 to <16 x i8> %933 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %930, <32 x i8> %931, <16 x i8> %932, i32 2) %934 = extractelement <4 x float> %933, i32 1 %935 = extractelement <4 x float> %933, i32 3 %936 = fcmp oeq float %temp10.0, 1.000000e+00 %937 = select i1 %936, float 1.000000e+00, float 0.000000e+00 %938 = bitcast float %883 to i32 %939 = bitcast float %885 to i32 %940 = bitcast float %169 to i32 %941 = insertelement <4 x i32> undef, i32 %938, i32 0 %942 = insertelement <4 x i32> %941, i32 %939, i32 1 %943 = insertelement <4 x i32> %942, i32 %940, i32 2 %944 = bitcast <8 x i32> %35 to <32 x i8> %945 = bitcast <4 x i32> %37 to <16 x i8> %946 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %943, <32 x i8> %944, <16 x i8> %945, i32 2) %947 = extractelement <4 x float> %946, i32 1 %948 = extractelement <4 x float> %946, i32 3 %949 = fcmp oeq float %temp10.0, 0.000000e+00 %950 = select i1 %949, float 1.000000e+00, float 0.000000e+00 %951 = fmul float %947, %950 %952 = fmul float %948, %950 %953 = fmul float %934, %937 %954 = fadd float %953, %951 %955 = fmul float %935, %937 %956 = fadd float %955, %952 %957 = fmul float %921, %924 %958 = fadd float %957, %954 %959 = fmul float %922, %924 %960 = fadd float %959, %956 %961 = fmul float %908, %911 %962 = fadd float %961, %958 %963 = fmul float %909, %911 %964 = fadd float %963, %960 %965 = fmul float %895, %898 %966 = fadd float %965, %962 %967 = fmul float %896, %898 %968 = fadd float %967, %964 %969 = fmul float %968, 2.000000e+00 %970 = fadd float %969, -1.000000e+00 %971 = fmul float %966, 2.000000e+00 %972 = fadd float %971, -1.000000e+00 %973 = fmul float %970, %970 %974 = fmul float %972, %972 %975 = fadd float %973, %974 %976 = call float @llvm.AMDIL.clamp.(float %975, float 0.000000e+00, float 1.000000e+00) %977 = fcmp une float %27, %temp20.0 %.sink156 = select i1 %977, float %30, float %29 %temp36.3 = select i1 %977, float 1.953125e-03, float 3.906250e-03 %978 = fdiv float 1.000000e+00, %.sink156 %979 = fmul float %67, %978 %980 = fmul float %65, %978 %981 = call float @llvm.floor.f32(float %979) %982 = fsub float %979, %981 %983 = call float @llvm.floor.f32(float %980) %984 = fsub float %980, %983 %985 = fmul float %31, 2.000000e+00 %986 = fmul float %985, %temp36.3 %987 = fsub float 1.000000e+00, %986 %988 = fmul float %temp36.3, %31 %989 = fmul float %982, %987 %990 = fadd float %989, %988 %991 = fmul float %984, %987 %992 = fadd float %991, %988 %993 = fmul float %990, %temp20.0 %994 = fadd float %993, %temp8.0 %995 = fmul float %992, %temp20.0 %996 = fadd float %995, %temp9.0 %997 = bitcast float %994 to i32 %998 = bitcast float %996 to i32 %999 = bitcast float %169 to i32 %1000 = insertelement <4 x i32> undef, i32 %997, i32 0 %1001 = insertelement <4 x i32> %1000, i32 %998, i32 1 %1002 = insertelement <4 x i32> %1001, i32 %999, i32 2 %1003 = bitcast <8 x i32> %51 to <32 x i8> %1004 = bitcast <4 x i32> %53 to <16 x i8> %1005 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1002, <32 x i8> %1003, <16 x i8> %1004, i32 2) %1006 = extractelement <4 x float> %1005, i32 1 %1007 = extractelement <4 x float> %1005, i32 3 %1008 = fcmp oeq float %temp10.0, 4.000000e+00 %1009 = select i1 %1008, float 1.000000e+00, float 0.000000e+00 %1010 = bitcast float %994 to i32 %1011 = bitcast float %996 to i32 %1012 = bitcast float %169 to i32 %1013 = insertelement <4 x i32> undef, i32 %1010, i32 0 %1014 = insertelement <4 x i32> %1013, i32 %1011, i32 1 %1015 = insertelement <4 x i32> %1014, i32 %1012, i32 2 %1016 = bitcast <8 x i32> %47 to <32 x i8> %1017 = bitcast <4 x i32> %49 to <16 x i8> %1018 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1015, <32 x i8> %1016, <16 x i8> %1017, i32 2) %1019 = extractelement <4 x float> %1018, i32 1 %1020 = extractelement <4 x float> %1018, i32 3 %1021 = fcmp oeq float %temp10.0, 3.000000e+00 %1022 = select i1 %1021, float 1.000000e+00, float 0.000000e+00 %1023 = bitcast float %994 to i32 %1024 = bitcast float %996 to i32 %1025 = bitcast float %169 to i32 %1026 = insertelement <4 x i32> undef, i32 %1023, i32 0 %1027 = insertelement <4 x i32> %1026, i32 %1024, i32 1 %1028 = insertelement <4 x i32> %1027, i32 %1025, i32 2 %1029 = bitcast <8 x i32> %43 to <32 x i8> %1030 = bitcast <4 x i32> %45 to <16 x i8> %1031 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1028, <32 x i8> %1029, <16 x i8> %1030, i32 2) %1032 = extractelement <4 x float> %1031, i32 1 %1033 = extractelement <4 x float> %1031, i32 3 %1034 = fcmp oeq float %temp10.0, 2.000000e+00 %1035 = select i1 %1034, float 1.000000e+00, float 0.000000e+00 %1036 = bitcast float %994 to i32 %1037 = bitcast float %996 to i32 %1038 = bitcast float %169 to i32 %1039 = insertelement <4 x i32> undef, i32 %1036, i32 0 %1040 = insertelement <4 x i32> %1039, i32 %1037, i32 1 %1041 = insertelement <4 x i32> %1040, i32 %1038, i32 2 %1042 = bitcast <8 x i32> %39 to <32 x i8> %1043 = bitcast <4 x i32> %41 to <16 x i8> %1044 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1041, <32 x i8> %1042, <16 x i8> %1043, i32 2) %1045 = extractelement <4 x float> %1044, i32 1 %1046 = extractelement <4 x float> %1044, i32 3 %1047 = fcmp oeq float %temp10.0, 1.000000e+00 %1048 = select i1 %1047, float 1.000000e+00, float 0.000000e+00 %1049 = bitcast float %994 to i32 %1050 = bitcast float %996 to i32 %1051 = bitcast float %169 to i32 %1052 = insertelement <4 x i32> undef, i32 %1049, i32 0 %1053 = insertelement <4 x i32> %1052, i32 %1050, i32 1 %1054 = insertelement <4 x i32> %1053, i32 %1051, i32 2 %1055 = bitcast <8 x i32> %35 to <32 x i8> %1056 = bitcast <4 x i32> %37 to <16 x i8> %1057 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1054, <32 x i8> %1055, <16 x i8> %1056, i32 2) %1058 = extractelement <4 x float> %1057, i32 1 %1059 = extractelement <4 x float> %1057, i32 3 %1060 = fcmp oeq float %temp10.0, 0.000000e+00 %1061 = select i1 %1060, float 1.000000e+00, float 0.000000e+00 %1062 = fmul float %1058, %1061 %1063 = fmul float %1059, %1061 %1064 = fmul float %1045, %1048 %1065 = fadd float %1064, %1062 %1066 = fmul float %1046, %1048 %1067 = fadd float %1066, %1063 %1068 = fmul float %1032, %1035 %1069 = fadd float %1068, %1065 %1070 = fmul float %1033, %1035 %1071 = fadd float %1070, %1067 %1072 = fmul float %1019, %1022 %1073 = fadd float %1072, %1069 %1074 = fmul float %1020, %1022 %1075 = fadd float %1074, %1071 %1076 = fmul float %1006, %1009 %1077 = fadd float %1076, %1073 %1078 = fmul float %1007, %1009 %1079 = fadd float %1078, %1075 %1080 = fmul float %1079, 2.000000e+00 %1081 = fadd float %1080, -1.000000e+00 %1082 = fmul float %1077, 2.000000e+00 %1083 = fadd float %1082, -1.000000e+00 %1084 = fmul float %1081, %1081 %1085 = fmul float %1083, %1083 %1086 = fadd float %1084, %1085 %1087 = call float @llvm.AMDIL.clamp.(float %1086, float 0.000000e+00, float 1.000000e+00) %1088 = fcmp une float %27, %temp20.0 %.sink157 = select i1 %1088, float %30, float %29 %temp40.4 = select i1 %1088, float 1.953125e-03, float 3.906250e-03 %1089 = fdiv float 1.000000e+00, %.sink157 %1090 = fmul float %65, %1089 %1091 = fmul float %66, %1089 %1092 = call float @llvm.floor.f32(float %1090) %1093 = fsub float %1090, %1092 %1094 = call float @llvm.floor.f32(float %1091) %1095 = fsub float %1091, %1094 %1096 = fmul float %31, 2.000000e+00 %1097 = fmul float %1096, %temp40.4 %1098 = fsub float 1.000000e+00, %1097 %1099 = fmul float %temp40.4, %31 %1100 = fmul float %1093, %1098 %1101 = fadd float %1100, %1099 %1102 = fmul float %1095, %1098 %1103 = fadd float %1102, %1099 %1104 = fmul float %1101, %temp20.0 %1105 = fadd float %1104, %temp8.0 %1106 = fmul float %1103, %temp20.0 %1107 = fadd float %1106, %temp9.0 %1108 = bitcast float %1105 to i32 %1109 = bitcast float %1107 to i32 %1110 = bitcast float %169 to i32 %1111 = insertelement <4 x i32> undef, i32 %1108, i32 0 %1112 = insertelement <4 x i32> %1111, i32 %1109, i32 1 %1113 = insertelement <4 x i32> %1112, i32 %1110, i32 2 %1114 = bitcast <8 x i32> %51 to <32 x i8> %1115 = bitcast <4 x i32> %53 to <16 x i8> %1116 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1113, <32 x i8> %1114, <16 x i8> %1115, i32 2) %1117 = extractelement <4 x float> %1116, i32 1 %1118 = extractelement <4 x float> %1116, i32 3 %1119 = fcmp oeq float %temp10.0, 4.000000e+00 %1120 = select i1 %1119, float 1.000000e+00, float 0.000000e+00 %1121 = bitcast float %1105 to i32 %1122 = bitcast float %1107 to i32 %1123 = bitcast float %169 to i32 %1124 = insertelement <4 x i32> undef, i32 %1121, i32 0 %1125 = insertelement <4 x i32> %1124, i32 %1122, i32 1 %1126 = insertelement <4 x i32> %1125, i32 %1123, i32 2 %1127 = bitcast <8 x i32> %47 to <32 x i8> %1128 = bitcast <4 x i32> %49 to <16 x i8> %1129 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1126, <32 x i8> %1127, <16 x i8> %1128, i32 2) %1130 = extractelement <4 x float> %1129, i32 1 %1131 = extractelement <4 x float> %1129, i32 3 %1132 = fcmp oeq float %temp10.0, 3.000000e+00 %1133 = select i1 %1132, float 1.000000e+00, float 0.000000e+00 %1134 = bitcast float %1105 to i32 %1135 = bitcast float %1107 to i32 %1136 = bitcast float %169 to i32 %1137 = insertelement <4 x i32> undef, i32 %1134, i32 0 %1138 = insertelement <4 x i32> %1137, i32 %1135, i32 1 %1139 = insertelement <4 x i32> %1138, i32 %1136, i32 2 %1140 = bitcast <8 x i32> %43 to <32 x i8> %1141 = bitcast <4 x i32> %45 to <16 x i8> %1142 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1139, <32 x i8> %1140, <16 x i8> %1141, i32 2) %1143 = extractelement <4 x float> %1142, i32 1 %1144 = extractelement <4 x float> %1142, i32 3 %1145 = fcmp oeq float %temp10.0, 2.000000e+00 %1146 = select i1 %1145, float 1.000000e+00, float 0.000000e+00 %1147 = bitcast float %1105 to i32 %1148 = bitcast float %1107 to i32 %1149 = bitcast float %169 to i32 %1150 = insertelement <4 x i32> undef, i32 %1147, i32 0 %1151 = insertelement <4 x i32> %1150, i32 %1148, i32 1 %1152 = insertelement <4 x i32> %1151, i32 %1149, i32 2 %1153 = bitcast <8 x i32> %39 to <32 x i8> %1154 = bitcast <4 x i32> %41 to <16 x i8> %1155 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1152, <32 x i8> %1153, <16 x i8> %1154, i32 2) %1156 = extractelement <4 x float> %1155, i32 1 %1157 = extractelement <4 x float> %1155, i32 3 %1158 = fcmp oeq float %temp10.0, 1.000000e+00 %1159 = select i1 %1158, float 1.000000e+00, float 0.000000e+00 %1160 = bitcast float %1105 to i32 %1161 = bitcast float %1107 to i32 %1162 = bitcast float %169 to i32 %1163 = insertelement <4 x i32> undef, i32 %1160, i32 0 %1164 = insertelement <4 x i32> %1163, i32 %1161, i32 1 %1165 = insertelement <4 x i32> %1164, i32 %1162, i32 2 %1166 = bitcast <8 x i32> %35 to <32 x i8> %1167 = bitcast <4 x i32> %37 to <16 x i8> %1168 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1165, <32 x i8> %1166, <16 x i8> %1167, i32 2) %1169 = extractelement <4 x float> %1168, i32 1 %1170 = extractelement <4 x float> %1168, i32 3 %1171 = fcmp oeq float %temp10.0, 0.000000e+00 %1172 = select i1 %1171, float 1.000000e+00, float 0.000000e+00 %1173 = fmul float %1169, %1172 %1174 = fmul float %1170, %1172 %1175 = fmul float %1156, %1159 %1176 = fadd float %1175, %1173 %1177 = fmul float %1157, %1159 %1178 = fadd float %1177, %1174 %1179 = fmul float %1143, %1146 %1180 = fadd float %1179, %1176 %1181 = fmul float %1144, %1146 %1182 = fadd float %1181, %1178 %1183 = fmul float %1130, %1133 %1184 = fadd float %1183, %1180 %1185 = fmul float %1131, %1133 %1186 = fadd float %1185, %1182 %1187 = fmul float %1117, %1120 %1188 = fadd float %1187, %1184 %1189 = fmul float %1118, %1120 %1190 = fadd float %1189, %1186 %1191 = fmul float %1190, 2.000000e+00 %1192 = fadd float %1191, -1.000000e+00 %1193 = fmul float %1188, 2.000000e+00 %1194 = fadd float %1193, -1.000000e+00 %1195 = fmul float %1192, %1192 %1196 = fmul float %1194, %1194 %1197 = fadd float %1195, %1196 %1198 = call float @llvm.AMDIL.clamp.(float %1197, float 0.000000e+00, float 1.000000e+00) %1199 = fmul float %92, 0.000000e+00 %1200 = fmul float %970, %92 %1201 = fmul float %972, %92 %1202 = fmul float %1083, %93 %1203 = fadd float %1202, %1199 %1204 = fmul float %93, 0.000000e+00 %1205 = fadd float %1204, %1200 %1206 = fmul float %1081, %93 %1207 = fadd float %1206, %1201 %1208 = fmul float %1192, %94 %1209 = fadd float %1208, %1203 %1210 = fmul float %1194, %94 %1211 = fadd float %1210, %1205 %1212 = fmul float %94, 0.000000e+00 %1213 = fadd float %1212, %1207 %1214 = fmul float %58, %513 %1215 = fmul float %58, %515 %1216 = fmul float %58, %517 %1217 = fmul float %59, %861 %1218 = fadd float %1217, %1214 %1219 = fmul float %59, %863 %1220 = fadd float %1219, %1215 %1221 = fmul float %59, %865 %1222 = fadd float %1221, %1216 %1223 = fmul float %60, %1209 %1224 = fadd float %1223, %1218 %1225 = fmul float %60, %1211 %1226 = fadd float %1225, %1220 %1227 = fmul float %60, %1213 %1228 = fadd float %1227, %1222 %1229 = fmul float %1224, %1224 %1230 = fmul float %1226, %1226 %1231 = fadd float %1229, %1230 %1232 = fmul float %1228, %1228 %1233 = fadd float %1231, %1232 %1234 = fadd float %1233, 1.000000e+00 %1235 = call float @llvm.AMDGPU.rsq.clamped.f32(float %1234) %1236 = fmul float %1224, %1235 %1237 = fmul float %1226, %1235 %1238 = fmul float %1228, %1235 %1239 = fmul float %1236, %57 %1240 = fmul float %1237, %57 %1241 = fmul float %1238, %57 %1242 = fsub float %62, %1239 %1243 = fsub float %63, %1240 %1244 = fsub float %64, %1241 %1245 = fmul float %1242, %1242 %1246 = fmul float %1243, %1243 %1247 = fadd float %1246, %1245 %1248 = fmul float %1244, %1244 %1249 = fadd float %1247, %1248 %1250 = call float @llvm.AMDGPU.rsq.clamped.f32(float %1249) %1251 = fmul float %1242, %1250 %1252 = fmul float %1243, %1250 %1253 = fmul float %1244, %1250 %1254 = fmul float %1251, 5.000000e-01 %1255 = fadd float %1254, 5.000000e-01 %1256 = fmul float %1252, 5.000000e-01 %1257 = fadd float %1256, 5.000000e-01 %1258 = fmul float %1253, 5.000000e-01 %1259 = fadd float %1258, 5.000000e-01 %1260 = call i32 @llvm.SI.packf16(float %1255, float %1257) %1261 = bitcast i32 %1260 to float %1262 = call i32 @llvm.SI.packf16(float %1259, float %61) %1263 = bitcast i32 %1262 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %1261, float %1263, float %1261, float %1263) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v12, v0, 0, 0, [m0] ; C8300000 v_interp_p2_f32 v12, [v12], v1, 0, 0, [m0] ; C8310001 v_interp_p1_f32 v13, v0, 1, 0, [m0] ; C8340100 v_interp_p2_f32 v13, [v13], v1, 1, 0, [m0] ; C8350101 v_interp_p1_f32 v15, v0, 2, 0, [m0] ; C83C0200 v_interp_p2_f32 v15, [v15], v1, 2, 0, [m0] ; C83D0201 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v3, v0, 3, 1, [m0] ; C80C0700 v_interp_p2_f32 v3, [v3], v1, 3, 1, [m0] ; C80D0701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v14, v0, 3, 2, [m0] ; C8380B00 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p2_f32 v14, [v14], v1, 3, 2, [m0] ; C8390B01 v_interp_p1_f32 v16, v0, 0, 3, [m0] ; C8400C00 v_interp_p2_f32 v16, [v16], v1, 0, 3, [m0] ; C8410C01 v_interp_p1_f32 v18, v0, 1, 3, [m0] ; C8480D00 v_interp_p2_f32 v18, [v18], v1, 1, 3, [m0] ; C8490D01 v_mov_b32_e32 v10, 0x7fffffff ; 7E1402FF 7FFFFFFF v_and_b32_e32 v0, v7, v10 ; 36001507 v_and_b32_e32 v1, v8, v10 ; 36021508 v_and_b32_e32 v10, v9, v10 ; 36141509 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[8:11], 0x18 ; C2000918 s_buffer_load_dword s1, s[8:11], 0x1c ; C200891C v_mul_f32_e64 v11, |v7|, |v7| ; D210030B 00020F07 v_mad_f32 v11, |v8|, |v8|, v11 ; D282030B 042E1108 v_mad_f32 v11, |v9|, |v9|, v11 ; D282030B 042E1309 v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B v_add_f32_e32 v12, 0.5, v12 ; 061818F0 v_add_f32_e32 v13, 0.5, v13 ; 061A1AF0 v_add_f32_e32 v15, 0.5, v15 ; 061E1EF0 v_floor_f32_e32 v21, v12 ; 7E2A490C v_floor_f32_e32 v17, v13 ; 7E22490D v_floor_f32_e32 v25, v15 ; 7E32490F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v13, s1 ; 7E1A0201 v_mul_f32_e32 v19, s0, v21 ; 10262A00 v_floor_f32_e32 v15, v19 ; 7E1E4913 v_mov_b32_e32 v12, 0x42800000 ; 7E1802FF 42800000 v_cmp_le_f32_e32 vcc, v12, v21 ; 7C062B0C s_and_saveexec_b64 s[2:3], vcc ; BE82246A s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E v_mov_b32_e32 v12, 0xc2800000 ; 7E1802FF C2800000 v_add_f32_e32 v12, v21, v12 ; 06181915 v_mul_f32_e32 v20, s1, v12 ; 10281801 v_floor_f32_e32 v20, v20 ; 7E284914 v_mul_f32_e32 v21, s1, v20 ; 102A2801 v_mad_f32 v23, v12, s1, -v20 ; D2820017 8450030C v_floor_f32_e32 v12, v21 ; 7E184915 v_mad_f32 v20, v20, s1, -v12 ; D2820014 84300314 v_add_f32_e32 v12, 4.0, v12 ; 061818F6 s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502 v_mov_b32_e32 v29, s0 ; 7E3A0200 v_mov_b32_e32 v24, v13 ; 7E30030D s_xor_b64 exec, exec, s[2:3] ; 89FE027E v_mul_f32_e32 v12, s0, v15 ; 10181E00 v_floor_f32_e32 v20, v19 ; 7E284913 v_subrev_f32_e32 v23, v20, v19 ; 0A2E2714 v_floor_f32_e32 v12, v12 ; 7E18490C v_mad_f32 v20, v15, s0, -v12 ; D2820014 8430010F v_mov_b32_e32 v24, v29 ; 7E30031D s_or_b64 exec, exec, s[2:3] ; 88FE027E v_mul_f32_e32 v26, s0, v17 ; 10342200 v_floor_f32_e32 v22, v26 ; 7E2C491A v_mov_b32_e32 v15, 0x42800000 ; 7E1E02FF 42800000 v_cmp_le_f32_e32 vcc, v15, v17 ; 7C06230F s_and_saveexec_b64 s[2:3], vcc ; BE82246A s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E v_mov_b32_e32 v15, 0xc2800000 ; 7E1E02FF C2800000 v_add_f32_e32 v15, v17, v15 ; 061E1F11 v_mul_f32_e32 v17, s1, v15 ; 10221E01 v_floor_f32_e32 v17, v17 ; 7E224911 v_mul_f32_e32 v21, s1, v17 ; 102A2201 v_mad_f32 v19, v15, s1, -v17 ; D2820013 8444030F v_floor_f32_e32 v15, v21 ; 7E1E4915 v_mad_f32 v21, v17, s1, -v15 ; D2820015 843C0311 v_add_f32_e32 v15, 4.0, v15 ; 061E1EF6 s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502 v_mov_b32_e32 v27, v13 ; 7E36030D s_xor_b64 exec, exec, s[2:3] ; 89FE027E v_mul_f32_e32 v15, s0, v22 ; 101E2C00 v_floor_f32_e32 v17, v26 ; 7E22491A v_subrev_f32_e32 v19, v17, v26 ; 0A263511 v_floor_f32_e32 v15, v15 ; 7E1E490F v_mad_f32 v21, v22, s0, -v15 ; D2820015 843C0116 v_mov_b32_e32 v27, v29 ; 7E36031D s_or_b64 exec, exec, s[2:3] ; 88FE027E s_buffer_load_dword s12, s[8:11], 0x20 ; C2060920 s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924 v_mul_f32_e32 v31, s0, v25 ; 103E3200 v_mov_b32_e32 v17, 0x42800000 ; 7E2202FF 42800000 v_floor_f32_e32 v30, v31 ; 7E3C491F v_cmp_le_f32_e32 vcc, v17, v25 ; 7C063311 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[2:3], vcc ; BE82246A s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E v_mov_b32_e32 v17, 0xc2800000 ; 7E2202FF C2800000 v_add_f32_e32 v17, v25, v17 ; 06222319 v_mul_f32_e32 v22, s1, v17 ; 102C2201 v_floor_f32_e32 v25, v22 ; 7E324916 v_mul_f32_e32 v26, s1, v25 ; 10343201 v_mad_f32 v22, v17, s1, -v25 ; D2820016 84640311 v_floor_f32_e32 v17, v26 ; 7E22491A v_mad_f32 v26, v25, s1, -v17 ; D282001A 84440319 v_add_f32_e32 v17, 4.0, v17 ; 062222F6 s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502 s_buffer_load_dword s38, s[8:11], 0x0 ; C2130900 s_buffer_load_dword s39, s[8:11], 0x1 ; C2138901 s_buffer_load_dword s40, s[8:11], 0x2 ; C2140902 s_buffer_load_dword s36, s[8:11], 0x28 ; C2120928 s_buffer_load_dword s37, s[8:11], 0x2c ; C212892C s_buffer_load_dword s41, s[8:11], 0x30 ; C2148930 v_mov_b32_e32 v25, s12 ; 7E32020C v_mov_b32_e32 v28, s13 ; 7E38020D s_waitcnt lgkmcnt(0) ; BF8C007F s_xor_b64 exec, exec, s[2:3] ; 89FE027E v_mul_f32_e32 v13, s0, v30 ; 101A3C00 v_floor_f32_e32 v17, v31 ; 7E22491F v_subrev_f32_e32 v22, v17, v31 ; 0A2C3F11 v_floor_f32_e32 v17, v13 ; 7E22490D v_mad_f32 v26, v30, s0, -v17 ; D282001A 8444011E v_mov_b32_e32 v13, v29 ; 7E1A031D s_or_b64 exec, exec, s[2:3] ; 88FE027E v_cmp_neq_f32_e64 s[30:31], s0, v24 ; D01A001E 00023000 v_cmp_eq_f32_e64 s[24:25], 4.0, v12 ; D0040018 000218F6 v_cmp_eq_f32_e64 s[20:21], 2.0, v12 ; D0040014 000218F4 v_cmp_eq_f32_e64 s[16:17], 1.0, v12 ; D0040010 000218F2 v_cmp_eq_f32_e64 s[18:19], 0, v12 ; D0040012 00021880 v_cmp_neq_f32_e64 s[28:29], s0, v27 ; D01A001C 00023600 v_cmp_neq_f32_e64 s[26:27], s0, v13 ; D01A001A 00021A00 v_cmp_eq_f32_e64 s[22:23], 4.0, v15 ; D0040016 00021EF6 v_cmp_eq_f32_e64 s[14:15], 2.0, v15 ; D004000E 00021EF4 v_cmp_eq_f32_e64 s[10:11], 1.0, v15 ; D004000A 00021EF2 v_cmp_eq_f32_e64 s[12:13], 0, v15 ; D004000C 00021E80 v_cmp_eq_f32_e32 vcc, 4.0, v17 ; 7C0422F6 v_cmp_eq_f32_e64 s[0:1], 2.0, v17 ; D0040000 000222F4 v_cmp_eq_f32_e64 s[2:3], 1.0, v17 ; D0040002 000222F2 v_cmp_eq_f32_e64 s[8:9], 0, v17 ; D0040008 00022280 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 v_subrev_f32_e32 v29, s38, v14 ; 0A3A1C26 v_mul_f32_e32 v29, v29, v29 ; 103A3B1D v_subrev_f32_e32 v30, s39, v16 ; 0A3C2027 v_mac_f32_e32 v29, v30, v30 ; 3E3A3D1E v_subrev_f32_e32 v30, s40, v18 ; 0A3C2428 v_mac_f32_e32 v29, v30, v30 ; 3E3A3D1E v_mul_f32_e32 v29, s41, v29 ; 103A3A29 v_log_f32_e32 v29, v29 ; 7E3A4F1D v_cndmask_b32_e64 v30, v25, v28, s[30:31] ; D200001E 007A3919 v_rcp_f32_e32 v30, v30 ; 7E3C551E v_mul_f32_e32 v31, v30, v18 ; 103E251E v_floor_f32_e32 v31, v31 ; 7E3E491F v_mad_f32 v31, v18, v30, -v31 ; D282001F 847E3D12 v_mul_f32_e32 v32, v30, v16 ; 1040211E v_floor_f32_e32 v32, v32 ; 7E404920 v_mad_f32 v32, v16, v30, -v32 ; D2820020 84823D10 v_mul_f32_e32 v33, v30, v14 ; 10421D1E v_floor_f32_e32 v33, v33 ; 7E424921 v_mad_f32 v30, v14, v30, -v33 ; D282001E 84863D0E v_mov_b32_e32 v33, 0x3b000000 ; 7E4202FF 3B000000 v_mov_b32_e32 v34, 0x3b800000 ; 7E4402FF 3B800000 v_cndmask_b32_e64 v35, v34, v33, s[30:31] ; D2000023 007A4322 v_add_f32_e64 v36, s36, s36 ; D2060024 00004824 v_mad_f32 v37, -v36, v35, 1.0 ; D2820025 23CA4724 v_mul_f32_e32 v35, s36, v35 ; 10464624 v_mad_f32 v31, v37, v31, v35 ; D282001F 048E3F25 v_mad_f32 v32, v37, v32, v35 ; D2820020 048E4125 v_mac_f32_e32 v35, v37, v30 ; 3E463D25 v_cndmask_b32_e64 v30, v34, v33, s[28:29] ; D200001E 00724322 v_cndmask_b32_e64 v33, v34, v33, s[26:27] ; D2000021 006A4322 v_mul_f32_e32 v29, 0x3f317218, v29 ; 103A3AFF 3F317218 s_load_dwordx4 s[40:43], s[4:5], 0x8 ; C0940508 v_cndmask_b32_e64 v34, v25, v28, s[28:29] ; D2000022 00723919 s_load_dwordx4 s[68:71], s[4:5], 0xc ; C0A2050C v_mul_f32_e32 v37, s36, v30 ; 104A3C24 v_mul_f32_e32 v38, s36, v33 ; 104C4224 v_mul_f32_e32 v41, s37, v29 ; 10523A25 s_load_dwordx4 s[36:39], s[4:5], 0x10 ; C0920510 s_load_dwordx8 s[72:79], s[6:7], 0x18 ; C0E40718 s_load_dwordx8 s[80:87], s[6:7], 0x20 ; C0E80720 s_load_dwordx8 s[52:59], s[6:7], 0x8 ; C0DA0708 s_load_dwordx8 s[60:67], s[6:7], 0x10 ; C0DE0710 s_load_dwordx4 s[28:31], s[4:5], 0x4 ; C08E0504 s_load_dwordx8 s[44:51], s[6:7], 0x0 ; C0D60700 v_mad_f32 v39, v24, v31, v23 ; D2820027 045E3F18 v_mad_f32 v40, v24, v32, v20 ; D2820028 04524118 v_mac_f32_e32 v20, v24, v35 ; 3E284718 v_mov_b32_e32 v42, v39 ; 7E540327 v_mov_b32_e32 v43, v40 ; 7E560328 v_mov_b32_e32 v44, v41 ; 7E580329 v_mov_b32_e32 v45, v42 ; 7E5A032A v_mac_f32_e32 v23, v24, v35 ; 3E2E4718 v_mov_b32_e32 v43, v20 ; 7E560314 v_cndmask_b32_e64 v20, v25, v28, s[26:27] ; D2000014 006A3919 v_mov_b32_e32 v44, v41 ; 7E580329 v_mov_b32_e32 v24, v40 ; 7E300328 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[28:29], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[80:87], s[36:39] ; F0900A00 01341C27 image_sample_l v[31:32], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[72:79], s[68:71] ; F0900A00 02321F27 image_sample_l v[46:47], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[60:67], s[40:43] ; F0900A00 014F2E27 image_sample_l v[48:49], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[52:59], s[28:31] ; F0900A00 00ED3027 image_sample_l v[50:51], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[44:51], s[32:35] ; F0900A00 010B3227 image_sample_l v[52:53], 10, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[80:87], s[36:39] ; F0900A00 0134342A image_sample_l v[54:55], 10, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[72:79], s[68:71] ; F0900A00 0232362A image_sample_l v[56:57], 10, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[60:67], s[40:43] ; F0900A00 014F382A image_sample_l v[58:59], 10, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[52:59], s[28:31] ; F0900A00 00ED3A2A image_sample_l v[42:43], 10, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[44:51], s[32:35] ; F0900A00 010B2A2A v_mov_b32_e32 v25, v41 ; 7E320329 v_rcp_f32_e32 v34, v34 ; 7E445522 image_sample_l v[44:45], 10, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[80:87], s[36:39] ; F0900A00 01342C17 image_sample_l v[60:61], 10, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[72:79], s[68:71] ; F0900A00 02323C17 image_sample_l v[62:63], 10, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[60:67], s[40:43] ; F0900A00 014F3E17 image_sample_l v[64:65], 10, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[52:59], s[28:31] ; F0900A00 00ED4017 image_sample_l v[66:67], 10, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[44:51], s[32:35] ; F0900A00 010B4217 v_mul_f32_e32 v23, v34, v18 ; 102E2522 v_floor_f32_e32 v23, v23 ; 7E2E4917 v_mad_f32 v23, v18, v34, -v23 ; D2820017 845E4512 v_mul_f32_e32 v24, v34, v16 ; 10302122 v_floor_f32_e32 v24, v24 ; 7E304918 v_mad_f32 v24, v16, v34, -v24 ; D2820018 84624510 v_mul_f32_e32 v25, v34, v14 ; 10321D22 v_floor_f32_e32 v25, v25 ; 7E324919 v_mad_f32 v25, v14, v34, -v25 ; D2820019 8466450E v_mad_f32 v30, -v36, v30, 1.0 ; D282001E 23CA3D24 v_mad_f32 v23, v30, v23, v37 ; D2820017 04962F1E v_mad_f32 v24, v30, v24, v37 ; D2820018 0496311E v_mac_f32_e32 v37, v30, v25 ; 3E4A331E v_mad_f32 v39, v27, v23, v19 ; D2820027 044E2F1B v_rcp_f32_e32 v20, v20 ; 7E285514 v_mad_f32 v40, v27, v24, v21 ; D2820028 0456311B v_mac_f32_e32 v21, v27, v37 ; 3E2A4B1B v_mac_f32_e32 v19, v27, v37 ; 3E264B1B v_mul_f32_e32 v23, v20, v18 ; 102E2514 v_floor_f32_e32 v23, v23 ; 7E2E4917 v_mad_f32 v18, v18, v20, -v23 ; D2820012 845E2912 v_mul_f32_e32 v23, v20, v16 ; 102E2114 v_floor_f32_e32 v23, v23 ; 7E2E4917 v_mad_f32 v16, v16, v20, -v23 ; D2820010 845E2910 v_mul_f32_e32 v23, v20, v14 ; 102E1D14 v_floor_f32_e32 v23, v23 ; 7E2E4917 v_mov_b32_e32 v68, v39 ; 7E880327 v_mov_b32_e32 v69, v40 ; 7E8A0328 v_mov_b32_e32 v70, v41 ; 7E8C0329 s_waitcnt vmcnt(5) ; BF8C0775 v_mov_b32_e32 v71, v42 ; 7E8E032A v_mad_f32 v14, v14, v20, -v23 ; D282000E 845E290E v_mov_b32_e32 v69, v21 ; 7E8A0315 v_mad_f32 v21, -v36, v33, 1.0 ; D2820015 23CA4324 v_mov_b32_e32 v20, v40 ; 7E280328 v_mad_f32 v18, v21, v18, v38 ; D2820012 049A2515 v_mad_f32 v16, v21, v16, v38 ; D2820010 049A2115 v_mac_f32_e32 v38, v21, v14 ; 3E4C1D15 v_mov_b32_e32 v70, v41 ; 7E8C0329 v_mov_b32_e32 v21, v41 ; 7E2A0329 image_sample_l v[33:34], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[80:87], s[36:39] ; F0900A00 01342127 image_sample_l v[35:36], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[72:79], s[68:71] ; F0900A00 02322327 image_sample_l v[72:73], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[60:67], s[40:43] ; F0900A00 014F4827 image_sample_l v[74:75], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[52:59], s[28:31] ; F0900A00 00ED4A27 image_sample_l v[76:77], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[44:51], s[32:35] ; F0900A00 010B4C27 v_mad_f32 v39, v13, v18, v22 ; D2820027 045A250D v_mad_f32 v40, v13, v16, v26 ; D2820028 046A210D v_mac_f32_e32 v26, v13, v38 ; 3E344D0D v_mac_f32_e32 v22, v13, v38 ; 3E2C4D0D image_sample_l v[13:14], 10, 0, 0, 0, 0, 0, 0, 0, v[68:71], s[80:87], s[36:39] ; F0900A00 01340D44 image_sample_l v[37:38], 10, 0, 0, 0, 0, 0, 0, 0, v[68:71], s[72:79], s[68:71] ; F0900A00 02322544 image_sample_l v[78:79], 10, 0, 0, 0, 0, 0, 0, 0, v[68:71], s[60:67], s[40:43] ; F0900A00 014F4E44 image_sample_l v[80:81], 10, 0, 0, 0, 0, 0, 0, 0, v[68:71], s[52:59], s[28:31] ; F0900A00 00ED5044 v_mov_b32_e32 v82, v39 ; 7EA40327 v_mov_b32_e32 v83, v40 ; 7EA60328 v_mov_b32_e32 v84, v41 ; 7EA80329 v_mov_b32_e32 v85, v42 ; 7EAA032A image_sample_l v[68:69], 10, 0, 0, 0, 0, 0, 0, 0, v[68:71], s[44:51], s[32:35] ; F0900A00 010B4444 v_mov_b32_e32 v83, v26 ; 7EA6031A v_mov_b32_e32 v23, v40 ; 7E2E0328 image_sample_l v[25:26], 10, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[80:87], s[36:39] ; F0900A00 01341913 image_sample_l v[70:71], 10, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[72:79], s[68:71] ; F0900A00 02324613 image_sample_l v[86:87], 10, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[60:67], s[40:43] ; F0900A00 014F5613 image_sample_l v[88:89], 10, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[52:59], s[28:31] ; F0900A00 00ED5813 image_sample_l v[18:19], 10, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[44:51], s[32:35] ; F0900A00 010B1213 v_mov_b32_e32 v84, v41 ; 7EA80329 v_mov_b32_e32 v24, v41 ; 7E300329 image_sample_l v[20:21], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[80:87], s[36:39] ; F0900A00 01341427 image_sample_l v[90:91], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[72:79], s[68:71] ; F0900A00 02325A27 image_sample_l v[92:93], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[60:67], s[40:43] ; F0900A00 014F5C27 image_sample_l v[94:95], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[52:59], s[28:31] ; F0900A00 00ED5E27 image_sample_l v[39:40], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[44:51], s[32:35] ; F0900A00 010B2727 image_sample_l v[96:97], 10, 0, 0, 0, 0, 0, 0, 0, v[82:85], s[80:87], s[36:39] ; F0900A00 01346052 image_sample_l v[98:99], 10, 0, 0, 0, 0, 0, 0, 0, v[82:85], s[72:79], s[68:71] ; F0900A00 02326252 image_sample_l v[100:101], 10, 0, 0, 0, 0, 0, 0, 0, v[82:85], s[60:67], s[40:43] ; F0900A00 014F6452 image_sample_l v[102:103], 10, 0, 0, 0, 0, 0, 0, 0, v[82:85], s[52:59], s[28:31] ; F0900A00 00ED6652 image_sample_l v[82:83], 10, 0, 0, 0, 0, 0, 0, 0, v[82:85], s[44:51], s[32:35] ; F0900A00 010B5252 s_waitcnt vmcnt(14) ; BF8C077E image_sample_l v[84:85], 10, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[80:87], s[36:39] ; F0900A00 01345416 image_sample_l v[104:105], 10, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[72:79], s[68:71] ; F0900A00 02326816 v_cndmask_b32_e64 v16, 0, 1.0, s[24:25] ; D2000010 0061E480 v_mov_b32_e32 v27, 0x40400000 ; 7E3602FF 40400000 v_cmp_eq_f32_e64 s[4:5], v12, v27 ; D0040004 0002370C v_cndmask_b32_e64 v12, 0, 1.0, s[4:5] ; D200000C 0011E480 v_cndmask_b32_e64 v30, 0, 1.0, s[20:21] ; D200001E 0051E480 v_cndmask_b32_e64 v41, 0, 1.0, s[16:17] ; D2000029 0041E480 v_cndmask_b32_e64 v106, 0, 1.0, s[18:19] ; D200006A 0049E480 v_cndmask_b32_e64 v107, 0, 1.0, s[22:23] ; D200006B 0059E480 v_cmp_eq_f32_e64 s[4:5], v15, v27 ; D0040004 0002370F v_cndmask_b32_e64 v15, 0, 1.0, s[4:5] ; D200000F 0011E480 v_cndmask_b32_e64 v108, 0, 1.0, s[14:15] ; D200006C 0039E480 v_cndmask_b32_e64 v109, 0, 1.0, s[10:11] ; D200006D 0029E480 v_cndmask_b32_e64 v110, 0, 1.0, s[12:13] ; D200006E 0031E480 v_mul_f32_e32 v50, v106, v50 ; 1064656A v_mul_f32_e32 v51, v106, v51 ; 1066676A v_mac_f32_e32 v50, v41, v48 ; 3E646129 v_mac_f32_e32 v51, v41, v49 ; 3E666329 v_mul_f32_e32 v42, v106, v42 ; 1054556A v_mul_f32_e32 v43, v106, v43 ; 1056576A v_mac_f32_e32 v42, v41, v58 ; 3E547529 v_mac_f32_e32 v43, v41, v59 ; 3E567729 v_mul_f32_e32 v48, v106, v66 ; 1060856A v_mul_f32_e32 v49, v106, v67 ; 1062876A v_mac_f32_e32 v48, v41, v64 ; 3E608129 v_mac_f32_e32 v49, v41, v65 ; 3E628329 v_mac_f32_e32 v50, v30, v46 ; 3E645D1E v_mac_f32_e32 v51, v30, v47 ; 3E665F1E v_mac_f32_e32 v42, v30, v56 ; 3E54711E v_mac_f32_e32 v43, v30, v57 ; 3E56731E v_mac_f32_e32 v48, v30, v62 ; 3E607D1E v_mac_f32_e32 v49, v30, v63 ; 3E627F1E v_mac_f32_e32 v50, v12, v31 ; 3E643F0C v_mac_f32_e32 v51, v12, v32 ; 3E66410C v_mac_f32_e32 v42, v12, v54 ; 3E546D0C v_mac_f32_e32 v43, v12, v55 ; 3E566F0C v_mac_f32_e32 v48, v12, v60 ; 3E60790C v_mac_f32_e32 v49, v12, v61 ; 3E627B0C v_mac_f32_e32 v50, v16, v28 ; 3E643910 v_mac_f32_e32 v51, v16, v29 ; 3E663B10 v_mac_f32_e32 v42, v16, v52 ; 3E546910 v_mac_f32_e32 v43, v16, v53 ; 3E566B10 v_mac_f32_e32 v48, v16, v44 ; 3E605910 v_mac_f32_e32 v49, v16, v45 ; 3E625B10 v_mul_f32_e32 v12, v110, v76 ; 1018996E v_mul_f32_e32 v16, v110, v77 ; 10209B6E v_mac_f32_e32 v12, v109, v74 ; 3E18956D v_mac_f32_e32 v16, v109, v75 ; 3E20976D v_mul_f32_e32 v28, v110, v68 ; 1038896E v_mul_f32_e32 v29, v110, v69 ; 103A8B6E v_mac_f32_e32 v28, v109, v80 ; 3E38A16D v_mac_f32_e32 v29, v109, v81 ; 3E3AA36D s_waitcnt vmcnt(12) ; BF8C077C v_mul_f32_e32 v18, v110, v18 ; 1024256E v_mul_f32_e32 v19, v110, v19 ; 1026276E v_mac_f32_e32 v18, v109, v88 ; 3E24B16D v_mac_f32_e32 v19, v109, v89 ; 3E26B36D v_mac_f32_e32 v12, v108, v72 ; 3E18916C v_mac_f32_e32 v16, v108, v73 ; 3E20936C v_mac_f32_e32 v28, v108, v78 ; 3E389D6C v_mac_f32_e32 v29, v108, v79 ; 3E3A9F6C v_mac_f32_e32 v18, v108, v86 ; 3E24AD6C v_mac_f32_e32 v19, v108, v87 ; 3E26AF6C v_mac_f32_e32 v12, v15, v35 ; 3E18470F v_mac_f32_e32 v16, v15, v36 ; 3E20490F v_mac_f32_e32 v28, v15, v37 ; 3E384B0F v_mac_f32_e32 v29, v15, v38 ; 3E3A4D0F v_mac_f32_e32 v18, v15, v70 ; 3E248D0F v_mac_f32_e32 v19, v15, v71 ; 3E268F0F v_mac_f32_e32 v12, v107, v33 ; 3E18436B v_mac_f32_e32 v16, v107, v34 ; 3E20456B v_mac_f32_e32 v28, v107, v13 ; 3E381B6B v_mac_f32_e32 v29, v107, v14 ; 3E3A1D6B v_mac_f32_e32 v18, v107, v25 ; 3E24336B v_mac_f32_e32 v19, v107, v26 ; 3E26356B image_sample_l v[13:14], 10, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[60:67], s[40:43] ; F0900A00 014F0D16 image_sample_l v[25:26], 10, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[52:59], s[28:31] ; F0900A00 00ED1916 s_waitcnt vmcnt(0) ; BF8C0770 image_sample_l v[22:23], 10, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[44:51], s[32:35] ; F0900A00 010B1616 v_cndmask_b32_e64 v15, 0, 1.0, s[8:9] ; D200000F 0021E480 v_mul_f32_e32 v24, v15, v39 ; 10304F0F v_mul_f32_e32 v30, v15, v40 ; 103C510F v_cndmask_b32_e64 v31, 0, 1.0, s[2:3] ; D200001F 0009E480 v_mac_f32_e32 v24, v31, v94 ; 3E30BD1F v_mac_f32_e32 v30, v31, v95 ; 3E3CBF1F v_mul_f32_e32 v32, v15, v82 ; 1040A50F v_mul_f32_e32 v33, v15, v83 ; 1042A70F v_mac_f32_e32 v32, v31, v102 ; 3E40CD1F v_mac_f32_e32 v33, v31, v103 ; 3E42CF1F s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v22, v15, v22 ; 102C2D0F v_mul_f32_e32 v15, v15, v23 ; 101E2F0F v_mac_f32_e32 v22, v31, v25 ; 3E2C331F v_mac_f32_e32 v15, v31, v26 ; 3E1E351F v_cndmask_b32_e64 v23, 0, 1.0, s[0:1] ; D2000017 0001E480 v_mac_f32_e32 v24, v23, v92 ; 3E30B917 v_mac_f32_e32 v30, v23, v93 ; 3E3CBB17 v_mac_f32_e32 v32, v23, v100 ; 3E40C917 v_mac_f32_e32 v33, v23, v101 ; 3E42CB17 v_mac_f32_e32 v22, v23, v13 ; 3E2C1B17 v_mac_f32_e32 v15, v23, v14 ; 3E1E1D17 v_cmp_eq_f32_e64 s[0:1], v17, v27 ; D0040000 00023711 v_cndmask_b32_e64 v13, 0, 1.0, s[0:1] ; D200000D 0001E480 v_mac_f32_e32 v24, v13, v90 ; 3E30B50D v_mac_f32_e32 v30, v13, v91 ; 3E3CB70D v_mac_f32_e32 v32, v13, v98 ; 3E40C50D v_mac_f32_e32 v33, v13, v99 ; 3E42C70D v_mac_f32_e32 v22, v13, v104 ; 3E2CD10D v_mac_f32_e32 v15, v13, v105 ; 3E1ED30D v_mov_b32_e32 v13, 0xbe4ccccd ; 7E1A02FF BE4CCCCD v_mad_f32 v0, v11, v0, v13 ; D2820000 0436010B v_mad_f32 v1, v11, v1, v13 ; D2820001 0436030B v_mac_f32_e32 v13, v11, v10 ; 3E1A150B v_cndmask_b32_e64 v10, 0, 1.0, vcc ; D200000A 01A9E480 v_mac_f32_e32 v24, v10, v20 ; 3E30290A v_mac_f32_e32 v30, v10, v21 ; 3E3C2B0A v_mov_b32_e32 v11, 0x40e00000 ; 7E1602FF 40E00000 v_mul_f32_e32 v0, v11, v0 ; 1000010B v_mul_f32_e32 v1, v11, v1 ; 1002030B v_mul_f32_e32 v11, v11, v13 ; 10161B0B v_mov_b32_e32 v13, 0x3c23d70a ; 7E1A02FF 3C23D70A v_max_f32_e32 v0, v13, v0 ; 2000010D v_max_f32_e32 v1, v13, v1 ; 2002030D v_max_f32_e32 v11, v13, v11 ; 2016170D v_add_f32_e32 v13, v1, v0 ; 061A0101 v_add_f32_e32 v13, v11, v13 ; 061A1B0B v_rcp_f32_e32 v13, v13 ; 7E1A550D v_mac_f32_e32 v32, v10, v96 ; 3E40C10A v_mac_f32_e32 v33, v10, v97 ; 3E42C30A v_mac_f32_e32 v22, v10, v84 ; 3E2CA90A v_mac_f32_e32 v15, v10, v85 ; 3E1EAB0A v_mul_f32_e32 v0, v13, v0 ; 1000010D v_mul_f32_e32 v1, v13, v1 ; 1002030D v_mul_f32_e32 v10, v13, v11 ; 1014170D v_mad_f32 v11, 2.0, v50, -1.0 ; D282000B 03CE64F4 v_mad_f32 v13, 2.0, v43, -1.0 ; D282000D 03CE56F4 v_mul_f32_e32 v11, v0, v11 ; 10161700 v_mac_f32_e32 v11, v1, v13 ; 3E161B01 v_mad_f32 v13, 2.0, v42, -1.0 ; D282000D 03CE54F4 v_mad_f32 v14, 2.0, v49, -1.0 ; D282000E 03CE62F4 v_mul_f32_e32 v17, 0, v0 ; 10220080 v_mad_f32 v13, v1, v13, v17 ; D282000D 04461B01 v_mac_f32_e32 v13, v10, v14 ; 3E1A1D0A v_mad_f32 v14, 2.0, v51, -1.0 ; D282000E 03CE66F4 v_mad_f32 v20, 2.0, v48, -1.0 ; D2820014 03CE60F4 v_mul_f32_e32 v14, v0, v14 ; 101C1D00 v_mac_f32_e32 v14, 0, v1 ; 3E1C0280 v_mac_f32_e32 v14, v10, v20 ; 3E1C290A v_mad_f32 v12, 2.0, v12, -1.0 ; D282000C 03CE18F4 v_mad_f32 v20, 2.0, v29, -1.0 ; D2820014 03CE3AF4 v_mul_f32_e32 v12, v0, v12 ; 10181900 v_mac_f32_e32 v12, v1, v20 ; 3E182901 v_mad_f32 v20, 2.0, v28, -1.0 ; D2820014 03CE38F4 v_mad_f32 v19, 2.0, v19, -1.0 ; D2820013 03CE26F4 v_mad_f32 v20, v1, v20, v17 ; D2820014 04462901 v_mac_f32_e32 v20, v10, v19 ; 3E28270A v_mad_f32 v16, 2.0, v16, -1.0 ; D2820010 03CE20F4 v_mad_f32 v18, 2.0, v18, -1.0 ; D2820012 03CE24F4 v_mul_f32_e32 v16, v0, v16 ; 10202100 v_mac_f32_e32 v16, 0, v1 ; 3E200280 v_mac_f32_e32 v16, v10, v18 ; 3E20250A v_mad_f32 v18, 2.0, v30, -1.0 ; D2820012 03CE3CF4 v_mad_f32 v19, 2.0, v24, -1.0 ; D2820013 03CE30F4 v_mul_f32_e32 v18, v0, v18 ; 10242500 v_mul_f32_e32 v0, v0, v19 ; 10002700 v_mad_f32 v19, 2.0, v32, -1.0 ; D2820013 03CE40F4 v_mac_f32_e32 v17, v1, v19 ; 3E222701 v_mad_f32 v19, 2.0, v33, -1.0 ; D2820013 03CE42F4 v_mac_f32_e32 v0, v1, v19 ; 3E002701 v_mac_f32_e32 v18, 0, v1 ; 3E240280 v_mad_f32 v1, 2.0, v15, -1.0 ; D2820001 03CE1EF4 v_mac_f32_e32 v17, v10, v1 ; 3E22030A v_mad_f32 v1, 2.0, v22, -1.0 ; D2820001 03CE2CF4 v_mac_f32_e32 v18, v10, v1 ; 3E24030A v_mac_f32_e32 v11, 0, v10 ; 3E161480 v_mac_f32_e32 v12, 0, v10 ; 3E181480 v_mac_f32_e32 v0, 0, v10 ; 3E001480 v_mul_f32_e32 v1, v13, v4 ; 1002090D v_mul_f32_e32 v10, v14, v4 ; 1014090E v_mul_f32_e32 v4, v11, v4 ; 1008090B v_mac_f32_e32 v1, v20, v5 ; 3E020B14 v_mac_f32_e32 v10, v16, v5 ; 3E140B10 v_mac_f32_e32 v4, v12, v5 ; 3E080B0C v_mac_f32_e32 v1, v17, v6 ; 3E020D11 v_mac_f32_e32 v10, v18, v6 ; 3E140D12 v_mac_f32_e32 v4, v0, v6 ; 3E080D00 v_mul_f32_e32 v0, v10, v10 ; 1000150A v_mac_f32_e32 v0, v1, v1 ; 3E000301 v_mac_f32_e32 v0, v4, v4 ; 3E000904 v_add_f32_e32 v0, 1.0, v0 ; 060000F2 v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_mul_f32_e32 v1, v0, v1 ; 10020300 v_mul_f32_e32 v5, v0, v10 ; 100A1500 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_mad_f32 v1, -v1, v2, v7 ; D2820001 241E0501 v_mad_f32 v4, -v5, v2, v8 ; D2820004 24220505 v_mad_f32 v0, -v0, v2, v9 ; D2820000 24260500 v_mul_f32_e32 v2, v1, v1 ; 10040301 v_mac_f32_e32 v2, v4, v4 ; 3E040904 v_mac_f32_e32 v2, v0, v0 ; 3E040100 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 v_mul_f32_e32 v1, v2, v1 ; 10020302 v_mul_f32_e32 v4, v2, v4 ; 10080902 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0 v_mad_f32 v2, 0.5, v4, 0.5 ; D2820002 03C208F0 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v0, v0, v3 ; 5E000700 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 96 VGPRS: 112 Code Size: 2716 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL CONST[0..17] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: FSNE TEMP[0].x, CONST[1].zzzz, IMM[0].xxxx 1: UIF TEMP[0].xxxx :0 2: MUL TEMP[0], CONST[2], IN[0].xxxx 3: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0] 4: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0] 5: ADD TEMP[0].xyz, TEMP[0], CONST[5] 6: MOV TEMP[1].x, CONST[6].xxxx 7: MOV TEMP[1].y, CONST[7].xxxx 8: MOV TEMP[1].z, CONST[8].xxxx 9: MOV TEMP[2].x, CONST[6].yyyy 10: MOV TEMP[2].y, CONST[7].yyyy 11: MOV TEMP[2].z, CONST[8].yyyy 12: MOV TEMP[3].x, CONST[6].zzzz 13: MOV TEMP[3].y, CONST[7].zzzz 14: MOV TEMP[3].z, CONST[8].zzzz 15: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[1].xxxx 16: MAD TEMP[1].xyz, TEMP[2].xyzz, IN[1].yyyy, TEMP[1].xyzz 17: MAD TEMP[1].xyz, TEMP[3].xyzz, IN[1].zzzz, TEMP[1].xyzz 18: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 19: RSQ TEMP[2].x, TEMP[2].xxxx 20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 21: MUL TEMP[2].xyz, TEMP[0].xyzz, CONST[0].wwww 22: ADD TEMP[2].xyz, CONST[0].xyzz, -TEMP[2].xyzz 23: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 24: RSQ TEMP[3].x, TEMP[3].xxxx 25: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 26: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[2].xyzz 27: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 28: ADD TEMP[2].x, IMM[0].yyyy, -TEMP[2].xxxx 29: SQRT TEMP[2].x, TEMP[2].xxxx 30: MUL TEMP[2].x, CONST[1].zzzz, TEMP[2].xxxx 31: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 32: ADD TEMP[0].xyz, TEMP[0].xyzz, -TEMP[1].xyzz 33: MUL TEMP[1], CONST[10], TEMP[0].xxxx 34: MAD TEMP[1], CONST[11], TEMP[0].yyyy, TEMP[1] 35: MAD TEMP[0], CONST[12], TEMP[0].zzzz, TEMP[1] 36: ADD TEMP[0], TEMP[0], CONST[13] 37: ELSE :0 38: MUL TEMP[1], CONST[14], IN[0].xxxx 39: MAD TEMP[1], CONST[15], IN[0].yyyy, TEMP[1] 40: MAD TEMP[1], CONST[16], IN[0].zzzz, TEMP[1] 41: ADD TEMP[0], TEMP[1], CONST[17] 42: ENDIF 43: MOV TEMP[1].xyw, TEMP[0].xyxw 44: RCP TEMP[2].x, TEMP[0].wwww 45: MUL TEMP[2].x, CONST[1].xxxx, TEMP[2].xxxx 46: MOV_SAT TEMP[2].x, TEMP[2].xxxx 47: ADD TEMP[2].x, TEMP[0].zzzz, TEMP[2].xxxx 48: MAX TEMP[0].x, TEMP[2].xxxx, -TEMP[0].wwww 49: LRP TEMP[0].x, CONST[1].yyyy, TEMP[0].xxxx, TEMP[2].xxxx 50: MOV TEMP[1].z, TEMP[0].xxxx 51: MOV OUT[0], TEMP[1] 52: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = extractelement <4 x float> %20, i32 2 %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = add i32 %5, %7 %27 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %26) %28 = extractelement <4 x float> %27, i32 0 %29 = extractelement <4 x float> %27, i32 1 %30 = extractelement <4 x float> %27, i32 2 %31 = fcmp une float %16, 0.000000e+00 br i1 %31, label %IF, label %ELSE IF: ; preds = %main_body %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %72 = fmul float %68, %21 %73 = fmul float %67, %21 %74 = fmul float %66, %21 %75 = fmul float %65, %22 %76 = fadd float %75, %72 %77 = fmul float %64, %22 %78 = fadd float %77, %73 %79 = fmul float %63, %22 %80 = fadd float %79, %74 %81 = fmul float %62, %23 %82 = fadd float %81, %76 %83 = fmul float %61, %23 %84 = fadd float %83, %78 %85 = fmul float %60, %23 %86 = fadd float %85, %80 %87 = fadd float %82, %59 %88 = fadd float %84, %58 %89 = fadd float %86, %57 %90 = fmul float %56, %28 %91 = fmul float %53, %28 %92 = fmul float %50, %28 %93 = fmul float %55, %29 %94 = fadd float %93, %90 %95 = fmul float %52, %29 %96 = fadd float %95, %91 %97 = fmul float %49, %29 %98 = fadd float %97, %92 %99 = fmul float %54, %30 %100 = fadd float %99, %94 %101 = fmul float %51, %30 %102 = fadd float %101, %96 %103 = fmul float %48, %30 %104 = fadd float %103, %98 %105 = fmul float %100, %100 %106 = fmul float %102, %102 %107 = fadd float %106, %105 %108 = fmul float %104, %104 %109 = fadd float %107, %108 %110 = call float @llvm.AMDGPU.rsq.clamped.f32(float %109) %111 = fmul float %100, %110 %112 = fmul float %102, %110 %113 = fmul float %104, %110 %114 = fmul float %87, %13 %115 = fmul float %88, %13 %116 = fmul float %89, %13 %117 = fsub float %71, %114 %118 = fsub float %70, %115 %119 = fsub float %69, %116 %120 = fmul float %117, %117 %121 = fmul float %118, %118 %122 = fadd float %121, %120 %123 = fmul float %119, %119 %124 = fadd float %122, %123 %125 = call float @llvm.AMDGPU.rsq.clamped.f32(float %124) %126 = fmul float %117, %125 %127 = fmul float %118, %125 %128 = fmul float %119, %125 %129 = fmul float %111, %126 %130 = fmul float %112, %127 %131 = fadd float %130, %129 %132 = fmul float %113, %128 %133 = fadd float %131, %132 %134 = fmul float %133, %133 %135 = fsub float 1.000000e+00, %134 %136 = call float @llvm.sqrt.f32(float %135) %137 = fmul float %16, %136 %138 = fmul float %111, %137 %139 = fmul float %112, %137 %140 = fmul float %113, %137 %141 = fsub float %87, %138 %142 = fsub float %88, %139 %143 = fsub float %89, %140 %144 = fmul float %47, %141 %145 = fmul float %46, %141 %146 = fmul float %45, %141 %147 = fmul float %44, %141 %148 = fmul float %43, %142 %149 = fadd float %148, %144 %150 = fmul float %42, %142 %151 = fadd float %150, %145 %152 = fmul float %41, %142 %153 = fadd float %152, %146 %154 = fmul float %40, %142 %155 = fadd float %154, %147 %156 = fmul float %39, %143 %157 = fadd float %156, %149 %158 = fmul float %38, %143 %159 = fadd float %158, %151 %160 = fmul float %37, %143 %161 = fadd float %160, %153 %162 = fmul float %36, %143 %163 = fadd float %162, %155 %164 = fadd float %157, %35 %165 = fadd float %159, %34 %166 = fadd float %161, %33 %167 = fadd float %163, %32 br label %ENDIF ELSE: ; preds = %main_body %168 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %169 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %170 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %171 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %172 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %173 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %174 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %175 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %176 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %177 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %178 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %179 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %180 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %181 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %182 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %183 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %184 = fmul float %183, %21 %185 = fmul float %182, %21 %186 = fmul float %181, %21 %187 = fmul float %180, %21 %188 = fmul float %179, %22 %189 = fadd float %188, %184 %190 = fmul float %178, %22 %191 = fadd float %190, %185 %192 = fmul float %177, %22 %193 = fadd float %192, %186 %194 = fmul float %176, %22 %195 = fadd float %194, %187 %196 = fmul float %175, %23 %197 = fadd float %196, %189 %198 = fmul float %174, %23 %199 = fadd float %198, %191 %200 = fmul float %173, %23 %201 = fadd float %200, %193 %202 = fmul float %172, %23 %203 = fadd float %202, %195 %204 = fadd float %197, %171 %205 = fadd float %199, %170 %206 = fadd float %201, %169 %207 = fadd float %203, %168 br label %ENDIF ENDIF: ; preds = %ELSE, %IF %temp.0 = phi float [ %164, %IF ], [ %204, %ELSE ] %temp1.0 = phi float [ %165, %IF ], [ %205, %ELSE ] %temp2.0 = phi float [ %166, %IF ], [ %206, %ELSE ] %temp3.0 = phi float [ %167, %IF ], [ %207, %ELSE ] %208 = fdiv float 1.000000e+00, %temp3.0 %209 = fmul float %14, %208 %210 = call float @llvm.AMDIL.clamp.(float %209, float 0.000000e+00, float 1.000000e+00) %211 = fadd float %temp2.0, %210 %212 = fsub float -0.000000e+00, %temp3.0 %213 = call float @llvm.maxnum.f32(float %211, float %212) %214 = call float @llvm.AMDGPU.lrp(float %15, float %213, float %211) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %temp.0, float %temp1.0, float %214, float %temp3.0) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v4, s10, v0 ; 4A08000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v4, s[4:7], 0 idxen ; E00C2000 80010004 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[3:6], v4, s[8:11], 0 idxen ; E00C2000 80020304 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_cmp_eq_f32_e64 s[6:7], 0, s4 ; D0040006 00000880 s_and_saveexec_b64 s[6:7], s[6:7] ; BE862406 s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E s_cbranch_execz BB0_1 ; BF880000 s_buffer_load_dword s5, s[0:3], 0x47 ; C2028147 s_buffer_load_dword s8, s[0:3], 0x42 ; C2040142 s_buffer_load_dword s9, s[0:3], 0x43 ; C2048143 s_buffer_load_dword s10, s[0:3], 0x44 ; C2050144 s_buffer_load_dword s11, s[0:3], 0x45 ; C2058145 s_buffer_load_dword s12, s[0:3], 0x46 ; C2060146 s_buffer_load_dword s13, s[0:3], 0x3d ; C206813D s_buffer_load_dword s14, s[0:3], 0x3e ; C207013E s_buffer_load_dword s15, s[0:3], 0x3f ; C207813F s_buffer_load_dword s16, s[0:3], 0x40 ; C2080140 s_buffer_load_dword s17, s[0:3], 0x41 ; C2088141 s_buffer_load_dword s18, s[0:3], 0x38 ; C2090138 s_buffer_load_dword s19, s[0:3], 0x39 ; C2098139 s_buffer_load_dword s20, s[0:3], 0x3a ; C20A013A s_buffer_load_dword s21, s[0:3], 0x3b ; C20A813B s_buffer_load_dword s22, s[0:3], 0x3c ; C20B013C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s18, v0 ; 100C0012 v_mul_f32_e32 v7, s19, v0 ; 100E0013 v_mul_f32_e32 v9, s20, v0 ; 10120014 v_mul_f32_e32 v10, s21, v0 ; 10140015 v_mac_f32_e32 v6, s22, v1 ; 3E0C0216 v_mac_f32_e32 v7, s13, v1 ; 3E0E020D v_mac_f32_e32 v9, s14, v1 ; 3E12020E v_mac_f32_e32 v10, s15, v1 ; 3E14020F v_mac_f32_e32 v6, s16, v2 ; 3E0C0410 v_mac_f32_e32 v7, s17, v2 ; 3E0E0411 v_mac_f32_e32 v9, s8, v2 ; 3E120408 v_mac_f32_e32 v10, s9, v2 ; 3E140409 v_add_f32_e32 v6, s10, v6 ; 060C0C0A v_add_f32_e32 v8, s11, v7 ; 06100E0B v_add_f32_e32 v9, s12, v9 ; 0612120C v_add_f32_e32 v7, s5, v10 ; 060E1405 s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F s_xor_b64 exec, exec, s[6:7] ; 89FE067E s_cbranch_execz BB0_4 ; BF880000 s_buffer_load_dword s9, s[0:3], 0x37 ; C2048137 s_buffer_load_dword s10, s[0:3], 0x32 ; C2050132 s_buffer_load_dword s11, s[0:3], 0x33 ; C2058133 s_buffer_load_dword s12, s[0:3], 0x34 ; C2060134 s_buffer_load_dword s13, s[0:3], 0x35 ; C2068135 s_buffer_load_dword s14, s[0:3], 0x36 ; C2070136 s_buffer_load_dword s15, s[0:3], 0x2d ; C207812D s_buffer_load_dword s16, s[0:3], 0x2e ; C208012E s_buffer_load_dword s17, s[0:3], 0x2f ; C208812F s_buffer_load_dword s18, s[0:3], 0x30 ; C2090130 s_buffer_load_dword s19, s[0:3], 0x31 ; C2098131 s_buffer_load_dword s20, s[0:3], 0x28 ; C20A0128 s_buffer_load_dword s21, s[0:3], 0x29 ; C20A8129 s_buffer_load_dword s22, s[0:3], 0x2a ; C20B012A s_buffer_load_dword s23, s[0:3], 0x2b ; C20B812B s_buffer_load_dword s24, s[0:3], 0x2c ; C20C012C s_buffer_load_dword s25, s[0:3], 0x1d ; C20C811D s_buffer_load_dword s26, s[0:3], 0x1e ; C20D011E s_buffer_load_dword s27, s[0:3], 0x20 ; C20D8120 s_buffer_load_dword s28, s[0:3], 0x21 ; C20E0121 s_buffer_load_dword s29, s[0:3], 0x22 ; C20E8122 s_buffer_load_dword s30, s[0:3], 0x16 ; C20F0116 s_buffer_load_dword s31, s[0:3], 0x18 ; C20F8118 s_buffer_load_dword s32, s[0:3], 0x19 ; C2100119 s_buffer_load_dword s33, s[0:3], 0x1a ; C210811A s_buffer_load_dword s34, s[0:3], 0x1c ; C211011C s_buffer_load_dword s35, s[0:3], 0x10 ; C2118110 s_buffer_load_dword s36, s[0:3], 0x11 ; C2120111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s27, v3 ; 100C061B v_mac_f32_e32 v6, s28, v4 ; 3E0C081C v_mac_f32_e32 v6, s29, v5 ; 3E0C0A1D s_buffer_load_dword s27, s[0:3], 0x12 ; C20D8112 v_mul_f32_e32 v7, s31, v3 ; 100E061F v_mac_f32_e32 v7, s32, v4 ; 3E0E0820 v_mac_f32_e32 v7, s33, v5 ; 3E0E0A21 v_mul_f32_e32 v3, s34, v3 ; 10060622 v_mac_f32_e32 v3, s25, v4 ; 3E060819 v_mac_f32_e32 v3, s26, v5 ; 3E060A1A s_buffer_load_dword s25, s[0:3], 0x14 ; C20C8114 s_buffer_load_dword s26, s[0:3], 0x15 ; C20D0115 s_buffer_load_dword s28, s[0:3], 0x9 ; C20E0109 s_buffer_load_dword s29, s[0:3], 0xa ; C20E810A s_buffer_load_dword s31, s[0:3], 0xc ; C20F810C s_buffer_load_dword s32, s[0:3], 0xd ; C210010D s_buffer_load_dword s33, s[0:3], 0xe ; C210810E s_buffer_load_dword s34, s[0:3], 0x0 ; C2110100 v_mul_f32_e32 v4, v7, v7 ; 10080F07 v_mac_f32_e32 v4, v3, v3 ; 3E080703 v_mac_f32_e32 v4, v6, v6 ; 3E080D06 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s28, v0 ; 100A001C v_mul_f32_e32 v8, s29, v0 ; 1010001D s_buffer_load_dword s28, s[0:3], 0x1 ; C20E0101 v_mac_f32_e32 v5, s32, v1 ; 3E0A0220 v_mac_f32_e32 v8, s33, v1 ; 3E100221 v_mac_f32_e32 v5, s36, v2 ; 3E0A0424 v_mac_f32_e32 v8, s27, v2 ; 3E10041B v_add_f32_e32 v5, s26, v5 ; 060A0A1A v_add_f32_e32 v8, s30, v8 ; 0610101E s_buffer_load_dword s26, s[0:3], 0x2 ; C20D0102 s_buffer_load_dword s27, s[0:3], 0x3 ; C20D8103 s_buffer_load_dword s29, s[0:3], 0x8 ; C20E8108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v9, s27 ; 7E12021B v_mad_f32 v10, -v5, v9, s28 ; D282000A 20721305 v_mul_f32_e32 v0, s29, v0 ; 1000001D v_mac_f32_e32 v0, s31, v1 ; 3E00021F v_mac_f32_e32 v0, s35, v2 ; 3E000423 v_add_f32_e32 v0, s25, v0 ; 06000019 v_mad_f32 v1, -v0, v9, s34 ; D2820001 208A1300 v_mad_f32 v2, -v8, v9, s26 ; D2820002 206A1308 v_mul_f32_e32 v9, v1, v1 ; 10120301 v_mac_f32_e32 v9, v10, v10 ; 3E12150A v_mac_f32_e32 v9, v2, v2 ; 3E120502 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mul_f32_e32 v7, v4, v7 ; 100E0F04 v_mul_f32_e32 v3, v4, v3 ; 10060704 v_mul_f32_e32 v4, v4, v6 ; 10080D04 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mul_f32_e32 v6, v9, v10 ; 100C1509 v_mul_f32_e32 v2, v9, v2 ; 10040509 v_mul_f32_e32 v1, v1, v7 ; 10020F01 v_mac_f32_e32 v1, v6, v3 ; 3E020706 v_mac_f32_e32 v1, v2, v4 ; 3E020902 v_mad_f32 v1, -v1, v1, 1.0 ; D2820001 23CA0301 v_sqrt_f32_e32 v1, v1 ; 7E026701 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mad_f32 v0, -v7, v1, v0 ; D2820000 24020307 v_mad_f32 v2, -v3, v1, v5 ; D2820002 24160303 v_mad_f32 v1, -v4, v1, v8 ; D2820001 24220304 v_mul_f32_e32 v3, s20, v0 ; 10060014 v_mul_f32_e32 v4, s21, v0 ; 10080015 v_mul_f32_e32 v5, s22, v0 ; 100A0016 v_mul_f32_e32 v0, s23, v0 ; 10000017 v_mac_f32_e32 v3, s24, v2 ; 3E060418 v_mac_f32_e32 v4, s15, v2 ; 3E08040F v_mac_f32_e32 v5, s16, v2 ; 3E0A0410 v_mac_f32_e32 v0, s17, v2 ; 3E000411 v_mac_f32_e32 v3, s18, v1 ; 3E060212 v_mac_f32_e32 v4, s19, v1 ; 3E080213 v_mac_f32_e32 v5, s10, v1 ; 3E0A020A v_mac_f32_e32 v0, s11, v1 ; 3E00020B v_add_f32_e32 v6, s12, v3 ; 060C060C v_add_f32_e32 v8, s13, v4 ; 0610080D v_add_f32_e32 v9, s14, v5 ; 06120A0E v_add_f32_e32 v7, s9, v0 ; 060E0009 s_or_b64 exec, exec, s[6:7] ; 88FE067E v_rcp_f32_e32 v0, v7 ; 7E005507 v_sub_f32_e64 v1, 1.0, s5 ; D2080001 00000AF2 v_mul_f32_e32 v0, s8, v0 ; 10000008 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_add_f32_e32 v0, v0, v9 ; 06001300 v_max_f32_e64 v2, v0, -v7 ; D2200002 40020F00 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_mac_f32_e32 v0, s5, v2 ; 3E000405 exp 15, 12, 0, 1, 0, v6, v8, v0, v7 ; F80008CF 07000806 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 12 Code Size: 744 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL OUT[0], COLOR IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxx 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %23 = bitcast i32 %22 to float %24 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %25 = bitcast i32 %24 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25) ret void } ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080 exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 20 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL CONST[0..17] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: FSNE TEMP[0].x, CONST[1].zzzz, IMM[0].xxxx 1: UIF TEMP[0].xxxx :0 2: MUL TEMP[0], CONST[2], IN[0].xxxx 3: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0] 4: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0] 5: ADD TEMP[0].xyz, TEMP[0], CONST[5] 6: MOV TEMP[1].x, CONST[6].xxxx 7: MOV TEMP[1].y, CONST[7].xxxx 8: MOV TEMP[1].z, CONST[8].xxxx 9: MOV TEMP[2].x, CONST[6].yyyy 10: MOV TEMP[2].y, CONST[7].yyyy 11: MOV TEMP[2].z, CONST[8].yyyy 12: MOV TEMP[3].x, CONST[6].zzzz 13: MOV TEMP[3].y, CONST[7].zzzz 14: MOV TEMP[3].z, CONST[8].zzzz 15: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[1].xxxx 16: MAD TEMP[1].xyz, TEMP[2].xyzz, IN[1].yyyy, TEMP[1].xyzz 17: MAD TEMP[1].xyz, TEMP[3].xyzz, IN[1].zzzz, TEMP[1].xyzz 18: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 19: RSQ TEMP[2].x, TEMP[2].xxxx 20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 21: MUL TEMP[2].xyz, TEMP[0].xyzz, CONST[0].wwww 22: ADD TEMP[2].xyz, CONST[0].xyzz, -TEMP[2].xyzz 23: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 24: RSQ TEMP[3].x, TEMP[3].xxxx 25: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 26: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[2].xyzz 27: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 28: ADD TEMP[2].x, IMM[0].yyyy, -TEMP[2].xxxx 29: SQRT TEMP[2].x, TEMP[2].xxxx 30: MUL TEMP[2].x, CONST[1].zzzz, TEMP[2].xxxx 31: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 32: ADD TEMP[0].xyz, TEMP[0].xyzz, -TEMP[1].xyzz 33: MUL TEMP[1], CONST[10], TEMP[0].xxxx 34: MAD TEMP[1], CONST[11], TEMP[0].yyyy, TEMP[1] 35: MAD TEMP[0], CONST[12], TEMP[0].zzzz, TEMP[1] 36: ADD TEMP[0], TEMP[0], CONST[13] 37: ELSE :0 38: MUL TEMP[1], CONST[14], IN[0].xxxx 39: MAD TEMP[1], CONST[15], IN[0].yyyy, TEMP[1] 40: MAD TEMP[1], CONST[16], IN[0].zzzz, TEMP[1] 41: ADD TEMP[0], TEMP[1], CONST[17] 42: ENDIF 43: MOV TEMP[1].xyw, TEMP[0].xyxw 44: RCP TEMP[2].x, TEMP[0].wwww 45: MUL TEMP[2].x, CONST[1].xxxx, TEMP[2].xxxx 46: MOV_SAT TEMP[2].x, TEMP[2].xxxx 47: ADD TEMP[2].x, TEMP[0].zzzz, TEMP[2].xxxx 48: MAX TEMP[0].x, TEMP[2].xxxx, -TEMP[0].wwww 49: LRP TEMP[0].x, CONST[1].yyyy, TEMP[0].xxxx, TEMP[2].xxxx 50: MOV TEMP[1].z, TEMP[0].xxxx 51: MOV OUT[0], TEMP[1] 52: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = extractelement <4 x float> %20, i32 2 %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = add i32 %5, %7 %27 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %26) %28 = extractelement <4 x float> %27, i32 0 %29 = extractelement <4 x float> %27, i32 1 %30 = extractelement <4 x float> %27, i32 2 %31 = fcmp une float %16, 0.000000e+00 br i1 %31, label %IF, label %ELSE IF: ; preds = %main_body %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %72 = fmul float %68, %21 %73 = fmul float %67, %21 %74 = fmul float %66, %21 %75 = fmul float %65, %22 %76 = fadd float %75, %72 %77 = fmul float %64, %22 %78 = fadd float %77, %73 %79 = fmul float %63, %22 %80 = fadd float %79, %74 %81 = fmul float %62, %23 %82 = fadd float %81, %76 %83 = fmul float %61, %23 %84 = fadd float %83, %78 %85 = fmul float %60, %23 %86 = fadd float %85, %80 %87 = fadd float %82, %59 %88 = fadd float %84, %58 %89 = fadd float %86, %57 %90 = fmul float %56, %28 %91 = fmul float %53, %28 %92 = fmul float %50, %28 %93 = fmul float %55, %29 %94 = fadd float %93, %90 %95 = fmul float %52, %29 %96 = fadd float %95, %91 %97 = fmul float %49, %29 %98 = fadd float %97, %92 %99 = fmul float %54, %30 %100 = fadd float %99, %94 %101 = fmul float %51, %30 %102 = fadd float %101, %96 %103 = fmul float %48, %30 %104 = fadd float %103, %98 %105 = fmul float %100, %100 %106 = fmul float %102, %102 %107 = fadd float %106, %105 %108 = fmul float %104, %104 %109 = fadd float %107, %108 %110 = call float @llvm.AMDGPU.rsq.clamped.f32(float %109) %111 = fmul float %100, %110 %112 = fmul float %102, %110 %113 = fmul float %104, %110 %114 = fmul float %87, %13 %115 = fmul float %88, %13 %116 = fmul float %89, %13 %117 = fsub float %71, %114 %118 = fsub float %70, %115 %119 = fsub float %69, %116 %120 = fmul float %117, %117 %121 = fmul float %118, %118 %122 = fadd float %121, %120 %123 = fmul float %119, %119 %124 = fadd float %122, %123 %125 = call float @llvm.AMDGPU.rsq.clamped.f32(float %124) %126 = fmul float %117, %125 %127 = fmul float %118, %125 %128 = fmul float %119, %125 %129 = fmul float %111, %126 %130 = fmul float %112, %127 %131 = fadd float %130, %129 %132 = fmul float %113, %128 %133 = fadd float %131, %132 %134 = fmul float %133, %133 %135 = fsub float 1.000000e+00, %134 %136 = call float @llvm.sqrt.f32(float %135) %137 = fmul float %16, %136 %138 = fmul float %111, %137 %139 = fmul float %112, %137 %140 = fmul float %113, %137 %141 = fsub float %87, %138 %142 = fsub float %88, %139 %143 = fsub float %89, %140 %144 = fmul float %47, %141 %145 = fmul float %46, %141 %146 = fmul float %45, %141 %147 = fmul float %44, %141 %148 = fmul float %43, %142 %149 = fadd float %148, %144 %150 = fmul float %42, %142 %151 = fadd float %150, %145 %152 = fmul float %41, %142 %153 = fadd float %152, %146 %154 = fmul float %40, %142 %155 = fadd float %154, %147 %156 = fmul float %39, %143 %157 = fadd float %156, %149 %158 = fmul float %38, %143 %159 = fadd float %158, %151 %160 = fmul float %37, %143 %161 = fadd float %160, %153 %162 = fmul float %36, %143 %163 = fadd float %162, %155 %164 = fadd float %157, %35 %165 = fadd float %159, %34 %166 = fadd float %161, %33 %167 = fadd float %163, %32 br label %ENDIF ELSE: ; preds = %main_body %168 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %169 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %170 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %171 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %172 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %173 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %174 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %175 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %176 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %177 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %178 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %179 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %180 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %181 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %182 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %183 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %184 = fmul float %183, %21 %185 = fmul float %182, %21 %186 = fmul float %181, %21 %187 = fmul float %180, %21 %188 = fmul float %179, %22 %189 = fadd float %188, %184 %190 = fmul float %178, %22 %191 = fadd float %190, %185 %192 = fmul float %177, %22 %193 = fadd float %192, %186 %194 = fmul float %176, %22 %195 = fadd float %194, %187 %196 = fmul float %175, %23 %197 = fadd float %196, %189 %198 = fmul float %174, %23 %199 = fadd float %198, %191 %200 = fmul float %173, %23 %201 = fadd float %200, %193 %202 = fmul float %172, %23 %203 = fadd float %202, %195 %204 = fadd float %197, %171 %205 = fadd float %199, %170 %206 = fadd float %201, %169 %207 = fadd float %203, %168 br label %ENDIF ENDIF: ; preds = %ELSE, %IF %temp.0 = phi float [ %164, %IF ], [ %204, %ELSE ] %temp1.0 = phi float [ %165, %IF ], [ %205, %ELSE ] %temp2.0 = phi float [ %166, %IF ], [ %206, %ELSE ] %temp3.0 = phi float [ %167, %IF ], [ %207, %ELSE ] %208 = fdiv float 1.000000e+00, %temp3.0 %209 = fmul float %14, %208 %210 = call float @llvm.AMDIL.clamp.(float %209, float 0.000000e+00, float 1.000000e+00) %211 = fadd float %temp2.0, %210 %212 = fsub float -0.000000e+00, %temp3.0 %213 = call float @llvm.maxnum.f32(float %211, float %212) %214 = call float @llvm.AMDGPU.lrp(float %15, float %213, float %211) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %temp.0, float %temp1.0, float %214, float %temp3.0) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v4, s10, v0 ; 4A08000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v4, s[4:7], 0 idxen ; E00C2000 80010004 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[3:6], v4, s[8:11], 0 idxen ; E00C2000 80020304 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_cmp_eq_f32_e64 s[6:7], 0, s4 ; D0040006 00000880 s_and_saveexec_b64 s[6:7], s[6:7] ; BE862406 s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E s_cbranch_execz BB0_1 ; BF880000 s_buffer_load_dword s5, s[0:3], 0x47 ; C2028147 s_buffer_load_dword s8, s[0:3], 0x42 ; C2040142 s_buffer_load_dword s9, s[0:3], 0x43 ; C2048143 s_buffer_load_dword s10, s[0:3], 0x44 ; C2050144 s_buffer_load_dword s11, s[0:3], 0x45 ; C2058145 s_buffer_load_dword s12, s[0:3], 0x46 ; C2060146 s_buffer_load_dword s13, s[0:3], 0x3d ; C206813D s_buffer_load_dword s14, s[0:3], 0x3e ; C207013E s_buffer_load_dword s15, s[0:3], 0x3f ; C207813F s_buffer_load_dword s16, s[0:3], 0x40 ; C2080140 s_buffer_load_dword s17, s[0:3], 0x41 ; C2088141 s_buffer_load_dword s18, s[0:3], 0x38 ; C2090138 s_buffer_load_dword s19, s[0:3], 0x39 ; C2098139 s_buffer_load_dword s20, s[0:3], 0x3a ; C20A013A s_buffer_load_dword s21, s[0:3], 0x3b ; C20A813B s_buffer_load_dword s22, s[0:3], 0x3c ; C20B013C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s18, v0 ; 100C0012 v_mul_f32_e32 v7, s19, v0 ; 100E0013 v_mul_f32_e32 v9, s20, v0 ; 10120014 v_mul_f32_e32 v10, s21, v0 ; 10140015 v_mac_f32_e32 v6, s22, v1 ; 3E0C0216 v_mac_f32_e32 v7, s13, v1 ; 3E0E020D v_mac_f32_e32 v9, s14, v1 ; 3E12020E v_mac_f32_e32 v10, s15, v1 ; 3E14020F v_mac_f32_e32 v6, s16, v2 ; 3E0C0410 v_mac_f32_e32 v7, s17, v2 ; 3E0E0411 v_mac_f32_e32 v9, s8, v2 ; 3E120408 v_mac_f32_e32 v10, s9, v2 ; 3E140409 v_add_f32_e32 v6, s10, v6 ; 060C0C0A v_add_f32_e32 v8, s11, v7 ; 06100E0B v_add_f32_e32 v9, s12, v9 ; 0612120C v_add_f32_e32 v7, s5, v10 ; 060E1405 s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F s_xor_b64 exec, exec, s[6:7] ; 89FE067E s_cbranch_execz BB0_4 ; BF880000 s_buffer_load_dword s9, s[0:3], 0x37 ; C2048137 s_buffer_load_dword s10, s[0:3], 0x32 ; C2050132 s_buffer_load_dword s11, s[0:3], 0x33 ; C2058133 s_buffer_load_dword s12, s[0:3], 0x34 ; C2060134 s_buffer_load_dword s13, s[0:3], 0x35 ; C2068135 s_buffer_load_dword s14, s[0:3], 0x36 ; C2070136 s_buffer_load_dword s15, s[0:3], 0x2d ; C207812D s_buffer_load_dword s16, s[0:3], 0x2e ; C208012E s_buffer_load_dword s17, s[0:3], 0x2f ; C208812F s_buffer_load_dword s18, s[0:3], 0x30 ; C2090130 s_buffer_load_dword s19, s[0:3], 0x31 ; C2098131 s_buffer_load_dword s20, s[0:3], 0x28 ; C20A0128 s_buffer_load_dword s21, s[0:3], 0x29 ; C20A8129 s_buffer_load_dword s22, s[0:3], 0x2a ; C20B012A s_buffer_load_dword s23, s[0:3], 0x2b ; C20B812B s_buffer_load_dword s24, s[0:3], 0x2c ; C20C012C s_buffer_load_dword s25, s[0:3], 0x1d ; C20C811D s_buffer_load_dword s26, s[0:3], 0x1e ; C20D011E s_buffer_load_dword s27, s[0:3], 0x20 ; C20D8120 s_buffer_load_dword s28, s[0:3], 0x21 ; C20E0121 s_buffer_load_dword s29, s[0:3], 0x22 ; C20E8122 s_buffer_load_dword s30, s[0:3], 0x16 ; C20F0116 s_buffer_load_dword s31, s[0:3], 0x18 ; C20F8118 s_buffer_load_dword s32, s[0:3], 0x19 ; C2100119 s_buffer_load_dword s33, s[0:3], 0x1a ; C210811A s_buffer_load_dword s34, s[0:3], 0x1c ; C211011C s_buffer_load_dword s35, s[0:3], 0x10 ; C2118110 s_buffer_load_dword s36, s[0:3], 0x11 ; C2120111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s27, v3 ; 100C061B v_mac_f32_e32 v6, s28, v4 ; 3E0C081C v_mac_f32_e32 v6, s29, v5 ; 3E0C0A1D s_buffer_load_dword s27, s[0:3], 0x12 ; C20D8112 v_mul_f32_e32 v7, s31, v3 ; 100E061F v_mac_f32_e32 v7, s32, v4 ; 3E0E0820 v_mac_f32_e32 v7, s33, v5 ; 3E0E0A21 v_mul_f32_e32 v3, s34, v3 ; 10060622 v_mac_f32_e32 v3, s25, v4 ; 3E060819 v_mac_f32_e32 v3, s26, v5 ; 3E060A1A s_buffer_load_dword s25, s[0:3], 0x14 ; C20C8114 s_buffer_load_dword s26, s[0:3], 0x15 ; C20D0115 s_buffer_load_dword s28, s[0:3], 0x9 ; C20E0109 s_buffer_load_dword s29, s[0:3], 0xa ; C20E810A s_buffer_load_dword s31, s[0:3], 0xc ; C20F810C s_buffer_load_dword s32, s[0:3], 0xd ; C210010D s_buffer_load_dword s33, s[0:3], 0xe ; C210810E s_buffer_load_dword s34, s[0:3], 0x0 ; C2110100 v_mul_f32_e32 v4, v7, v7 ; 10080F07 v_mac_f32_e32 v4, v3, v3 ; 3E080703 v_mac_f32_e32 v4, v6, v6 ; 3E080D06 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s28, v0 ; 100A001C v_mul_f32_e32 v8, s29, v0 ; 1010001D s_buffer_load_dword s28, s[0:3], 0x1 ; C20E0101 v_mac_f32_e32 v5, s32, v1 ; 3E0A0220 v_mac_f32_e32 v8, s33, v1 ; 3E100221 v_mac_f32_e32 v5, s36, v2 ; 3E0A0424 v_mac_f32_e32 v8, s27, v2 ; 3E10041B v_add_f32_e32 v5, s26, v5 ; 060A0A1A v_add_f32_e32 v8, s30, v8 ; 0610101E s_buffer_load_dword s26, s[0:3], 0x2 ; C20D0102 s_buffer_load_dword s27, s[0:3], 0x3 ; C20D8103 s_buffer_load_dword s29, s[0:3], 0x8 ; C20E8108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v9, s27 ; 7E12021B v_mad_f32 v10, -v5, v9, s28 ; D282000A 20721305 v_mul_f32_e32 v0, s29, v0 ; 1000001D v_mac_f32_e32 v0, s31, v1 ; 3E00021F v_mac_f32_e32 v0, s35, v2 ; 3E000423 v_add_f32_e32 v0, s25, v0 ; 06000019 v_mad_f32 v1, -v0, v9, s34 ; D2820001 208A1300 v_mad_f32 v2, -v8, v9, s26 ; D2820002 206A1308 v_mul_f32_e32 v9, v1, v1 ; 10120301 v_mac_f32_e32 v9, v10, v10 ; 3E12150A v_mac_f32_e32 v9, v2, v2 ; 3E120502 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mul_f32_e32 v7, v4, v7 ; 100E0F04 v_mul_f32_e32 v3, v4, v3 ; 10060704 v_mul_f32_e32 v4, v4, v6 ; 10080D04 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mul_f32_e32 v6, v9, v10 ; 100C1509 v_mul_f32_e32 v2, v9, v2 ; 10040509 v_mul_f32_e32 v1, v1, v7 ; 10020F01 v_mac_f32_e32 v1, v6, v3 ; 3E020706 v_mac_f32_e32 v1, v2, v4 ; 3E020902 v_mad_f32 v1, -v1, v1, 1.0 ; D2820001 23CA0301 v_sqrt_f32_e32 v1, v1 ; 7E026701 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mad_f32 v0, -v7, v1, v0 ; D2820000 24020307 v_mad_f32 v2, -v3, v1, v5 ; D2820002 24160303 v_mad_f32 v1, -v4, v1, v8 ; D2820001 24220304 v_mul_f32_e32 v3, s20, v0 ; 10060014 v_mul_f32_e32 v4, s21, v0 ; 10080015 v_mul_f32_e32 v5, s22, v0 ; 100A0016 v_mul_f32_e32 v0, s23, v0 ; 10000017 v_mac_f32_e32 v3, s24, v2 ; 3E060418 v_mac_f32_e32 v4, s15, v2 ; 3E08040F v_mac_f32_e32 v5, s16, v2 ; 3E0A0410 v_mac_f32_e32 v0, s17, v2 ; 3E000411 v_mac_f32_e32 v3, s18, v1 ; 3E060212 v_mac_f32_e32 v4, s19, v1 ; 3E080213 v_mac_f32_e32 v5, s10, v1 ; 3E0A020A v_mac_f32_e32 v0, s11, v1 ; 3E00020B v_add_f32_e32 v6, s12, v3 ; 060C060C v_add_f32_e32 v8, s13, v4 ; 0610080D v_add_f32_e32 v9, s14, v5 ; 06120A0E v_add_f32_e32 v7, s9, v0 ; 060E0009 s_or_b64 exec, exec, s[6:7] ; 88FE067E v_rcp_f32_e32 v0, v7 ; 7E005507 v_sub_f32_e64 v1, 1.0, s5 ; D2080001 00000AF2 v_mul_f32_e32 v0, s8, v0 ; 10000008 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_add_f32_e32 v0, v0, v9 ; 06001300 v_max_f32_e64 v2, v0, -v7 ; D2200002 40020F00 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_mac_f32_e32 v0, s5, v2 ; 3E000405 exp 15, 12, 0, 1, 0, v6, v8, v0, v7 ; F80008CF 07000806 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 12 Code Size: 744 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL OUT[0], COLOR IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxx 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %23 = bitcast i32 %22 to float %24 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %25 = bitcast i32 %24 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25) ret void } ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080 exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 20 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL CONST[0..17] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: FSNE TEMP[0].x, CONST[1].zzzz, IMM[0].xxxx 1: UIF TEMP[0].xxxx :0 2: MUL TEMP[0], CONST[2], IN[0].xxxx 3: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0] 4: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0] 5: ADD TEMP[0].xyz, TEMP[0], CONST[5] 6: MOV TEMP[1].x, CONST[6].xxxx 7: MOV TEMP[1].y, CONST[7].xxxx 8: MOV TEMP[1].z, CONST[8].xxxx 9: MOV TEMP[2].x, CONST[6].yyyy 10: MOV TEMP[2].y, CONST[7].yyyy 11: MOV TEMP[2].z, CONST[8].yyyy 12: MOV TEMP[3].x, CONST[6].zzzz 13: MOV TEMP[3].y, CONST[7].zzzz 14: MOV TEMP[3].z, CONST[8].zzzz 15: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[1].xxxx 16: MAD TEMP[1].xyz, TEMP[2].xyzz, IN[1].yyyy, TEMP[1].xyzz 17: MAD TEMP[1].xyz, TEMP[3].xyzz, IN[1].zzzz, TEMP[1].xyzz 18: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 19: RSQ TEMP[2].x, TEMP[2].xxxx 20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 21: MUL TEMP[2].xyz, TEMP[0].xyzz, CONST[0].wwww 22: ADD TEMP[2].xyz, CONST[0].xyzz, -TEMP[2].xyzz 23: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 24: RSQ TEMP[3].x, TEMP[3].xxxx 25: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 26: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[2].xyzz 27: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 28: ADD TEMP[2].x, IMM[0].yyyy, -TEMP[2].xxxx 29: SQRT TEMP[2].x, TEMP[2].xxxx 30: MUL TEMP[2].x, CONST[1].zzzz, TEMP[2].xxxx 31: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 32: ADD TEMP[0].xyz, TEMP[0].xyzz, -TEMP[1].xyzz 33: MUL TEMP[1], CONST[10], TEMP[0].xxxx 34: MAD TEMP[1], CONST[11], TEMP[0].yyyy, TEMP[1] 35: MAD TEMP[0], CONST[12], TEMP[0].zzzz, TEMP[1] 36: ADD TEMP[0], TEMP[0], CONST[13] 37: ELSE :0 38: MUL TEMP[1], CONST[14], IN[0].xxxx 39: MAD TEMP[1], CONST[15], IN[0].yyyy, TEMP[1] 40: MAD TEMP[1], CONST[16], IN[0].zzzz, TEMP[1] 41: ADD TEMP[0], TEMP[1], CONST[17] 42: ENDIF 43: MOV TEMP[1].xyw, TEMP[0].xyxw 44: RCP TEMP[2].x, TEMP[0].wwww 45: MUL TEMP[2].x, CONST[1].xxxx, TEMP[2].xxxx 46: MOV_SAT TEMP[2].x, TEMP[2].xxxx 47: ADD TEMP[2].x, TEMP[0].zzzz, TEMP[2].xxxx 48: MAX TEMP[0].x, TEMP[2].xxxx, -TEMP[0].wwww 49: LRP TEMP[0].x, CONST[1].yyyy, TEMP[0].xxxx, TEMP[2].xxxx 50: MOV TEMP[1].z, TEMP[0].xxxx 51: MOV OUT[0], TEMP[1] 52: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = extractelement <4 x float> %20, i32 2 %24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = add i32 %5, %7 %27 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %26) %28 = extractelement <4 x float> %27, i32 0 %29 = extractelement <4 x float> %27, i32 1 %30 = extractelement <4 x float> %27, i32 2 %31 = fcmp une float %16, 0.000000e+00 br i1 %31, label %IF, label %ELSE IF: ; preds = %main_body %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %72 = fmul float %68, %21 %73 = fmul float %67, %21 %74 = fmul float %66, %21 %75 = fmul float %65, %22 %76 = fadd float %75, %72 %77 = fmul float %64, %22 %78 = fadd float %77, %73 %79 = fmul float %63, %22 %80 = fadd float %79, %74 %81 = fmul float %62, %23 %82 = fadd float %81, %76 %83 = fmul float %61, %23 %84 = fadd float %83, %78 %85 = fmul float %60, %23 %86 = fadd float %85, %80 %87 = fadd float %82, %59 %88 = fadd float %84, %58 %89 = fadd float %86, %57 %90 = fmul float %56, %28 %91 = fmul float %53, %28 %92 = fmul float %50, %28 %93 = fmul float %55, %29 %94 = fadd float %93, %90 %95 = fmul float %52, %29 %96 = fadd float %95, %91 %97 = fmul float %49, %29 %98 = fadd float %97, %92 %99 = fmul float %54, %30 %100 = fadd float %99, %94 %101 = fmul float %51, %30 %102 = fadd float %101, %96 %103 = fmul float %48, %30 %104 = fadd float %103, %98 %105 = fmul float %100, %100 %106 = fmul float %102, %102 %107 = fadd float %106, %105 %108 = fmul float %104, %104 %109 = fadd float %107, %108 %110 = call float @llvm.AMDGPU.rsq.clamped.f32(float %109) %111 = fmul float %100, %110 %112 = fmul float %102, %110 %113 = fmul float %104, %110 %114 = fmul float %87, %13 %115 = fmul float %88, %13 %116 = fmul float %89, %13 %117 = fsub float %71, %114 %118 = fsub float %70, %115 %119 = fsub float %69, %116 %120 = fmul float %117, %117 %121 = fmul float %118, %118 %122 = fadd float %121, %120 %123 = fmul float %119, %119 %124 = fadd float %122, %123 %125 = call float @llvm.AMDGPU.rsq.clamped.f32(float %124) %126 = fmul float %117, %125 %127 = fmul float %118, %125 %128 = fmul float %119, %125 %129 = fmul float %111, %126 %130 = fmul float %112, %127 %131 = fadd float %130, %129 %132 = fmul float %113, %128 %133 = fadd float %131, %132 %134 = fmul float %133, %133 %135 = fsub float 1.000000e+00, %134 %136 = call float @llvm.sqrt.f32(float %135) %137 = fmul float %16, %136 %138 = fmul float %111, %137 %139 = fmul float %112, %137 %140 = fmul float %113, %137 %141 = fsub float %87, %138 %142 = fsub float %88, %139 %143 = fsub float %89, %140 %144 = fmul float %47, %141 %145 = fmul float %46, %141 %146 = fmul float %45, %141 %147 = fmul float %44, %141 %148 = fmul float %43, %142 %149 = fadd float %148, %144 %150 = fmul float %42, %142 %151 = fadd float %150, %145 %152 = fmul float %41, %142 %153 = fadd float %152, %146 %154 = fmul float %40, %142 %155 = fadd float %154, %147 %156 = fmul float %39, %143 %157 = fadd float %156, %149 %158 = fmul float %38, %143 %159 = fadd float %158, %151 %160 = fmul float %37, %143 %161 = fadd float %160, %153 %162 = fmul float %36, %143 %163 = fadd float %162, %155 %164 = fadd float %157, %35 %165 = fadd float %159, %34 %166 = fadd float %161, %33 %167 = fadd float %163, %32 br label %ENDIF ELSE: ; preds = %main_body %168 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %169 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %170 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %171 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %172 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %173 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %174 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %175 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %176 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %177 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %178 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %179 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %180 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %181 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %182 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %183 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %184 = fmul float %183, %21 %185 = fmul float %182, %21 %186 = fmul float %181, %21 %187 = fmul float %180, %21 %188 = fmul float %179, %22 %189 = fadd float %188, %184 %190 = fmul float %178, %22 %191 = fadd float %190, %185 %192 = fmul float %177, %22 %193 = fadd float %192, %186 %194 = fmul float %176, %22 %195 = fadd float %194, %187 %196 = fmul float %175, %23 %197 = fadd float %196, %189 %198 = fmul float %174, %23 %199 = fadd float %198, %191 %200 = fmul float %173, %23 %201 = fadd float %200, %193 %202 = fmul float %172, %23 %203 = fadd float %202, %195 %204 = fadd float %197, %171 %205 = fadd float %199, %170 %206 = fadd float %201, %169 %207 = fadd float %203, %168 br label %ENDIF ENDIF: ; preds = %ELSE, %IF %temp.0 = phi float [ %164, %IF ], [ %204, %ELSE ] %temp1.0 = phi float [ %165, %IF ], [ %205, %ELSE ] %temp2.0 = phi float [ %166, %IF ], [ %206, %ELSE ] %temp3.0 = phi float [ %167, %IF ], [ %207, %ELSE ] %208 = fdiv float 1.000000e+00, %temp3.0 %209 = fmul float %14, %208 %210 = call float @llvm.AMDIL.clamp.(float %209, float 0.000000e+00, float 1.000000e+00) %211 = fadd float %temp2.0, %210 %212 = fsub float -0.000000e+00, %temp3.0 %213 = call float @llvm.maxnum.f32(float %211, float %212) %214 = call float @llvm.AMDGPU.lrp(float %15, float %213, float %211) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %temp.0, float %temp1.0, float %214, float %temp3.0) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v4, s10, v0 ; 4A08000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v4, s[4:7], 0 idxen ; E00C2000 80010004 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[3:6], v4, s[8:11], 0 idxen ; E00C2000 80020304 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_cmp_eq_f32_e64 s[6:7], 0, s4 ; D0040006 00000880 s_and_saveexec_b64 s[6:7], s[6:7] ; BE862406 s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E s_cbranch_execz BB0_1 ; BF880000 s_buffer_load_dword s5, s[0:3], 0x47 ; C2028147 s_buffer_load_dword s8, s[0:3], 0x42 ; C2040142 s_buffer_load_dword s9, s[0:3], 0x43 ; C2048143 s_buffer_load_dword s10, s[0:3], 0x44 ; C2050144 s_buffer_load_dword s11, s[0:3], 0x45 ; C2058145 s_buffer_load_dword s12, s[0:3], 0x46 ; C2060146 s_buffer_load_dword s13, s[0:3], 0x3d ; C206813D s_buffer_load_dword s14, s[0:3], 0x3e ; C207013E s_buffer_load_dword s15, s[0:3], 0x3f ; C207813F s_buffer_load_dword s16, s[0:3], 0x40 ; C2080140 s_buffer_load_dword s17, s[0:3], 0x41 ; C2088141 s_buffer_load_dword s18, s[0:3], 0x38 ; C2090138 s_buffer_load_dword s19, s[0:3], 0x39 ; C2098139 s_buffer_load_dword s20, s[0:3], 0x3a ; C20A013A s_buffer_load_dword s21, s[0:3], 0x3b ; C20A813B s_buffer_load_dword s22, s[0:3], 0x3c ; C20B013C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s18, v0 ; 100C0012 v_mul_f32_e32 v7, s19, v0 ; 100E0013 v_mul_f32_e32 v9, s20, v0 ; 10120014 v_mul_f32_e32 v10, s21, v0 ; 10140015 v_mac_f32_e32 v6, s22, v1 ; 3E0C0216 v_mac_f32_e32 v7, s13, v1 ; 3E0E020D v_mac_f32_e32 v9, s14, v1 ; 3E12020E v_mac_f32_e32 v10, s15, v1 ; 3E14020F v_mac_f32_e32 v6, s16, v2 ; 3E0C0410 v_mac_f32_e32 v7, s17, v2 ; 3E0E0411 v_mac_f32_e32 v9, s8, v2 ; 3E120408 v_mac_f32_e32 v10, s9, v2 ; 3E140409 v_add_f32_e32 v6, s10, v6 ; 060C0C0A v_add_f32_e32 v8, s11, v7 ; 06100E0B v_add_f32_e32 v9, s12, v9 ; 0612120C v_add_f32_e32 v7, s5, v10 ; 060E1405 s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F s_xor_b64 exec, exec, s[6:7] ; 89FE067E s_cbranch_execz BB0_4 ; BF880000 s_buffer_load_dword s9, s[0:3], 0x37 ; C2048137 s_buffer_load_dword s10, s[0:3], 0x32 ; C2050132 s_buffer_load_dword s11, s[0:3], 0x33 ; C2058133 s_buffer_load_dword s12, s[0:3], 0x34 ; C2060134 s_buffer_load_dword s13, s[0:3], 0x35 ; C2068135 s_buffer_load_dword s14, s[0:3], 0x36 ; C2070136 s_buffer_load_dword s15, s[0:3], 0x2d ; C207812D s_buffer_load_dword s16, s[0:3], 0x2e ; C208012E s_buffer_load_dword s17, s[0:3], 0x2f ; C208812F s_buffer_load_dword s18, s[0:3], 0x30 ; C2090130 s_buffer_load_dword s19, s[0:3], 0x31 ; C2098131 s_buffer_load_dword s20, s[0:3], 0x28 ; C20A0128 s_buffer_load_dword s21, s[0:3], 0x29 ; C20A8129 s_buffer_load_dword s22, s[0:3], 0x2a ; C20B012A s_buffer_load_dword s23, s[0:3], 0x2b ; C20B812B s_buffer_load_dword s24, s[0:3], 0x2c ; C20C012C s_buffer_load_dword s25, s[0:3], 0x1d ; C20C811D s_buffer_load_dword s26, s[0:3], 0x1e ; C20D011E s_buffer_load_dword s27, s[0:3], 0x20 ; C20D8120 s_buffer_load_dword s28, s[0:3], 0x21 ; C20E0121 s_buffer_load_dword s29, s[0:3], 0x22 ; C20E8122 s_buffer_load_dword s30, s[0:3], 0x16 ; C20F0116 s_buffer_load_dword s31, s[0:3], 0x18 ; C20F8118 s_buffer_load_dword s32, s[0:3], 0x19 ; C2100119 s_buffer_load_dword s33, s[0:3], 0x1a ; C210811A s_buffer_load_dword s34, s[0:3], 0x1c ; C211011C s_buffer_load_dword s35, s[0:3], 0x10 ; C2118110 s_buffer_load_dword s36, s[0:3], 0x11 ; C2120111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s27, v3 ; 100C061B v_mac_f32_e32 v6, s28, v4 ; 3E0C081C v_mac_f32_e32 v6, s29, v5 ; 3E0C0A1D s_buffer_load_dword s27, s[0:3], 0x12 ; C20D8112 v_mul_f32_e32 v7, s31, v3 ; 100E061F v_mac_f32_e32 v7, s32, v4 ; 3E0E0820 v_mac_f32_e32 v7, s33, v5 ; 3E0E0A21 v_mul_f32_e32 v3, s34, v3 ; 10060622 v_mac_f32_e32 v3, s25, v4 ; 3E060819 v_mac_f32_e32 v3, s26, v5 ; 3E060A1A s_buffer_load_dword s25, s[0:3], 0x14 ; C20C8114 s_buffer_load_dword s26, s[0:3], 0x15 ; C20D0115 s_buffer_load_dword s28, s[0:3], 0x9 ; C20E0109 s_buffer_load_dword s29, s[0:3], 0xa ; C20E810A s_buffer_load_dword s31, s[0:3], 0xc ; C20F810C s_buffer_load_dword s32, s[0:3], 0xd ; C210010D s_buffer_load_dword s33, s[0:3], 0xe ; C210810E s_buffer_load_dword s34, s[0:3], 0x0 ; C2110100 v_mul_f32_e32 v4, v7, v7 ; 10080F07 v_mac_f32_e32 v4, v3, v3 ; 3E080703 v_mac_f32_e32 v4, v6, v6 ; 3E080D06 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s28, v0 ; 100A001C v_mul_f32_e32 v8, s29, v0 ; 1010001D s_buffer_load_dword s28, s[0:3], 0x1 ; C20E0101 v_mac_f32_e32 v5, s32, v1 ; 3E0A0220 v_mac_f32_e32 v8, s33, v1 ; 3E100221 v_mac_f32_e32 v5, s36, v2 ; 3E0A0424 v_mac_f32_e32 v8, s27, v2 ; 3E10041B v_add_f32_e32 v5, s26, v5 ; 060A0A1A v_add_f32_e32 v8, s30, v8 ; 0610101E s_buffer_load_dword s26, s[0:3], 0x2 ; C20D0102 s_buffer_load_dword s27, s[0:3], 0x3 ; C20D8103 s_buffer_load_dword s29, s[0:3], 0x8 ; C20E8108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v9, s27 ; 7E12021B v_mad_f32 v10, -v5, v9, s28 ; D282000A 20721305 v_mul_f32_e32 v0, s29, v0 ; 1000001D v_mac_f32_e32 v0, s31, v1 ; 3E00021F v_mac_f32_e32 v0, s35, v2 ; 3E000423 v_add_f32_e32 v0, s25, v0 ; 06000019 v_mad_f32 v1, -v0, v9, s34 ; D2820001 208A1300 v_mad_f32 v2, -v8, v9, s26 ; D2820002 206A1308 v_mul_f32_e32 v9, v1, v1 ; 10120301 v_mac_f32_e32 v9, v10, v10 ; 3E12150A v_mac_f32_e32 v9, v2, v2 ; 3E120502 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mul_f32_e32 v7, v4, v7 ; 100E0F04 v_mul_f32_e32 v3, v4, v3 ; 10060704 v_mul_f32_e32 v4, v4, v6 ; 10080D04 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mul_f32_e32 v6, v9, v10 ; 100C1509 v_mul_f32_e32 v2, v9, v2 ; 10040509 v_mul_f32_e32 v1, v1, v7 ; 10020F01 v_mac_f32_e32 v1, v6, v3 ; 3E020706 v_mac_f32_e32 v1, v2, v4 ; 3E020902 v_mad_f32 v1, -v1, v1, 1.0 ; D2820001 23CA0301 v_sqrt_f32_e32 v1, v1 ; 7E026701 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mad_f32 v0, -v7, v1, v0 ; D2820000 24020307 v_mad_f32 v2, -v3, v1, v5 ; D2820002 24160303 v_mad_f32 v1, -v4, v1, v8 ; D2820001 24220304 v_mul_f32_e32 v3, s20, v0 ; 10060014 v_mul_f32_e32 v4, s21, v0 ; 10080015 v_mul_f32_e32 v5, s22, v0 ; 100A0016 v_mul_f32_e32 v0, s23, v0 ; 10000017 v_mac_f32_e32 v3, s24, v2 ; 3E060418 v_mac_f32_e32 v4, s15, v2 ; 3E08040F v_mac_f32_e32 v5, s16, v2 ; 3E0A0410 v_mac_f32_e32 v0, s17, v2 ; 3E000411 v_mac_f32_e32 v3, s18, v1 ; 3E060212 v_mac_f32_e32 v4, s19, v1 ; 3E080213 v_mac_f32_e32 v5, s10, v1 ; 3E0A020A v_mac_f32_e32 v0, s11, v1 ; 3E00020B v_add_f32_e32 v6, s12, v3 ; 060C060C v_add_f32_e32 v8, s13, v4 ; 0610080D v_add_f32_e32 v9, s14, v5 ; 06120A0E v_add_f32_e32 v7, s9, v0 ; 060E0009 s_or_b64 exec, exec, s[6:7] ; 88FE067E v_rcp_f32_e32 v0, v7 ; 7E005507 v_sub_f32_e64 v1, 1.0, s5 ; D2080001 00000AF2 v_mul_f32_e32 v0, s8, v0 ; 10000008 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_add_f32_e32 v0, v0, v9 ; 06001300 v_max_f32_e64 v2, v0, -v7 ; D2200002 40020F00 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_mac_f32_e32 v0, s5, v2 ; 3E000405 exp 15, 12, 0, 1, 0, v6, v8, v0, v7 ; F80008CF 07000806 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 12 Code Size: 744 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL OUT[0], COLOR IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxx 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %23 = bitcast i32 %22 to float %24 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %25 = bitcast i32 %24 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25) ret void } ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080 exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 20 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..18] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: FSNE TEMP[0].x, CONST[1].zzzz, IMM[0].xxxx 1: UIF TEMP[0].xxxx :0 2: MUL TEMP[0], CONST[2], IN[0].xxxx 3: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0] 4: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0] 5: ADD TEMP[0].xyz, TEMP[0], CONST[5] 6: MOV TEMP[1].x, CONST[6].xxxx 7: MOV TEMP[1].y, CONST[7].xxxx 8: MOV TEMP[1].z, CONST[8].xxxx 9: MOV TEMP[2].x, CONST[6].yyyy 10: MOV TEMP[2].y, CONST[7].yyyy 11: MOV TEMP[2].z, CONST[8].yyyy 12: MOV TEMP[3].x, CONST[6].zzzz 13: MOV TEMP[3].y, CONST[7].zzzz 14: MOV TEMP[3].z, CONST[8].zzzz 15: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[1].xxxx 16: MAD TEMP[1].xyz, TEMP[2].xyzz, IN[1].yyyy, TEMP[1].xyzz 17: MAD TEMP[1].xyz, TEMP[3].xyzz, IN[1].zzzz, TEMP[1].xyzz 18: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 19: RSQ TEMP[2].x, TEMP[2].xxxx 20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 21: MUL TEMP[2].xyz, TEMP[0].xyzz, CONST[0].wwww 22: ADD TEMP[2].xyz, CONST[0].xyzz, -TEMP[2].xyzz 23: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 24: RSQ TEMP[3].x, TEMP[3].xxxx 25: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 26: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[2].xyzz 27: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 28: ADD TEMP[2].x, IMM[0].yyyy, -TEMP[2].xxxx 29: SQRT TEMP[2].x, TEMP[2].xxxx 30: MUL TEMP[2].x, CONST[1].zzzz, TEMP[2].xxxx 31: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 32: ADD TEMP[0].xyz, TEMP[0].xyzz, -TEMP[1].xyzz 33: MUL TEMP[1], CONST[10], TEMP[0].xxxx 34: MAD TEMP[1], CONST[11], TEMP[0].yyyy, TEMP[1] 35: MAD TEMP[0], CONST[12], TEMP[0].zzzz, TEMP[1] 36: ADD TEMP[0], TEMP[0], CONST[13] 37: ELSE :0 38: MUL TEMP[1], CONST[15], IN[0].xxxx 39: MAD TEMP[1], CONST[16], IN[0].yyyy, TEMP[1] 40: MAD TEMP[1], CONST[17], IN[0].zzzz, TEMP[1] 41: ADD TEMP[0], TEMP[1], CONST[18] 42: ENDIF 43: MOV TEMP[1].xyw, TEMP[0].xyxw 44: RCP TEMP[2].x, TEMP[0].wwww 45: MUL TEMP[2].x, CONST[1].xxxx, TEMP[2].xxxx 46: MOV_SAT TEMP[2].x, TEMP[2].xxxx 47: ADD TEMP[2].x, TEMP[0].zzzz, TEMP[2].xxxx 48: MAX TEMP[0].x, TEMP[2].xxxx, -TEMP[0].wwww 49: LRP TEMP[0].x, CONST[1].yyyy, TEMP[0].xxxx, TEMP[2].xxxx 50: MOV TEMP[1].z, TEMP[0].xxxx 51: MAD TEMP[0].xy, IN[2].xyyy, CONST[14].xyyy, CONST[14].zwww 52: MOV OUT[1], TEMP[0] 53: MOV OUT[0], TEMP[1] 54: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0 %30 = add i32 %5, %7 %31 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %29, i32 0, i32 %30) %32 = extractelement <4 x float> %31, i32 0 %33 = extractelement <4 x float> %31, i32 1 %34 = extractelement <4 x float> %31, i32 2 %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %7 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = fcmp une float %16, 0.000000e+00 br i1 %41, label %IF, label %ELSE IF: ; preds = %main_body %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %82 = fmul float %78, %25 %83 = fmul float %77, %25 %84 = fmul float %76, %25 %85 = fmul float %75, %26 %86 = fadd float %85, %82 %87 = fmul float %74, %26 %88 = fadd float %87, %83 %89 = fmul float %73, %26 %90 = fadd float %89, %84 %91 = fmul float %72, %27 %92 = fadd float %91, %86 %93 = fmul float %71, %27 %94 = fadd float %93, %88 %95 = fmul float %70, %27 %96 = fadd float %95, %90 %97 = fadd float %92, %69 %98 = fadd float %94, %68 %99 = fadd float %96, %67 %100 = fmul float %66, %32 %101 = fmul float %63, %32 %102 = fmul float %60, %32 %103 = fmul float %65, %33 %104 = fadd float %103, %100 %105 = fmul float %62, %33 %106 = fadd float %105, %101 %107 = fmul float %59, %33 %108 = fadd float %107, %102 %109 = fmul float %64, %34 %110 = fadd float %109, %104 %111 = fmul float %61, %34 %112 = fadd float %111, %106 %113 = fmul float %58, %34 %114 = fadd float %113, %108 %115 = fmul float %110, %110 %116 = fmul float %112, %112 %117 = fadd float %116, %115 %118 = fmul float %114, %114 %119 = fadd float %117, %118 %120 = call float @llvm.AMDGPU.rsq.clamped.f32(float %119) %121 = fmul float %110, %120 %122 = fmul float %112, %120 %123 = fmul float %114, %120 %124 = fmul float %97, %13 %125 = fmul float %98, %13 %126 = fmul float %99, %13 %127 = fsub float %81, %124 %128 = fsub float %80, %125 %129 = fsub float %79, %126 %130 = fmul float %127, %127 %131 = fmul float %128, %128 %132 = fadd float %131, %130 %133 = fmul float %129, %129 %134 = fadd float %132, %133 %135 = call float @llvm.AMDGPU.rsq.clamped.f32(float %134) %136 = fmul float %127, %135 %137 = fmul float %128, %135 %138 = fmul float %129, %135 %139 = fmul float %121, %136 %140 = fmul float %122, %137 %141 = fadd float %140, %139 %142 = fmul float %123, %138 %143 = fadd float %141, %142 %144 = fmul float %143, %143 %145 = fsub float 1.000000e+00, %144 %146 = call float @llvm.sqrt.f32(float %145) %147 = fmul float %16, %146 %148 = fmul float %121, %147 %149 = fmul float %122, %147 %150 = fmul float %123, %147 %151 = fsub float %97, %148 %152 = fsub float %98, %149 %153 = fsub float %99, %150 %154 = fmul float %57, %151 %155 = fmul float %56, %151 %156 = fmul float %55, %151 %157 = fmul float %54, %151 %158 = fmul float %53, %152 %159 = fadd float %158, %154 %160 = fmul float %52, %152 %161 = fadd float %160, %155 %162 = fmul float %51, %152 %163 = fadd float %162, %156 %164 = fmul float %50, %152 %165 = fadd float %164, %157 %166 = fmul float %49, %153 %167 = fadd float %166, %159 %168 = fmul float %48, %153 %169 = fadd float %168, %161 %170 = fmul float %47, %153 %171 = fadd float %170, %163 %172 = fmul float %46, %153 %173 = fadd float %172, %165 %174 = fadd float %167, %45 %175 = fadd float %169, %44 %176 = fadd float %171, %43 %177 = fadd float %173, %42 br label %ENDIF ELSE: ; preds = %main_body %178 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %179 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %180 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %181 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %182 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %183 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %184 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %185 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %186 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %187 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %188 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %189 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %190 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %191 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %192 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %193 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %194 = fmul float %193, %25 %195 = fmul float %192, %25 %196 = fmul float %191, %25 %197 = fmul float %190, %25 %198 = fmul float %189, %26 %199 = fadd float %198, %194 %200 = fmul float %188, %26 %201 = fadd float %200, %195 %202 = fmul float %187, %26 %203 = fadd float %202, %196 %204 = fmul float %186, %26 %205 = fadd float %204, %197 %206 = fmul float %185, %27 %207 = fadd float %206, %199 %208 = fmul float %184, %27 %209 = fadd float %208, %201 %210 = fmul float %183, %27 %211 = fadd float %210, %203 %212 = fmul float %182, %27 %213 = fadd float %212, %205 %214 = fadd float %207, %181 %215 = fadd float %209, %180 %216 = fadd float %211, %179 %217 = fadd float %213, %178 br label %ENDIF ENDIF: ; preds = %ELSE, %IF %temp.0 = phi float [ %174, %IF ], [ %214, %ELSE ] %temp1.0 = phi float [ %175, %IF ], [ %215, %ELSE ] %temp2.0 = phi float [ %176, %IF ], [ %216, %ELSE ] %temp3.0 = phi float [ %177, %IF ], [ %217, %ELSE ] %218 = fdiv float 1.000000e+00, %temp3.0 %219 = fmul float %14, %218 %220 = call float @llvm.AMDIL.clamp.(float %219, float 0.000000e+00, float 1.000000e+00) %221 = fadd float %temp2.0, %220 %222 = fsub float -0.000000e+00, %temp3.0 %223 = call float @llvm.maxnum.f32(float %221, float %222) %224 = call float @llvm.AMDGPU.lrp(float %15, float %223, float %221) %225 = fmul float %39, %17 %226 = fadd float %225, %19 %227 = fmul float %40, %18 %228 = fadd float %227, %20 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %226, float %228, float %temp2.0, float %temp3.0) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %temp.0, float %temp1.0, float %224, float %temp3.0) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700 buffer_load_format_xyzw v[0:3], v0, s[16:19], 0 idxen ; E00C2000 80040000 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_buffer_load_dword s12, s[0:3], 0x3a ; C206013A s_buffer_load_dword s11, s[0:3], 0x3b ; C205813B s_waitcnt vmcnt(1) ; BF8C0771 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_cmp_eq_f32_e64 s[6:7], 0, s4 ; D0040006 00000880 s_and_saveexec_b64 s[6:7], s[6:7] ; BE862406 s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E s_cbranch_execz BB0_1 ; BF880000 s_buffer_load_dword s5, s[0:3], 0x4b ; C202814B s_buffer_load_dword s8, s[0:3], 0x46 ; C2040146 s_buffer_load_dword s9, s[0:3], 0x47 ; C2048147 s_buffer_load_dword s10, s[0:3], 0x48 ; C2050148 s_buffer_load_dword s13, s[0:3], 0x49 ; C2068149 s_buffer_load_dword s14, s[0:3], 0x4a ; C207014A s_buffer_load_dword s15, s[0:3], 0x41 ; C2078141 s_buffer_load_dword s16, s[0:3], 0x42 ; C2080142 s_buffer_load_dword s17, s[0:3], 0x43 ; C2088143 s_buffer_load_dword s18, s[0:3], 0x44 ; C2090144 s_buffer_load_dword s19, s[0:3], 0x45 ; C2098145 s_buffer_load_dword s20, s[0:3], 0x3c ; C20A013C s_buffer_load_dword s21, s[0:3], 0x3d ; C20A813D s_buffer_load_dword s22, s[0:3], 0x3e ; C20B013E s_buffer_load_dword s23, s[0:3], 0x3f ; C20B813F s_buffer_load_dword s24, s[0:3], 0x40 ; C20C0140 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s20, v4 ; 10040814 v_mul_f32_e32 v3, s21, v4 ; 10060815 v_mul_f32_e32 v12, s22, v4 ; 10180816 v_mul_f32_e32 v13, s23, v4 ; 101A0817 v_mac_f32_e32 v2, s24, v5 ; 3E040A18 v_mac_f32_e32 v3, s15, v5 ; 3E060A0F v_mac_f32_e32 v12, s16, v5 ; 3E180A10 v_mac_f32_e32 v13, s17, v5 ; 3E1A0A11 v_mac_f32_e32 v2, s18, v6 ; 3E040C12 v_mac_f32_e32 v3, s19, v6 ; 3E060C13 v_mac_f32_e32 v12, s8, v6 ; 3E180C08 v_mac_f32_e32 v13, s9, v6 ; 3E1A0C09 v_add_f32_e32 v10, s10, v2 ; 0614040A v_add_f32_e32 v11, s13, v3 ; 0616060D v_add_f32_e32 v12, s14, v12 ; 0618180E v_add_f32_e32 v13, s5, v13 ; 061A1A05 s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506 s_buffer_load_dword s10, s[0:3], 0x4 ; C2050104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s8, s[0:3], 0x38 ; C2040138 s_buffer_load_dword s5, s[0:3], 0x39 ; C2028139 v_mov_b32_e32 v2, s12 ; 7E04020C v_mov_b32_e32 v3, s11 ; 7E06020B s_waitcnt lgkmcnt(0) ; BF8C007F s_xor_b64 exec, exec, s[6:7] ; 89FE067E s_cbranch_execz BB0_4 ; BF880000 s_buffer_load_dword s11, s[0:3], 0x37 ; C2058137 s_buffer_load_dword s12, s[0:3], 0x32 ; C2060132 s_buffer_load_dword s13, s[0:3], 0x33 ; C2068133 s_buffer_load_dword s14, s[0:3], 0x34 ; C2070134 s_buffer_load_dword s15, s[0:3], 0x35 ; C2078135 s_buffer_load_dword s16, s[0:3], 0x36 ; C2080136 s_buffer_load_dword s17, s[0:3], 0x2d ; C208812D s_buffer_load_dword s18, s[0:3], 0x2e ; C209012E s_buffer_load_dword s19, s[0:3], 0x2f ; C209812F s_buffer_load_dword s20, s[0:3], 0x30 ; C20A0130 s_buffer_load_dword s21, s[0:3], 0x31 ; C20A8131 s_buffer_load_dword s22, s[0:3], 0x28 ; C20B0128 s_buffer_load_dword s23, s[0:3], 0x29 ; C20B8129 s_buffer_load_dword s24, s[0:3], 0x2a ; C20C012A s_buffer_load_dword s25, s[0:3], 0x2b ; C20C812B s_buffer_load_dword s26, s[0:3], 0x2c ; C20D012C s_buffer_load_dword s27, s[0:3], 0x1d ; C20D811D s_buffer_load_dword s28, s[0:3], 0x1e ; C20E011E s_buffer_load_dword s29, s[0:3], 0x20 ; C20E8120 s_buffer_load_dword s30, s[0:3], 0x21 ; C20F0121 s_buffer_load_dword s31, s[0:3], 0x22 ; C20F8122 s_buffer_load_dword s32, s[0:3], 0x16 ; C2100116 s_buffer_load_dword s33, s[0:3], 0x18 ; C2108118 s_buffer_load_dword s34, s[0:3], 0x19 ; C2110119 s_buffer_load_dword s35, s[0:3], 0x1a ; C211811A s_buffer_load_dword s36, s[0:3], 0x1c ; C212011C s_buffer_load_dword s37, s[0:3], 0x10 ; C2128110 s_buffer_load_dword s38, s[0:3], 0x11 ; C2130111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s29, v7 ; 10140E1D v_mac_f32_e32 v10, s30, v8 ; 3E14101E v_mac_f32_e32 v10, s31, v9 ; 3E14121F s_buffer_load_dword s29, s[0:3], 0x12 ; C20E8112 v_mul_f32_e32 v11, s33, v7 ; 10160E21 v_mac_f32_e32 v11, s34, v8 ; 3E161022 v_mac_f32_e32 v11, s35, v9 ; 3E161223 v_mul_f32_e32 v7, s36, v7 ; 100E0E24 v_mac_f32_e32 v7, s27, v8 ; 3E0E101B v_mac_f32_e32 v7, s28, v9 ; 3E0E121C s_buffer_load_dword s27, s[0:3], 0x14 ; C20D8114 s_buffer_load_dword s28, s[0:3], 0x15 ; C20E0115 s_buffer_load_dword s30, s[0:3], 0x9 ; C20F0109 s_buffer_load_dword s31, s[0:3], 0xa ; C20F810A s_buffer_load_dword s33, s[0:3], 0xc ; C210810C s_buffer_load_dword s34, s[0:3], 0xd ; C211010D s_buffer_load_dword s35, s[0:3], 0xe ; C211810E s_buffer_load_dword s36, s[0:3], 0x0 ; C2120100 v_mul_f32_e32 v8, v11, v11 ; 1010170B v_mac_f32_e32 v8, v7, v7 ; 3E100F07 v_mac_f32_e32 v8, v10, v10 ; 3E10150A v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v9, s30, v4 ; 1012081E v_mul_f32_e32 v12, s31, v4 ; 1018081F s_buffer_load_dword s30, s[0:3], 0x1 ; C20F0101 v_mac_f32_e32 v9, s34, v5 ; 3E120A22 v_mac_f32_e32 v12, s35, v5 ; 3E180A23 v_mac_f32_e32 v9, s38, v6 ; 3E120C26 v_mac_f32_e32 v12, s29, v6 ; 3E180C1D v_add_f32_e32 v9, s28, v9 ; 0612121C v_add_f32_e32 v12, s32, v12 ; 06181820 s_buffer_load_dword s28, s[0:3], 0x2 ; C20E0102 s_buffer_load_dword s29, s[0:3], 0x3 ; C20E8103 s_buffer_load_dword s31, s[0:3], 0x8 ; C20F8108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v13, s29 ; 7E1A021D v_mad_f32 v14, -v9, v13, s30 ; D282000E 207A1B09 v_mul_f32_e32 v4, s31, v4 ; 1008081F v_mac_f32_e32 v4, s33, v5 ; 3E080A21 v_mac_f32_e32 v4, s37, v6 ; 3E080C25 v_add_f32_e32 v4, s27, v4 ; 0608081B v_mad_f32 v5, -v4, v13, s36 ; D2820005 20921B04 v_mad_f32 v6, -v12, v13, s28 ; D2820006 20721B0C v_mul_f32_e32 v13, v5, v5 ; 101A0B05 v_mac_f32_e32 v13, v14, v14 ; 3E1A1D0E v_mac_f32_e32 v13, v6, v6 ; 3E1A0D06 v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D v_mul_f32_e32 v11, v8, v11 ; 10161708 v_mul_f32_e32 v7, v8, v7 ; 100E0F08 v_mul_f32_e32 v8, v8, v10 ; 10101508 v_mul_f32_e32 v5, v13, v5 ; 100A0B0D v_mul_f32_e32 v10, v13, v14 ; 10141D0D v_mul_f32_e32 v6, v13, v6 ; 100C0D0D v_mul_f32_e32 v5, v5, v11 ; 100A1705 v_mac_f32_e32 v5, v10, v7 ; 3E0A0F0A v_mac_f32_e32 v5, v6, v8 ; 3E0A1106 v_mad_f32 v5, -v5, v5, 1.0 ; D2820005 23CA0B05 v_sqrt_f32_e32 v5, v5 ; 7E0A6705 v_mul_f32_e32 v5, s4, v5 ; 100A0A04 v_mad_f32 v4, -v11, v5, v4 ; D2820004 24120B0B v_mad_f32 v6, -v7, v5, v9 ; D2820006 24260B07 v_mad_f32 v5, -v8, v5, v12 ; D2820005 24320B08 v_mul_f32_e32 v7, s22, v4 ; 100E0816 v_mul_f32_e32 v8, s23, v4 ; 10100817 v_mul_f32_e32 v9, s24, v4 ; 10120818 v_mul_f32_e32 v4, s25, v4 ; 10080819 v_mac_f32_e32 v7, s26, v6 ; 3E0E0C1A v_mac_f32_e32 v8, s17, v6 ; 3E100C11 v_mac_f32_e32 v9, s18, v6 ; 3E120C12 v_mac_f32_e32 v4, s19, v6 ; 3E080C13 v_mac_f32_e32 v7, s20, v5 ; 3E0E0A14 v_mac_f32_e32 v8, s21, v5 ; 3E100A15 v_mac_f32_e32 v9, s12, v5 ; 3E120A0C v_mac_f32_e32 v4, s13, v5 ; 3E080A0D v_add_f32_e32 v10, s14, v7 ; 06140E0E v_add_f32_e32 v11, s15, v8 ; 0616100F v_add_f32_e32 v12, s16, v9 ; 06181210 v_add_f32_e32 v13, s11, v4 ; 061A080B s_or_b64 exec, exec, s[6:7] ; 88FE067E v_rcp_f32_e32 v4, v13 ; 7E08550D v_sub_f32_e64 v5, 1.0, s9 ; D2080005 000012F2 v_mul_f32_e32 v4, s10, v4 ; 1008080A v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_add_f32_e32 v4, v4, v12 ; 06081904 v_max_f32_e64 v6, v4, -v13 ; D2200006 40021B04 v_mul_f32_e32 v4, v4, v5 ; 10080B04 v_mac_f32_e32 v4, s9, v6 ; 3E080C09 v_mac_f32_e32 v2, s8, v0 ; 3E040008 v_mac_f32_e32 v3, s5, v1 ; 3E060205 exp 15, 32, 0, 0, 0, v2, v3, v12, v13 ; F800020F 0D0C0302 exp 15, 12, 0, 1, 0, v10, v11, v4, v13 ; F80008CF 0D040B0A s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 16 Code Size: 800 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 3D, FLOAT DCL CONST[3] DCL CONST[0] DCL TEMP[0] DCL TEMP[1..2], LOCAL IMM[0] FLT32 { 0.2500, 0.9375, 0.0100, 1.0000} IMM[1] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[3].xxxx, CONST[3].yyyy 2: MUL TEMP[1].xy, TEMP[0].xyyy, IMM[0].xxxx 3: MOV TEMP[2].xy, IN[1].xyyy 4: TEX TEMP[2].w, TEMP[2], SAMP[0], 2D 5: MUL TEMP[2].x, TEMP[2].wwww, CONST[0].wwww 6: MUL TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy 7: MOV TEMP[1].z, TEMP[2].xxxx 8: MOV TEMP[1].xyz, TEMP[1].xyzz 9: TEX TEMP[1].w, TEMP[1], SAMP[1], 3D 10: FSLT TEMP[1].x, TEMP[1].wwww, IMM[0].zzzz 11: AND TEMP[1].x, TEMP[1].xxxx, IMM[0].wwww 12: KILL_IF -TEMP[1].xxxx 13: MOV OUT[0], IMM[1].xxxx 14: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %27 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %28 = load <32 x i8>, <32 x i8> addrspace(2)* %27, align 32, !tbaa !0 %29 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %32 = bitcast <8 x i32> addrspace(2)* %31 to <32 x i8> addrspace(2)* %33 = load <32 x i8>, <32 x i8> addrspace(2)* %32, align 32, !tbaa !0 %34 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %35 = bitcast <4 x i32> addrspace(2)* %34 to <16 x i8> addrspace(2)* %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %39 = fmul float %25, %15 %40 = fadd float %39, %26 %41 = fmul float %14, 2.500000e-01 %42 = fmul float %40, 2.500000e-01 %43 = bitcast float %37 to i32 %44 = bitcast float %38 to i32 %45 = insertelement <2 x i32> undef, i32 %43, i32 0 %46 = insertelement <2 x i32> %45, i32 %44, i32 1 %47 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %46, <32 x i8> %28, <16 x i8> %30, i32 2) %48 = extractelement <4 x float> %47, i32 3 %49 = fmul float %48, %24 %50 = fmul float %49, 9.375000e-01 %51 = bitcast float %41 to i32 %52 = bitcast float %42 to i32 %53 = bitcast float %50 to i32 %54 = insertelement <4 x i32> undef, i32 %51, i32 0 %55 = insertelement <4 x i32> %54, i32 %52, i32 1 %56 = insertelement <4 x i32> %55, i32 %53, i32 2 %57 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %56, <32 x i8> %33, <16 x i8> %36, i32 3) %58 = extractelement <4 x float> %57, i32 3 %59 = fcmp olt float %58, 0x3F847AE140000000 %60 = select i1 %59, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %60) %61 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %62 = bitcast i32 %61 to float %63 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %64 = bitcast i32 %63 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %62, float %64, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0xd ; C204010D s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_buffer_load_dword s10, s[0:3], 0xc ; C205010C s_mov_b32 m0, s9 ; BEFC0309 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 s_load_dwordx4 s[24:27], s[4:5], 0x4 ; C08C0504 s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100 v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[16:23], s[12:15] ; F0800800 00640004 v_mov_b32_e32 v1, s8 ; 7E020208 v_mac_f32_e32 v1, s10, v3 ; 3E02060A v_mov_b32_e32 v3, 0x3e800000 ; 7E0602FF 3E800000 v_mul_f32_e32 v4, v3, v2 ; 10080503 v_mul_f32_e32 v5, v3, v1 ; 100A0303 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, s0, v0 ; 10000000 v_mul_f32_e32 v6, 0x3f700000, v0 ; 100C00FF 3F700000 image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[4:7], s[28:35], s[24:27] ; F0800800 00C70004 v_mov_b32_e32 v1, 0x3c23d70a ; 7E0202FF 3C23D70A s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_gt_f32_e32 vcc, v1, v0 ; 7C080101 v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080 v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080 exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 8 Code Size: 168 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..8] DCL TEMP[0..3], LOCAL 0: MUL TEMP[0], CONST[5], IN[0].xxxx 1: MAD TEMP[0], CONST[6], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[7], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[8], IN[0].wwww, TEMP[0] 4: MUL TEMP[1].x, TEMP[0].yyyy, CONST[0].xxxx 5: MUL TEMP[2], CONST[1], TEMP[0].xxxx 6: MAD TEMP[2], CONST[2], TEMP[1].xxxx, TEMP[2] 7: ADD TEMP[2], TEMP[2], -CONST[3] 8: ADD TEMP[2].xyz, TEMP[2], CONST[4] 9: MOV TEMP[3].xy, TEMP[2].xyxx 10: MOV TEMP[3].z, -TEMP[2].zzzz 11: MUL TEMP[2], CONST[1], TEMP[0].xxxx 12: MAD TEMP[1], CONST[2], TEMP[1].xxxx, TEMP[2] 13: ADD TEMP[1], TEMP[1], CONST[3] 14: ADD TEMP[1].z, TEMP[1], CONST[4] 15: MOV TEMP[3].w, -TEMP[1].zzzz 16: MOV TEMP[1].xy, IN[2].xyxx 17: MOV TEMP[1].zw, IN[1].yyxy 18: MOV TEMP[2].x, IN[1].zzzz 19: MOV OUT[3], TEMP[2] 20: MOV OUT[2], TEMP[1] 21: MOV OUT[1], TEMP[3] 22: MOV OUT[0], TEMP[0] 23: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = extractelement <4 x float> %46, i32 3 %51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = add i32 %5, %7 %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %52, i32 0, i32 %53) %55 = extractelement <4 x float> %54, i32 0 %56 = extractelement <4 x float> %54, i32 1 %57 = extractelement <4 x float> %54, i32 2 %58 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0 %60 = add i32 %5, %7 %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %60) %62 = extractelement <4 x float> %61, i32 0 %63 = extractelement <4 x float> %61, i32 1 %64 = fmul float %27, %47 %65 = fmul float %28, %47 %66 = fmul float %29, %47 %67 = fmul float %30, %47 %68 = fmul float %31, %48 %69 = fadd float %68, %64 %70 = fmul float %32, %48 %71 = fadd float %70, %65 %72 = fmul float %33, %48 %73 = fadd float %72, %66 %74 = fmul float %34, %48 %75 = fadd float %74, %67 %76 = fmul float %35, %49 %77 = fadd float %76, %69 %78 = fmul float %36, %49 %79 = fadd float %78, %71 %80 = fmul float %37, %49 %81 = fadd float %80, %73 %82 = fmul float %38, %49 %83 = fadd float %82, %75 %84 = fmul float %39, %50 %85 = fadd float %84, %77 %86 = fmul float %40, %50 %87 = fadd float %86, %79 %88 = fmul float %41, %50 %89 = fadd float %88, %81 %90 = fmul float %42, %50 %91 = fadd float %90, %83 %92 = fmul float %87, %13 %93 = fmul float %14, %85 %94 = fmul float %15, %85 %95 = fmul float %16, %85 %96 = fmul float %18, %92 %97 = fadd float %96, %93 %98 = fmul float %19, %92 %99 = fadd float %98, %94 %100 = fmul float %20, %92 %101 = fadd float %100, %95 %102 = fsub float %97, %21 %103 = fsub float %99, %22 %104 = fsub float %101, %23 %105 = fadd float %102, %24 %106 = fadd float %103, %25 %107 = fadd float %104, %26 %108 = fsub float -0.000000e+00, %107 %109 = fmul float %15, %85 %110 = fmul float %16, %85 %111 = fmul float %17, %85 %112 = fmul float %20, %92 %113 = fadd float %112, %110 %114 = fadd float %113, %23 %115 = fadd float %114, %26 %116 = fsub float -0.000000e+00, %115 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %105, float %106, float %108, float %116) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %62, float %63, float %55, float %56) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %57, float %109, float %110, float %111) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %85, float %87, float %89, float %91) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0xe ; C204010E buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 s_buffer_load_dword s6, s[0:3], 0x12 ; C2030112 s_buffer_load_dword s7, s[0:3], 0x14 ; C2038114 s_buffer_load_dword s9, s[0:3], 0x15 ; C2048115 s_buffer_load_dword s10, s[0:3], 0x16 ; C2050116 s_buffer_load_dword s11, s[0:3], 0x17 ; C2058117 s_buffer_load_dword s12, s[0:3], 0x18 ; C2060118 s_buffer_load_dword s13, s[0:3], 0x19 ; C2068119 s_buffer_load_dword s14, s[0:3], 0x0 ; C2070100 s_buffer_load_dword s15, s[0:3], 0x4 ; C2078104 s_buffer_load_dword s16, s[0:3], 0x5 ; C2080105 s_buffer_load_dword s17, s[0:3], 0x6 ; C2088106 s_buffer_load_dword s18, s[0:3], 0x7 ; C2090107 s_buffer_load_dword s19, s[0:3], 0x8 ; C2098108 s_buffer_load_dword s20, s[0:3], 0x9 ; C20A0109 s_buffer_load_dword s21, s[0:3], 0xa ; C20A810A s_buffer_load_dword s22, s[0:3], 0xc ; C20B010C s_buffer_load_dword s23, s[0:3], 0xd ; C20B810D v_mov_b32_e32 v0, s8 ; 7E000208 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v10, s8 ; 7E140208 s_buffer_load_dword s8, s[0:3], 0x1c ; C204011C s_buffer_load_dword s24, s[0:3], 0x1d ; C20C011D s_buffer_load_dword s25, s[0:3], 0x1e ; C20C811E s_buffer_load_dword s26, s[0:3], 0x20 ; C20D0120 s_buffer_load_dword s27, s[0:3], 0x21 ; C20D8121 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v11, s7, v1 ; 10160207 v_mac_f32_e32 v11, s12, v2 ; 3E16040C v_mul_f32_e32 v12, s9, v1 ; 10180209 v_mac_f32_e32 v12, s13, v2 ; 3E18040D v_mac_f32_e32 v11, s8, v3 ; 3E160608 v_mac_f32_e32 v12, s24, v3 ; 3E180618 s_buffer_load_dword s7, s[0:3], 0x22 ; C2038122 s_buffer_load_dword s8, s[0:3], 0x23 ; C2040123 v_mac_f32_e32 v11, s26, v4 ; 3E16081A v_mac_f32_e32 v12, s27, v4 ; 3E18081B v_mov_b32_e32 v13, s22 ; 7E1A0216 v_mad_f32 v13, v11, s15, -v13 ; D282000D 84341F0B v_mul_f32_e32 v14, s14, v12 ; 101C180E v_mac_f32_e32 v13, s19, v14 ; 3E1A1C13 v_mov_b32_e32 v15, s23 ; 7E1E0217 v_mad_f32 v15, v11, s16, -v15 ; D282000F 843C210B v_mac_f32_e32 v15, s20, v14 ; 3E1E1C14 v_mad_f32 v10, v11, s17, -v10 ; D282000A 8428230B v_mac_f32_e32 v0, s17, v11 ; 3E001611 v_mac_f32_e32 v10, s21, v14 ; 3E141C15 v_mac_f32_e32 v0, s21, v14 ; 3E001C15 v_add_f32_e32 v13, s4, v13 ; 061A1A04 v_add_f32_e32 v14, s5, v15 ; 061C1E05 v_add_f32_e32 v10, s6, v10 ; 06141406 v_add_f32_e32 v0, s6, v0 ; 06000006 v_mov_b32_e32 v15, 0x80000000 ; 7E1E02FF 80000000 v_xor_b32_e32 v10, v10, v15 ; 3A141F0A v_xor_b32_e32 v0, v0, v15 ; 3A001F00 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A exp 15, 32, 0, 0, 0, v13, v14, v10, v0 ; F800020F 000A0E0D s_buffer_load_dword s5, s[0:3], 0x1b ; C202811B exp 15, 33, 0, 0, 0, v8, v9, v5, v6 ; F800021F 06050908 s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v0, s16, v11 ; 10001610 v_mul_f32_e32 v5, s17, v11 ; 100A1611 v_mul_f32_e32 v6, s18, v11 ; 100C1612 exp 15, 34, 0, 0, 0, v7, v0, v5, v6 ; F800022F 06050007 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s10, v1 ; 1000020A v_mac_f32_e32 v0, s4, v2 ; 3E000404 v_mul_f32_e32 v1, s11, v1 ; 1002020B v_mac_f32_e32 v1, s5, v2 ; 3E020405 v_mac_f32_e32 v0, s25, v3 ; 3E000619 v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_mac_f32_e32 v0, s7, v4 ; 3E000807 v_mac_f32_e32 v1, s8, v4 ; 3E020808 exp 15, 12, 0, 1, 0, v11, v12, v0, v1 ; F80008CF 01000C0B s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 16 Code Size: 396 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], SHADOW2D, FLOAT DCL CONST[0..24] DCL CONST[26..29] DCL CONST[31] DCL TEMP[0..15], LOCAL IMM[0] FLT32 { 1.0000, 0.5000, -0.5000, 7.0000} IMM[1] FLT32 { 4.0000, 3.0000, 2.0000, -2.0000} IMM[2] FLT32 { 0.1429, 0.0069, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[1].zwzz 1: MOV TEMP[0].z, IN[2].xxxx 2: MOV TEMP[1].xy, IN[1].xyyy 3: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D 4: MOV TEMP[2].xy, IN[0].xyxx 5: LRP TEMP[3].x, TEMP[1].xxxx, IN[0].wwww, IN[0].zzzz 6: MOV TEMP[2].z, TEMP[3].xxxx 7: MAD TEMP[3].x, CONST[0].xxxx, TEMP[1].xxxx, CONST[0].yyyy 8: RCP TEMP[3].x, TEMP[3].xxxx 9: LRP TEMP[1].x, CONST[1].wwww, TEMP[1].xxxx, TEMP[3].xxxx 10: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[1].xxxx 11: LRP TEMP[0].xyz, CONST[1].wwww, TEMP[2].xyzz, TEMP[1].xyzz 12: MUL TEMP[1], CONST[26], TEMP[0].xxxx 13: MAD TEMP[1], CONST[27], TEMP[0].yyyy, TEMP[1] 14: MAD TEMP[0], CONST[28], TEMP[0].zzzz, TEMP[1] 15: ADD TEMP[0], TEMP[0], CONST[29] 16: ADD TEMP[1].xyz, TEMP[0].xyzz, -CONST[2].xyzz 17: ADD TEMP[2].xyz, TEMP[0].xyzz, -CONST[3].xyzz 18: ADD TEMP[3].xyz, TEMP[0].xyzz, -CONST[4].xyzz 19: ADD TEMP[4].xyz, TEMP[0].xyzz, -CONST[5].xyzz 20: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[1].xyzz 21: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[2].xyzz 22: MOV TEMP[1].y, TEMP[2].xxxx 23: DP3 TEMP[2].x, TEMP[3].xyzz, TEMP[3].xyzz 24: MOV TEMP[1].z, TEMP[2].xxxx 25: DP3 TEMP[2].x, TEMP[4].xyzz, TEMP[4].xyzz 26: MOV TEMP[1].w, TEMP[2].xxxx 27: FSLT TEMP[1], TEMP[1], CONST[6] 28: AND TEMP[1], TEMP[1], IMM[0].xxxx 29: ADD TEMP[2].xyz, TEMP[1].yzww, -TEMP[1].xyzz 30: MOV_SAT TEMP[2].xyz, TEMP[2].xyzz 31: MUL TEMP[3], CONST[19], TEMP[0].xxxx 32: MAD TEMP[3], CONST[20], TEMP[0].yyyy, TEMP[3] 33: MAD TEMP[3], CONST[21], TEMP[0].zzzz, TEMP[3] 34: MAD TEMP[3].xyz, CONST[22], TEMP[0].wwww, TEMP[3] 35: MUL TEMP[4], CONST[15], TEMP[0].xxxx 36: MAD TEMP[4], CONST[16], TEMP[0].yyyy, TEMP[4] 37: MAD TEMP[4], CONST[17], TEMP[0].zzzz, TEMP[4] 38: MAD TEMP[4].xyz, CONST[18], TEMP[0].wwww, TEMP[4] 39: MUL TEMP[5], CONST[11], TEMP[0].xxxx 40: MAD TEMP[5], CONST[12], TEMP[0].yyyy, TEMP[5] 41: MAD TEMP[5], CONST[13], TEMP[0].zzzz, TEMP[5] 42: MAD TEMP[5].xyz, CONST[14], TEMP[0].wwww, TEMP[5] 43: MUL TEMP[6], CONST[7], TEMP[0].xxxx 44: MAD TEMP[6], CONST[8], TEMP[0].yyyy, TEMP[6] 45: MAD TEMP[6], CONST[9], TEMP[0].zzzz, TEMP[6] 46: MAD TEMP[6].xyz, CONST[10], TEMP[0].wwww, TEMP[6] 47: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[1].xxxx 48: MAD TEMP[5].xyz, TEMP[5].xyzz, TEMP[2].xxxx, TEMP[6].xyzz 49: MAD TEMP[4].xyz, TEMP[4].xyzz, TEMP[2].yyyy, TEMP[5].xyzz 50: MAD TEMP[1].xyz, TEMP[3].xyzz, TEMP[2].zzzz, TEMP[4].xyzz 51: MAD TEMP[2].xy, TEMP[1].xyyy, CONST[31].zwww, IMM[0].yyyy 52: FLR TEMP[3].xy, TEMP[2].xyyy 53: ADD TEMP[3].xy, TEMP[3].xyyy, IMM[0].zzzz 54: MUL TEMP[3].xy, TEMP[3].xyyy, CONST[31].xyyy 55: FRC TEMP[2].xy, TEMP[2].xyyy 56: MOV TEMP[4].y, IMM[0].wwww 57: MUL TEMP[5].x, IMM[1].yyyy, TEMP[2].xxxx 58: ADD TEMP[4].x, IMM[1].xxxx, -TEMP[5].xxxx 59: MAD TEMP[5].x, IMM[1].yyyy, TEMP[2].xxxx, IMM[0].xxxx 60: MOV TEMP[4].z, TEMP[5].xxxx 61: MUL TEMP[6].x, IMM[1].zzzz, TEMP[2].xxxx 62: ADD TEMP[6].x, IMM[1].yyyy, -TEMP[6].xxxx 63: RCP TEMP[7].x, TEMP[4].xxxx 64: MAD TEMP[6].x, TEMP[6].xxxx, TEMP[7].xxxx, IMM[1].wwww 65: ADD TEMP[7].x, IMM[1].yyyy, TEMP[2].xxxx 66: MUL TEMP[7].x, TEMP[7].xxxx, IMM[2].xxxx 67: MOV TEMP[6].y, TEMP[7].xxxx 68: RCP TEMP[5].x, TEMP[5].xxxx 69: MAD TEMP[5].x, TEMP[2].xxxx, TEMP[5].xxxx, IMM[1].zzzz 70: MOV TEMP[6].z, TEMP[5].xxxx 71: MUL TEMP[5].xyz, TEMP[6].xyzz, CONST[31].xxxx 72: MUL TEMP[7].x, IMM[1].yyyy, TEMP[2].yyyy 73: ADD TEMP[6].x, IMM[1].xxxx, -TEMP[7].xxxx 74: MAD TEMP[7].x, IMM[1].yyyy, TEMP[2].yyyy, IMM[0].xxxx 75: MUL TEMP[8].x, IMM[1].zzzz, TEMP[2].yyyy 76: ADD TEMP[8].x, IMM[1].yyyy, -TEMP[8].xxxx 77: RCP TEMP[9].x, TEMP[6].xxxx 78: MAD TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx, IMM[1].wwww 79: ADD TEMP[9].x, IMM[1].yyyy, TEMP[2].yyyy 80: MUL TEMP[9].x, TEMP[9].xxxx, IMM[2].xxxx 81: MOV TEMP[8].y, TEMP[9].xxxx 82: RCP TEMP[9].x, TEMP[7].xxxx 83: MAD TEMP[2].x, TEMP[2].yyyy, TEMP[9].xxxx, IMM[1].zzzz 84: MOV TEMP[8].z, TEMP[2].xxxx 85: MUL TEMP[2].xyz, TEMP[8].xyzz, CONST[31].yyyy 86: MUL TEMP[6].xyz, TEMP[4].xyzz, TEMP[6].xxxx 87: MOV TEMP[8].x, TEMP[5].xxxx 88: MOV TEMP[8].y, TEMP[2].xxxx 89: ADD TEMP[8].xy, TEMP[3].xyyy, TEMP[8].xyyy 90: MOV TEMP[9].x, TEMP[5].yyyy 91: MOV TEMP[9].y, TEMP[2].xxxx 92: ADD TEMP[9].xy, TEMP[3].xyyy, TEMP[9].xyyy 93: MOV TEMP[10].x, TEMP[5].zzzz 94: MOV TEMP[10].y, TEMP[2].xxxx 95: ADD TEMP[10].xy, TEMP[3].xyyy, TEMP[10].xyyy 96: MUL TEMP[11].xyz, TEMP[4].xyzz, IMM[0].wwww 97: MOV TEMP[12].x, TEMP[5].xxxx 98: MOV TEMP[12].y, TEMP[2].yyyy 99: ADD TEMP[12].xy, TEMP[3].xyyy, TEMP[12].xyyy 100: MOV TEMP[13].x, TEMP[5].yyyy 101: MOV TEMP[13].y, TEMP[2].yyyy 102: ADD TEMP[13].xy, TEMP[3].xyyy, TEMP[13].xyyy 103: MOV TEMP[14].x, TEMP[5].zzzz 104: MOV TEMP[14].y, TEMP[2].yyyy 105: ADD TEMP[14].xy, TEMP[3].xyyy, TEMP[14].xyyy 106: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[7].xxxx 107: MOV TEMP[7].x, TEMP[5].xxxx 108: MOV TEMP[7].y, TEMP[2].zzzz 109: ADD TEMP[7].xy, TEMP[3].xyyy, TEMP[7].xyyy 110: MOV TEMP[15].x, TEMP[5].yyyy 111: MOV TEMP[15].y, TEMP[2].zzzz 112: ADD TEMP[15].xy, TEMP[3].xyyy, TEMP[15].xyyy 113: MOV TEMP[5].x, TEMP[5].zzzz 114: MOV TEMP[5].y, TEMP[2].zzzz 115: ADD TEMP[2].xy, TEMP[3].xyyy, TEMP[5].xyyy 116: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[24].xyzz 117: MOV TEMP[2].xy, TEMP[2].xyyy 118: MOV TEMP[2].z, TEMP[1].zzzz 119: TEX TEMP[2].x, TEMP[2], SAMP[1], SHADOW2D 120: MOV TEMP[3].xy, TEMP[15].xyyy 121: MOV TEMP[3].z, TEMP[1].zzzz 122: TEX TEMP[3].x, TEMP[3], SAMP[1], SHADOW2D 123: MOV TEMP[5].xy, TEMP[7].xyyy 124: MOV TEMP[5].z, TEMP[1].zzzz 125: TEX TEMP[5].x, TEMP[5], SAMP[1], SHADOW2D 126: MOV TEMP[7].xy, TEMP[14].xyyy 127: MOV TEMP[7].z, TEMP[1].zzzz 128: TEX TEMP[7].x, TEMP[7], SAMP[1], SHADOW2D 129: MOV TEMP[13].xy, TEMP[13].xyyy 130: MOV TEMP[13].z, TEMP[1].zzzz 131: TEX TEMP[13].x, TEMP[13], SAMP[1], SHADOW2D 132: MOV TEMP[12].xy, TEMP[12].xyyy 133: MOV TEMP[12].z, TEMP[1].zzzz 134: TEX TEMP[12].x, TEMP[12], SAMP[1], SHADOW2D 135: MOV TEMP[10].xy, TEMP[10].xyyy 136: MOV TEMP[10].z, TEMP[1].zzzz 137: TEX TEMP[10].x, TEMP[10], SAMP[1], SHADOW2D 138: MOV TEMP[9].xy, TEMP[9].xyyy 139: MOV TEMP[9].z, TEMP[1].zzzz 140: TEX TEMP[9].x, TEMP[9], SAMP[1], SHADOW2D 141: MOV TEMP[8].xy, TEMP[8].xyyy 142: MOV TEMP[8].z, TEMP[1].zzzz 143: TEX TEMP[1].x, TEMP[8], SAMP[1], SHADOW2D 144: MUL TEMP[1].x, TEMP[6].xxxx, TEMP[1].xxxx 145: MAD TEMP[1].x, TEMP[6].yyyy, TEMP[9].xxxx, TEMP[1].xxxx 146: MAD TEMP[1].x, TEMP[6].zzzz, TEMP[10].xxxx, TEMP[1].xxxx 147: MAD TEMP[1].x, TEMP[11].xxxx, TEMP[12].xxxx, TEMP[1].xxxx 148: MAD TEMP[1].x, TEMP[11].yyyy, TEMP[13].xxxx, TEMP[1].xxxx 149: MAD TEMP[1].x, TEMP[11].zzzz, TEMP[7].xxxx, TEMP[1].xxxx 150: MAD TEMP[1].x, TEMP[4].xxxx, TEMP[5].xxxx, TEMP[1].xxxx 151: MAD TEMP[1].x, TEMP[4].yyyy, TEMP[3].xxxx, TEMP[1].xxxx 152: MAD TEMP[1].x, TEMP[4].zzzz, TEMP[2].xxxx, TEMP[1].xxxx 153: MUL TEMP[1].x, TEMP[1].xxxx, IMM[2].yyyy 154: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx, CONST[23].xxxx 155: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[0].xyzz 156: SQRT TEMP[0].x, TEMP[0].xxxx 157: MAD TEMP[0].x, TEMP[0].xxxx, CONST[23].zzzz, CONST[23].wwww 158: MOV_SAT TEMP[0].x, TEMP[0].xxxx 159: ADD TEMP[0].x, TEMP[1].xxxx, TEMP[0].xxxx 160: MOV OUT[0], TEMP[0].xxxx 161: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 248) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292) %78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 296) %79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312) %82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320) %83 = call float @llvm.SI.load.const(<16 x i8> %23, i32 324) %84 = call float @llvm.SI.load.const(<16 x i8> %23, i32 328) %85 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336) %86 = call float @llvm.SI.load.const(<16 x i8> %23, i32 340) %87 = call float @llvm.SI.load.const(<16 x i8> %23, i32 344) %88 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %89 = call float @llvm.SI.load.const(<16 x i8> %23, i32 356) %90 = call float @llvm.SI.load.const(<16 x i8> %23, i32 360) %91 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368) %92 = call float @llvm.SI.load.const(<16 x i8> %23, i32 376) %93 = call float @llvm.SI.load.const(<16 x i8> %23, i32 380) %94 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384) %95 = call float @llvm.SI.load.const(<16 x i8> %23, i32 388) %96 = call float @llvm.SI.load.const(<16 x i8> %23, i32 392) %97 = call float @llvm.SI.load.const(<16 x i8> %23, i32 416) %98 = call float @llvm.SI.load.const(<16 x i8> %23, i32 420) %99 = call float @llvm.SI.load.const(<16 x i8> %23, i32 424) %100 = call float @llvm.SI.load.const(<16 x i8> %23, i32 428) %101 = call float @llvm.SI.load.const(<16 x i8> %23, i32 432) %102 = call float @llvm.SI.load.const(<16 x i8> %23, i32 436) %103 = call float @llvm.SI.load.const(<16 x i8> %23, i32 440) %104 = call float @llvm.SI.load.const(<16 x i8> %23, i32 444) %105 = call float @llvm.SI.load.const(<16 x i8> %23, i32 448) %106 = call float @llvm.SI.load.const(<16 x i8> %23, i32 452) %107 = call float @llvm.SI.load.const(<16 x i8> %23, i32 456) %108 = call float @llvm.SI.load.const(<16 x i8> %23, i32 460) %109 = call float @llvm.SI.load.const(<16 x i8> %23, i32 464) %110 = call float @llvm.SI.load.const(<16 x i8> %23, i32 468) %111 = call float @llvm.SI.load.const(<16 x i8> %23, i32 472) %112 = call float @llvm.SI.load.const(<16 x i8> %23, i32 476) %113 = call float @llvm.SI.load.const(<16 x i8> %23, i32 496) %114 = call float @llvm.SI.load.const(<16 x i8> %23, i32 500) %115 = call float @llvm.SI.load.const(<16 x i8> %23, i32 504) %116 = call float @llvm.SI.load.const(<16 x i8> %23, i32 508) %117 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %118 = load <32 x i8>, <32 x i8> addrspace(2)* %117, align 32, !tbaa !0 %119 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %120 = load <16 x i8>, <16 x i8> addrspace(2)* %119, align 16, !tbaa !0 %121 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %122 = load <8 x i32>, <8 x i32> addrspace(2)* %121, align 32, !tbaa !0 %123 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %124 = load <4 x i32>, <4 x i32> addrspace(2)* %123, align 16, !tbaa !0 %125 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %126 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %127 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %128 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %129 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %130 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %131 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %132 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %133 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %134 = bitcast float %129 to i32 %135 = bitcast float %130 to i32 %136 = insertelement <2 x i32> undef, i32 %134, i32 0 %137 = insertelement <2 x i32> %136, i32 %135, i32 1 %138 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %137, <32 x i8> %118, <16 x i8> %120, i32 2) %139 = extractelement <4 x float> %138, i32 0 %140 = call float @llvm.AMDGPU.lrp(float %139, float %128, float %127) %141 = fmul float %24, %139 %142 = fadd float %141, %25 %143 = fdiv float 1.000000e+00, %142 %144 = call float @llvm.AMDGPU.lrp(float %26, float %139, float %143) %145 = fmul float %131, %144 %146 = fmul float %132, %144 %147 = fmul float %133, %144 %148 = call float @llvm.AMDGPU.lrp(float %26, float %125, float %145) %149 = call float @llvm.AMDGPU.lrp(float %26, float %126, float %146) %150 = call float @llvm.AMDGPU.lrp(float %26, float %140, float %147) %151 = fmul float %97, %148 %152 = fmul float %98, %148 %153 = fmul float %99, %148 %154 = fmul float %100, %148 %155 = fmul float %101, %149 %156 = fadd float %155, %151 %157 = fmul float %102, %149 %158 = fadd float %157, %152 %159 = fmul float %103, %149 %160 = fadd float %159, %153 %161 = fmul float %104, %149 %162 = fadd float %161, %154 %163 = fmul float %105, %150 %164 = fadd float %163, %156 %165 = fmul float %106, %150 %166 = fadd float %165, %158 %167 = fmul float %107, %150 %168 = fadd float %167, %160 %169 = fmul float %108, %150 %170 = fadd float %169, %162 %171 = fadd float %164, %109 %172 = fadd float %166, %110 %173 = fadd float %168, %111 %174 = fadd float %170, %112 %175 = fsub float %171, %27 %176 = fsub float %172, %28 %177 = fsub float %173, %29 %178 = fsub float %171, %30 %179 = fsub float %172, %31 %180 = fsub float %173, %32 %181 = fsub float %171, %33 %182 = fsub float %172, %34 %183 = fsub float %173, %35 %184 = fsub float %171, %36 %185 = fsub float %172, %37 %186 = fsub float %173, %38 %187 = fmul float %175, %175 %188 = fmul float %176, %176 %189 = fadd float %188, %187 %190 = fmul float %177, %177 %191 = fadd float %189, %190 %192 = fmul float %178, %178 %193 = fmul float %179, %179 %194 = fadd float %193, %192 %195 = fmul float %180, %180 %196 = fadd float %194, %195 %197 = fmul float %181, %181 %198 = fmul float %182, %182 %199 = fadd float %198, %197 %200 = fmul float %183, %183 %201 = fadd float %199, %200 %202 = fmul float %184, %184 %203 = fmul float %185, %185 %204 = fadd float %203, %202 %205 = fmul float %186, %186 %206 = fadd float %204, %205 %207 = fcmp olt float %191, %39 %208 = fcmp olt float %196, %40 %209 = fcmp olt float %201, %41 %210 = fcmp olt float %206, %42 %211 = select i1 %207, float 1.000000e+00, float 0.000000e+00 %212 = select i1 %208, float 1.000000e+00, float 0.000000e+00 %213 = select i1 %209, float 1.000000e+00, float 0.000000e+00 %214 = select i1 %210, float 1.000000e+00, float 0.000000e+00 %215 = fsub float %212, %211 %216 = fsub float %213, %212 %217 = fsub float %214, %213 %218 = call float @llvm.AMDIL.clamp.(float %215, float 0.000000e+00, float 1.000000e+00) %219 = call float @llvm.AMDIL.clamp.(float %216, float 0.000000e+00, float 1.000000e+00) %220 = call float @llvm.AMDIL.clamp.(float %217, float 0.000000e+00, float 1.000000e+00) %221 = fmul float %79, %171 %222 = fmul float %80, %171 %223 = fmul float %81, %171 %224 = fmul float %82, %172 %225 = fadd float %224, %221 %226 = fmul float %83, %172 %227 = fadd float %226, %222 %228 = fmul float %84, %172 %229 = fadd float %228, %223 %230 = fmul float %85, %173 %231 = fadd float %230, %225 %232 = fmul float %86, %173 %233 = fadd float %232, %227 %234 = fmul float %87, %173 %235 = fadd float %234, %229 %236 = fmul float %88, %174 %237 = fadd float %236, %231 %238 = fmul float %89, %174 %239 = fadd float %238, %233 %240 = fmul float %90, %174 %241 = fadd float %240, %235 %242 = fmul float %67, %171 %243 = fmul float %68, %171 %244 = fmul float %69, %171 %245 = fmul float %70, %172 %246 = fadd float %245, %242 %247 = fmul float %71, %172 %248 = fadd float %247, %243 %249 = fmul float %72, %172 %250 = fadd float %249, %244 %251 = fmul float %73, %173 %252 = fadd float %251, %246 %253 = fmul float %74, %173 %254 = fadd float %253, %248 %255 = fmul float %75, %173 %256 = fadd float %255, %250 %257 = fmul float %76, %174 %258 = fadd float %257, %252 %259 = fmul float %77, %174 %260 = fadd float %259, %254 %261 = fmul float %78, %174 %262 = fadd float %261, %256 %263 = fmul float %55, %171 %264 = fmul float %56, %171 %265 = fmul float %57, %171 %266 = fmul float %58, %172 %267 = fadd float %266, %263 %268 = fmul float %59, %172 %269 = fadd float %268, %264 %270 = fmul float %60, %172 %271 = fadd float %270, %265 %272 = fmul float %61, %173 %273 = fadd float %272, %267 %274 = fmul float %62, %173 %275 = fadd float %274, %269 %276 = fmul float %63, %173 %277 = fadd float %276, %271 %278 = fmul float %64, %174 %279 = fadd float %278, %273 %280 = fmul float %65, %174 %281 = fadd float %280, %275 %282 = fmul float %66, %174 %283 = fadd float %282, %277 %284 = fmul float %43, %171 %285 = fmul float %44, %171 %286 = fmul float %45, %171 %287 = fmul float %46, %172 %288 = fadd float %287, %284 %289 = fmul float %47, %172 %290 = fadd float %289, %285 %291 = fmul float %48, %172 %292 = fadd float %291, %286 %293 = fmul float %49, %173 %294 = fadd float %293, %288 %295 = fmul float %50, %173 %296 = fadd float %295, %290 %297 = fmul float %51, %173 %298 = fadd float %297, %292 %299 = fmul float %52, %174 %300 = fadd float %299, %294 %301 = fmul float %53, %174 %302 = fadd float %301, %296 %303 = fmul float %54, %174 %304 = fadd float %303, %298 %305 = fmul float %300, %211 %306 = fmul float %302, %211 %307 = fmul float %304, %211 %308 = fmul float %279, %218 %309 = fadd float %308, %305 %310 = fmul float %281, %218 %311 = fadd float %310, %306 %312 = fmul float %283, %218 %313 = fadd float %312, %307 %314 = fmul float %258, %219 %315 = fadd float %314, %309 %316 = fmul float %260, %219 %317 = fadd float %316, %311 %318 = fmul float %262, %219 %319 = fadd float %318, %313 %320 = fmul float %237, %220 %321 = fadd float %320, %315 %322 = fmul float %239, %220 %323 = fadd float %322, %317 %324 = fmul float %241, %220 %325 = fadd float %324, %319 %326 = fmul float %321, %115 %327 = fadd float %326, 5.000000e-01 %328 = fmul float %323, %116 %329 = fadd float %328, 5.000000e-01 %330 = call float @llvm.floor.f32(float %327) %331 = call float @llvm.floor.f32(float %329) %332 = fadd float %330, -5.000000e-01 %333 = fadd float %331, -5.000000e-01 %334 = fmul float %332, %113 %335 = fmul float %333, %114 %336 = call float @llvm.floor.f32(float %327) %337 = fsub float %327, %336 %338 = call float @llvm.floor.f32(float %329) %339 = fsub float %329, %338 %340 = fmul float %337, 3.000000e+00 %341 = fsub float 4.000000e+00, %340 %342 = fmul float %337, 3.000000e+00 %343 = fadd float %342, 1.000000e+00 %344 = fmul float %337, 2.000000e+00 %345 = fsub float 3.000000e+00, %344 %346 = fdiv float 1.000000e+00, %341 %347 = fmul float %345, %346 %348 = fadd float %347, -2.000000e+00 %349 = fadd float %337, 3.000000e+00 %350 = fmul float %349, 0x3FC24924A0000000 %351 = fdiv float 1.000000e+00, %343 %352 = fmul float %337, %351 %353 = fadd float %352, 2.000000e+00 %354 = fmul float %348, %113 %355 = fmul float %350, %113 %356 = fmul float %353, %113 %357 = fmul float %339, 3.000000e+00 %358 = fsub float 4.000000e+00, %357 %359 = fmul float %339, 3.000000e+00 %360 = fadd float %359, 1.000000e+00 %361 = fmul float %339, 2.000000e+00 %362 = fsub float 3.000000e+00, %361 %363 = fdiv float 1.000000e+00, %358 %364 = fmul float %362, %363 %365 = fadd float %364, -2.000000e+00 %366 = fadd float %339, 3.000000e+00 %367 = fmul float %366, 0x3FC24924A0000000 %368 = fdiv float 1.000000e+00, %360 %369 = fmul float %339, %368 %370 = fadd float %369, 2.000000e+00 %371 = fmul float %365, %114 %372 = fmul float %367, %114 %373 = fmul float %370, %114 %374 = fmul float %341, %358 %375 = fmul float %358, 7.000000e+00 %376 = fmul float %343, %358 %377 = fadd float %334, %354 %378 = fadd float %335, %371 %379 = fadd float %334, %355 %380 = fadd float %335, %371 %381 = fadd float %334, %356 %382 = fadd float %335, %371 %383 = fmul float %341, 7.000000e+00 %384 = fmul float %343, 7.000000e+00 %385 = fadd float %334, %354 %386 = fadd float %335, %372 %387 = fadd float %334, %355 %388 = fadd float %335, %372 %389 = fadd float %334, %356 %390 = fadd float %335, %372 %391 = fmul float %341, %360 %392 = fmul float %360, 7.000000e+00 %393 = fmul float %343, %360 %394 = fadd float %334, %354 %395 = fadd float %335, %373 %396 = fadd float %334, %355 %397 = fadd float %335, %373 %398 = fadd float %334, %356 %399 = fadd float %335, %373 %400 = fsub float %171, %94 %401 = fsub float %172, %95 %402 = fsub float %173, %96 %403 = bitcast float %325 to i32 %404 = bitcast float %398 to i32 %405 = bitcast float %399 to i32 %406 = insertelement <4 x i32> undef, i32 %403, i32 0 %407 = insertelement <4 x i32> %406, i32 %404, i32 1 %408 = insertelement <4 x i32> %407, i32 %405, i32 2 %409 = bitcast <8 x i32> %122 to <32 x i8> %410 = bitcast <4 x i32> %124 to <16 x i8> %411 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %408, <32 x i8> %409, <16 x i8> %410, i32 7) %412 = extractelement <4 x float> %411, i32 0 %413 = bitcast float %325 to i32 %414 = bitcast float %396 to i32 %415 = bitcast float %397 to i32 %416 = insertelement <4 x i32> undef, i32 %413, i32 0 %417 = insertelement <4 x i32> %416, i32 %414, i32 1 %418 = insertelement <4 x i32> %417, i32 %415, i32 2 %419 = bitcast <8 x i32> %122 to <32 x i8> %420 = bitcast <4 x i32> %124 to <16 x i8> %421 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %418, <32 x i8> %419, <16 x i8> %420, i32 7) %422 = extractelement <4 x float> %421, i32 0 %423 = bitcast float %325 to i32 %424 = bitcast float %394 to i32 %425 = bitcast float %395 to i32 %426 = insertelement <4 x i32> undef, i32 %423, i32 0 %427 = insertelement <4 x i32> %426, i32 %424, i32 1 %428 = insertelement <4 x i32> %427, i32 %425, i32 2 %429 = bitcast <8 x i32> %122 to <32 x i8> %430 = bitcast <4 x i32> %124 to <16 x i8> %431 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %428, <32 x i8> %429, <16 x i8> %430, i32 7) %432 = extractelement <4 x float> %431, i32 0 %433 = bitcast float %325 to i32 %434 = bitcast float %389 to i32 %435 = bitcast float %390 to i32 %436 = insertelement <4 x i32> undef, i32 %433, i32 0 %437 = insertelement <4 x i32> %436, i32 %434, i32 1 %438 = insertelement <4 x i32> %437, i32 %435, i32 2 %439 = bitcast <8 x i32> %122 to <32 x i8> %440 = bitcast <4 x i32> %124 to <16 x i8> %441 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %438, <32 x i8> %439, <16 x i8> %440, i32 7) %442 = extractelement <4 x float> %441, i32 0 %443 = bitcast float %325 to i32 %444 = bitcast float %387 to i32 %445 = bitcast float %388 to i32 %446 = insertelement <4 x i32> undef, i32 %443, i32 0 %447 = insertelement <4 x i32> %446, i32 %444, i32 1 %448 = insertelement <4 x i32> %447, i32 %445, i32 2 %449 = bitcast <8 x i32> %122 to <32 x i8> %450 = bitcast <4 x i32> %124 to <16 x i8> %451 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %448, <32 x i8> %449, <16 x i8> %450, i32 7) %452 = extractelement <4 x float> %451, i32 0 %453 = bitcast float %325 to i32 %454 = bitcast float %385 to i32 %455 = bitcast float %386 to i32 %456 = insertelement <4 x i32> undef, i32 %453, i32 0 %457 = insertelement <4 x i32> %456, i32 %454, i32 1 %458 = insertelement <4 x i32> %457, i32 %455, i32 2 %459 = bitcast <8 x i32> %122 to <32 x i8> %460 = bitcast <4 x i32> %124 to <16 x i8> %461 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %458, <32 x i8> %459, <16 x i8> %460, i32 7) %462 = extractelement <4 x float> %461, i32 0 %463 = bitcast float %325 to i32 %464 = bitcast float %381 to i32 %465 = bitcast float %382 to i32 %466 = insertelement <4 x i32> undef, i32 %463, i32 0 %467 = insertelement <4 x i32> %466, i32 %464, i32 1 %468 = insertelement <4 x i32> %467, i32 %465, i32 2 %469 = bitcast <8 x i32> %122 to <32 x i8> %470 = bitcast <4 x i32> %124 to <16 x i8> %471 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %468, <32 x i8> %469, <16 x i8> %470, i32 7) %472 = extractelement <4 x float> %471, i32 0 %473 = bitcast float %325 to i32 %474 = bitcast float %379 to i32 %475 = bitcast float %380 to i32 %476 = insertelement <4 x i32> undef, i32 %473, i32 0 %477 = insertelement <4 x i32> %476, i32 %474, i32 1 %478 = insertelement <4 x i32> %477, i32 %475, i32 2 %479 = bitcast <8 x i32> %122 to <32 x i8> %480 = bitcast <4 x i32> %124 to <16 x i8> %481 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %478, <32 x i8> %479, <16 x i8> %480, i32 7) %482 = extractelement <4 x float> %481, i32 0 %483 = bitcast float %325 to i32 %484 = bitcast float %377 to i32 %485 = bitcast float %378 to i32 %486 = insertelement <4 x i32> undef, i32 %483, i32 0 %487 = insertelement <4 x i32> %486, i32 %484, i32 1 %488 = insertelement <4 x i32> %487, i32 %485, i32 2 %489 = bitcast <8 x i32> %122 to <32 x i8> %490 = bitcast <4 x i32> %124 to <16 x i8> %491 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %488, <32 x i8> %489, <16 x i8> %490, i32 7) %492 = extractelement <4 x float> %491, i32 0 %493 = fmul float %374, %492 %494 = fmul float %375, %482 %495 = fadd float %494, %493 %496 = fmul float %376, %472 %497 = fadd float %496, %495 %498 = fmul float %383, %462 %499 = fadd float %498, %497 %500 = fmul float %452, 4.900000e+01 %501 = fadd float %500, %499 %502 = fmul float %384, %442 %503 = fadd float %502, %501 %504 = fmul float %391, %432 %505 = fadd float %504, %503 %506 = fmul float %392, %422 %507 = fadd float %506, %505 %508 = fmul float %393, %412 %509 = fadd float %508, %507 %510 = fmul float %509, 0x3F7C71C720000000 %511 = call float @llvm.AMDGPU.lrp(float %510, float 1.000000e+00, float %91) %512 = fmul float %400, %400 %513 = fmul float %401, %401 %514 = fadd float %513, %512 %515 = fmul float %402, %402 %516 = fadd float %514, %515 %517 = call float @llvm.sqrt.f32(float %516) %518 = fmul float %517, %92 %519 = fadd float %518, %93 %520 = call float @llvm.AMDIL.clamp.(float %519, float 0.000000e+00, float 1.000000e+00) %521 = fadd float %511, %520 %522 = call i32 @llvm.SI.packf16(float %521, float %521) %523 = bitcast i32 %522 to float %524 = call i32 @llvm.SI.packf16(float %521, float %521) %525 = bitcast i32 %524 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %523, float %525, float %523, float %525) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b64 s[16:17], s[6:7] ; BE900406 s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[76:79], s[4:5], 0x0 ; C0A60500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s74, s[0:3], 0x0 ; C2250100 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s75, s[0:3], 0x7 ; C2258107 s_buffer_load_dword s33, s[0:3], 0x8 ; C2108108 s_buffer_load_dword s34, s[0:3], 0x9 ; C2110109 s_buffer_load_dword s32, s[0:3], 0xa ; C210010A s_buffer_load_dword s30, s[0:3], 0xc ; C20F010C s_buffer_load_dword s31, s[0:3], 0xd ; C20F810D s_buffer_load_dword s5, s[0:3], 0xe ; C202810E s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v25, s5, 11 ; 04331605 s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v25, s5, 7 ; 04330E05 s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v25, s5, 9 ; 04331205 v_mov_b32_e32 v6, s4 ; 7E0C0204 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v25, s4, 8 ; 04331004 v_sub_f32_e64 v7, 1.0, s75 ; D2080007 000096F2 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v25, s4, 3 ; 04330604 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v25, s4, 5 ; 04330A04 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v25, s4, 4 ; 04330804 s_buffer_load_dword s36, s[0:3], 0x18 ; C2120118 s_buffer_load_dword s37, s[0:3], 0x19 ; C2128119 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v25, s4, 10 ; 04331404 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v25, s4, 6 ; 04330C04 s_buffer_load_dword s48, s[0:3], 0x1c ; C218011C s_buffer_load_dword s49, s[0:3], 0x1d ; C218811D s_buffer_load_dword s42, s[0:3], 0x1e ; C215011E s_buffer_load_dword s47, s[0:3], 0x20 ; C2178120 s_buffer_load_dword s46, s[0:3], 0x21 ; C2170121 s_buffer_load_dword s45, s[0:3], 0x22 ; C2168122 s_buffer_load_dword s43, s[0:3], 0x24 ; C2158124 s_buffer_load_dword s40, s[0:3], 0x25 ; C2140125 s_buffer_load_dword s39, s[0:3], 0x26 ; C2138126 s_buffer_load_dword s38, s[0:3], 0x28 ; C2130128 v_interp_p1_f32 v8, v0, 0, 1, [m0] ; C8200400 v_interp_p2_f32 v8, [v8], v1, 0, 1, [m0] ; C8210401 v_interp_p1_f32 v9, v0, 1, 1, [m0] ; C8240500 v_interp_p2_f32 v9, [v9], v1, 1, 1, [m0] ; C8250501 v_interp_p1_f32 v10, v0, 2, 1, [m0] ; C8280600 v_interp_p2_f32 v10, [v10], v1, 2, 1, [m0] ; C8290601 v_interp_p1_f32 v11, v0, 3, 1, [m0] ; C82C0700 v_interp_p2_f32 v11, [v11], v1, 3, 1, [m0] ; C82D0701 v_interp_p1_f32 v0, v0, 0, 2, [m0] ; C8000800 v_interp_p2_f32 v0, [v0], v1, 0, 2, [m0] ; C8010801 s_buffer_load_dword s44, s[0:3], 0x29 ; C2160129 s_buffer_load_dword s41, s[0:3], 0x2a ; C214812A s_buffer_load_dword s61, s[0:3], 0x2c ; C21E812C s_buffer_load_dword s60, s[0:3], 0x2d ; C21E012D s_buffer_load_dword s59, s[0:3], 0x2e ; C21D812E s_buffer_load_dword s58, s[0:3], 0x30 ; C21D0130 s_buffer_load_dword s57, s[0:3], 0x31 ; C21C8131 s_buffer_load_dword s56, s[0:3], 0x32 ; C21C0132 s_buffer_load_dword s55, s[0:3], 0x34 ; C21B8134 s_buffer_load_dword s54, s[0:3], 0x35 ; C21B0135 s_buffer_load_dword s53, s[0:3], 0x36 ; C21A8136 s_buffer_load_dword s52, s[0:3], 0x38 ; C21A0138 s_buffer_load_dword s51, s[0:3], 0x39 ; C2198139 s_buffer_load_dword s50, s[0:3], 0x3a ; C219013A s_buffer_load_dword s71, s[0:3], 0x3c ; C223813C s_buffer_load_dword s72, s[0:3], 0x3d ; C224013D s_buffer_load_dword s73, s[0:3], 0x3e ; C224813E s_buffer_load_dword s70, s[0:3], 0x40 ; C2230140 s_buffer_load_dword s69, s[0:3], 0x41 ; C2228141 s_buffer_load_dword s68, s[0:3], 0x42 ; C2220142 s_buffer_load_dword s67, s[0:3], 0x44 ; C2218144 s_buffer_load_dword s66, s[0:3], 0x45 ; C2210145 s_buffer_load_dword s65, s[0:3], 0x46 ; C2208146 s_buffer_load_dword s64, s[0:3], 0x48 ; C2200148 s_buffer_load_dword s63, s[0:3], 0x49 ; C21F8149 s_buffer_load_dword s62, s[0:3], 0x4a ; C21F014A s_buffer_load_dword s80, s[0:3], 0x4c ; C228014C s_buffer_load_dword s81, s[0:3], 0x4d ; C228814D s_buffer_load_dword s82, s[0:3], 0x4e ; C229014E s_buffer_load_dword s83, s[0:3], 0x50 ; C2298150 s_buffer_load_dword s84, s[0:3], 0x51 ; C22A0151 s_buffer_load_dword s85, s[0:3], 0x52 ; C22A8152 s_buffer_load_dword s86, s[0:3], 0x54 ; C22B0154 s_buffer_load_dword s87, s[0:3], 0x55 ; C22B8155 s_buffer_load_dword s88, s[0:3], 0x56 ; C22C0156 s_buffer_load_dword s89, s[0:3], 0x58 ; C22C8158 s_buffer_load_dword s90, s[0:3], 0x59 ; C22D0159 s_buffer_load_dword s91, s[0:3], 0x5a ; C22D815A s_buffer_load_dword s4, s[0:3], 0x5c ; C202015C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v25, s4, 0 ; 04330004 s_buffer_load_dword s4, s[0:3], 0x5e ; C202015E s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v25, s4, 1 ; 04330204 s_buffer_load_dword s4, s[0:3], 0x5f ; C202015F s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v25, s4, 2 ; 04330404 s_buffer_load_dword s92, s[0:3], 0x6a ; C22E016A s_buffer_load_dword s93, s[0:3], 0x6b ; C22E816B s_buffer_load_dword s94, s[0:3], 0x6c ; C22F016C s_buffer_load_dword s95, s[0:3], 0x6d ; C22F816D s_buffer_load_dword s96, s[0:3], 0x6e ; C230016E s_buffer_load_dword s97, s[0:3], 0x6f ; C230816F s_buffer_load_dword s98, s[0:3], 0x70 ; C2310170 s_buffer_load_dword s99, s[0:3], 0x71 ; C2318171 s_buffer_load_dword s100, s[0:3], 0x72 ; C2320172 s_buffer_load_dword s101, s[0:3], 0x73 ; C2328173 s_buffer_load_dword s24, s[0:3], 0x74 ; C20C0174 s_buffer_load_dword s25, s[0:3], 0x75 ; C20C8175 s_buffer_load_dword s26, s[0:3], 0x76 ; C20D0176 s_buffer_load_dword s27, s[0:3], 0x77 ; C20D8177 s_buffer_load_dword s35, s[0:3], 0x7c ; C211817C s_buffer_load_dword s28, s[0:3], 0x68 ; C20E0168 s_buffer_load_dword s29, s[0:3], 0x69 ; C20E8169 s_load_dwordx8 s[4:11], s[16:17], 0x0 ; C0C21100 s_load_dwordx8 s[16:23], s[16:17], 0x8 ; C0C81108 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[4:11], s[76:79] ; F0800100 02610108 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v6, s74, v1 ; 3E0C024A v_rcp_f32_e32 v6, v6 ; 7E0C5506 v_sub_f32_e32 v8, 1.0, v1 ; 081002F2 v_mul_f32_e32 v4, v4, v8 ; 10081104 v_mac_f32_e32 v4, v5, v1 ; 3E080305 v_mul_f32_e32 v5, v6, v7 ; 100A0F06 v_mac_f32_e32 v5, s75, v1 ; 3E0A024B v_mul_f32_e32 v1, v5, v10 ; 10021505 v_mul_f32_e32 v6, v5, v11 ; 100C1705 v_mul_f32_e32 v0, v5, v0 ; 10000105 v_mul_f32_e32 v1, v1, v7 ; 10020F01 v_mac_f32_e32 v1, s75, v2 ; 3E02044B v_mul_f32_e32 v2, v6, v7 ; 10040F06 v_mac_f32_e32 v2, s75, v3 ; 3E04064B v_mul_f32_e32 v0, v0, v7 ; 10000F00 v_mac_f32_e32 v0, s75, v4 ; 3E00084B v_mul_f32_e32 v3, s28, v1 ; 1006021C v_mul_f32_e32 v4, s29, v1 ; 1008021D v_mul_f32_e32 v5, s92, v1 ; 100A025C v_mul_f32_e32 v1, s93, v1 ; 1002025D v_mac_f32_e32 v3, s94, v2 ; 3E06045E v_mac_f32_e32 v4, s95, v2 ; 3E08045F v_mac_f32_e32 v5, s96, v2 ; 3E0A0460 v_mac_f32_e32 v1, s97, v2 ; 3E020461 v_mac_f32_e32 v3, s98, v0 ; 3E060062 v_mac_f32_e32 v4, s99, v0 ; 3E080063 v_mac_f32_e32 v5, s100, v0 ; 3E0A0064 v_mac_f32_e32 v1, s101, v0 ; 3E020065 v_add_f32_e32 v0, s24, v3 ; 06000618 v_add_f32_e32 v2, s25, v4 ; 06040819 v_add_f32_e32 v3, s26, v5 ; 06060A1A v_add_f32_e32 v1, s27, v1 ; 0602021B v_mul_f32_e32 v4, s80, v0 ; 10080050 v_mul_f32_e32 v5, s81, v0 ; 100A0051 v_mul_f32_e32 v6, s82, v0 ; 100C0052 v_mac_f32_e32 v4, s83, v2 ; 3E080453 v_mac_f32_e32 v5, s84, v2 ; 3E0A0454 v_mac_f32_e32 v6, s85, v2 ; 3E0C0455 v_mac_f32_e32 v4, s86, v3 ; 3E080656 v_mac_f32_e32 v5, s87, v3 ; 3E0A0657 v_mac_f32_e32 v6, s88, v3 ; 3E0C0658 v_mac_f32_e32 v4, s89, v1 ; 3E080259 v_mac_f32_e32 v5, s90, v1 ; 3E0A025A v_mac_f32_e32 v6, s91, v1 ; 3E0C025B v_mul_f32_e32 v7, s71, v0 ; 100E0047 v_mul_f32_e32 v8, s72, v0 ; 10100048 v_mul_f32_e32 v9, s73, v0 ; 10120049 v_mac_f32_e32 v7, s70, v2 ; 3E0E0446 v_mac_f32_e32 v8, s69, v2 ; 3E100445 v_mac_f32_e32 v9, s68, v2 ; 3E120444 v_mac_f32_e32 v7, s67, v3 ; 3E0E0643 v_mac_f32_e32 v8, s66, v3 ; 3E100642 v_mac_f32_e32 v9, s65, v3 ; 3E120641 v_mac_f32_e32 v7, s64, v1 ; 3E0E0240 v_mac_f32_e32 v8, s63, v1 ; 3E10023F v_mac_f32_e32 v9, s62, v1 ; 3E12023E v_mul_f32_e32 v10, s61, v0 ; 1014003D v_mul_f32_e32 v11, s60, v0 ; 1016003C v_mul_f32_e32 v12, s59, v0 ; 1018003B v_mac_f32_e32 v10, s58, v2 ; 3E14043A v_mac_f32_e32 v11, s57, v2 ; 3E160439 v_mac_f32_e32 v12, s56, v2 ; 3E180438 v_mac_f32_e32 v10, s55, v3 ; 3E140637 v_mac_f32_e32 v11, s54, v3 ; 3E160636 v_mac_f32_e32 v12, s53, v3 ; 3E180635 v_mac_f32_e32 v10, s52, v1 ; 3E140234 v_mac_f32_e32 v11, s51, v1 ; 3E160233 v_mac_f32_e32 v12, s50, v1 ; 3E180232 v_mul_f32_e32 v13, s48, v0 ; 101A0030 v_mul_f32_e32 v14, s49, v0 ; 101C0031 v_mac_f32_e32 v13, s47, v2 ; 3E1A042F v_mac_f32_e32 v14, s46, v2 ; 3E1C042E v_mul_f32_e32 v15, s42, v0 ; 101E002A v_mac_f32_e32 v15, s45, v2 ; 3E1E042D v_mac_f32_e32 v13, s43, v3 ; 3E1A062B v_mac_f32_e32 v14, s40, v3 ; 3E1C0628 v_mac_f32_e32 v15, s39, v3 ; 3E1E0627 v_mac_f32_e32 v13, s38, v1 ; 3E1A0226 v_mac_f32_e32 v14, s44, v1 ; 3E1C022C v_mac_f32_e32 v15, s41, v1 ; 3E1E0229 v_subrev_f32_e32 v1, s33, v0 ; 0A020021 v_subrev_f32_e32 v16, s34, v2 ; 0A200422 v_mul_f32_e32 v1, v1, v1 ; 10020301 v_mac_f32_e32 v1, v16, v16 ; 3E022110 v_subrev_f32_e32 v16, s32, v3 ; 0A200620 v_mac_f32_e32 v1, v16, v16 ; 3E022110 v_subrev_f32_e32 v16, s30, v0 ; 0A20001E v_subrev_f32_e32 v17, s31, v2 ; 0A22041F v_mul_f32_e32 v16, v16, v16 ; 10202110 v_mac_f32_e32 v16, v17, v17 ; 3E202311 v_readlane_b32 s4, v25, 11 ; 02091719 s_nop 2 ; BF800002 v_subrev_f32_e32 v17, s4, v3 ; 0A220604 v_mac_f32_e32 v16, v17, v17 ; 3E202311 v_cmp_gt_f32_e32 vcc, s36, v1 ; 7C080224 v_cndmask_b32_e64 v1, 0, 1.0, vcc ; D2000001 01A9E480 v_cmp_gt_f32_e32 vcc, s37, v16 ; 7C082025 v_cndmask_b32_e64 v16, 0, 1.0, vcc ; D2000010 01A9E480 v_subrev_f32_e32 v17, v1, v16 ; 0A222101 v_add_f32_e64 v17, 0, v17 clamp ; D2060811 00022280 v_mul_f32_e32 v13, v1, v13 ; 101A1B01 v_mac_f32_e32 v13, v17, v10 ; 3E1A1511 v_mul_f32_e32 v10, v1, v14 ; 10141D01 v_mac_f32_e32 v10, v17, v11 ; 3E141711 v_mul_f32_e32 v18, v1, v15 ; 10241F01 v_mac_f32_e32 v18, v17, v12 ; 3E241911 v_readlane_b32 s4, v25, 7 ; 02090F19 s_nop 2 ; BF800002 v_subrev_f32_e32 v1, s4, v0 ; 0A020004 v_readlane_b32 s4, v25, 9 ; 02091319 s_nop 2 ; BF800002 v_subrev_f32_e32 v11, s4, v2 ; 0A160404 v_mul_f32_e32 v1, v1, v1 ; 10020301 v_mac_f32_e32 v1, v11, v11 ; 3E02170B v_readlane_b32 s4, v25, 8 ; 02091119 s_nop 2 ; BF800002 v_subrev_f32_e32 v11, s4, v3 ; 0A160604 v_mac_f32_e32 v1, v11, v11 ; 3E02170B v_readlane_b32 s4, v25, 10 ; 02091519 s_nop 2 ; BF800002 v_cmp_gt_f32_e32 vcc, s4, v1 ; 7C080204 v_cndmask_b32_e64 v1, 0, 1.0, vcc ; D2000001 01A9E480 v_subrev_f32_e32 v11, v16, v1 ; 0A160310 v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_mac_f32_e32 v13, v11, v7 ; 3E1A0F0B v_mac_f32_e32 v10, v11, v8 ; 3E14110B v_mac_f32_e32 v18, v11, v9 ; 3E24130B v_readlane_b32 s4, v25, 3 ; 02090719 s_nop 2 ; BF800002 v_subrev_f32_e32 v7, s4, v0 ; 0A0E0004 v_readlane_b32 s4, v25, 5 ; 02090B19 s_nop 2 ; BF800002 v_subrev_f32_e32 v8, s4, v2 ; 0A100404 v_mul_f32_e32 v7, v7, v7 ; 100E0F07 v_mac_f32_e32 v7, v8, v8 ; 3E0E1108 v_readlane_b32 s4, v25, 4 ; 02090919 s_nop 2 ; BF800002 v_subrev_f32_e32 v8, s4, v3 ; 0A100604 s_buffer_load_dword s4, s[0:3], 0x7e ; C202017E s_buffer_load_dword s5, s[0:3], 0x7f ; C202817F v_mac_f32_e32 v7, v8, v8 ; 3E0E1108 v_readlane_b32 s6, v25, 6 ; 020D0D19 s_nop 2 ; BF800002 v_cmp_gt_f32_e32 vcc, s6, v7 ; 7C080E06 v_cndmask_b32_e64 v7, 0, 1.0, vcc ; D2000007 01A9E480 v_subrev_f32_e32 v1, v1, v7 ; 0A020F01 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mac_f32_e32 v13, v1, v4 ; 3E1A0901 v_mac_f32_e32 v10, v1, v5 ; 3E140B01 v_mac_f32_e32 v18, v1, v6 ; 3E240D01 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v13, s4, 0.5 ; D2820001 03C0090D v_mad_f32 v4, v10, s5, 0.5 ; D2820004 03C00B0A v_floor_f32_e32 v5, v1 ; 7E0A4901 v_subrev_f32_e32 v1, v5, v1 ; 0A020305 v_floor_f32_e32 v6, v4 ; 7E0C4904 v_subrev_f32_e32 v4, v6, v4 ; 0A080906 s_buffer_load_dword s4, s[0:3], 0x7d ; C202017D v_mov_b32_e32 v7, 0x40400000 ; 7E0E02FF 40400000 v_mad_f32 v8, -v1, v7, 4.0 ; D2820008 23DA0F01 v_mad_f32 v9, v1, v7, 1.0 ; D2820009 03CA0F01 v_mad_f32 v10, -2.0, v1, v7 ; D282000A 041E02F5 v_rcp_f32_e32 v11, v8 ; 7E165508 v_rcp_f32_e32 v12, v9 ; 7E185509 v_mad_f32 v13, v4, v7, 1.0 ; D282000D 03CA0F04 v_rcp_f32_e32 v14, v13 ; 7E1C550D v_mad_f32 v10, v10, v11, -2.0 ; D282000A 03D6170A v_mad_f32 v11, v1, v12, 2.0 ; D282000B 03D21901 v_mul_f32_e32 v19, s35, v11 ; 10261623 v_mad_f32 v11, v4, v14, 2.0 ; D282000B 03D21D04 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v20, s4, v11 ; 10281604 v_add_f32_e32 v5, -0.5, v5 ; 060A0AF1 v_add_f32_e32 v1, v7, v1 ; 06020307 v_mov_b32_e32 v11, 0x3e124925 ; 7E1602FF 3E124925 v_mul_f32_e32 v1, v11, v1 ; 1002030B v_mul_f32_e32 v10, s35, v10 ; 10141423 v_mul_f32_e32 v1, s35, v1 ; 10020223 v_mac_f32_e32 v10, s35, v5 ; 3E140A23 v_mad_f32 v12, -v4, v7, 4.0 ; D282000C 23DA0F04 v_rcp_f32_e32 v14, v12 ; 7E1C550C v_mac_f32_e32 v1, s35, v5 ; 3E020A23 v_mac_f32_e32 v19, s35, v5 ; 3E260A23 v_mad_f32 v5, -2.0, v4, v7 ; D2820005 041E08F5 v_mad_f32 v5, v5, v14, -2.0 ; D2820005 03D61D05 v_add_f32_e32 v6, -0.5, v6 ; 060C0CF1 v_mac_f32_e32 v20, s4, v6 ; 3E280C04 v_add_f32_e32 v4, v7, v4 ; 06080907 v_mov_b32_e32 v14, v18 ; 7E1C0312 v_mov_b32_e32 v15, v19 ; 7E1E0313 v_mov_b32_e32 v16, v20 ; 7E200314 v_mov_b32_e32 v17, v21 ; 7E220315 v_mov_b32_e32 v21, v18 ; 7E2A0312 v_mov_b32_e32 v22, v19 ; 7E2C0313 v_mov_b32_e32 v23, v20 ; 7E2E0314 v_mov_b32_e32 v24, v21 ; 7E300315 v_mul_f32_e32 v4, v11, v4 ; 1008090B v_mov_b32_e32 v15, v1 ; 7E1E0301 v_mov_b32_e32 v22, v10 ; 7E2C030A v_mul_f32_e32 v1, s4, v5 ; 10020A04 v_mul_f32_e32 v4, s4, v4 ; 10080804 v_mac_f32_e32 v1, s4, v6 ; 3E020C04 v_mac_f32_e32 v4, s4, v6 ; 3E080C04 v_mov_b32_e32 v16, v20 ; 7E200314 v_mov_b32_e32 v23, v20 ; 7E2E0314 image_sample_c v5, 1, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[16:23], s[12:15] ; F0A00100 00640512 v_mov_b32_e32 v20, v4 ; 7E280304 image_sample_c v6, 1, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[16:23], s[12:15] ; F0A00100 0064060E v_mov_b32_e32 v16, v4 ; 7E200304 image_sample_c v7, 1, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[16:23], s[12:15] ; F0A00100 00640715 v_mov_b32_e32 v23, v4 ; 7E2E0304 image_sample_c v4, 1, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[16:23], s[12:15] ; F0A00100 00640412 v_mov_b32_e32 v20, v1 ; 7E280301 image_sample_c v10, 1, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[16:23], s[12:15] ; F0A00100 00640A0E image_sample_c v11, 1, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[16:23], s[12:15] ; F0A00100 00640B15 image_sample_c v18, 1, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[16:23], s[12:15] ; F0A00100 00641212 v_mov_b32_e32 v16, v1 ; 7E200301 image_sample_c v14, 1, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[16:23], s[12:15] ; F0A00100 00640E0E v_mov_b32_e32 v23, v1 ; 7E2E0301 image_sample_c v1, 1, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[16:23], s[12:15] ; F0A00100 00640115 s_buffer_load_dword s4, s[0:3], 0x60 ; C2020160 s_buffer_load_dword s5, s[0:3], 0x61 ; C2028161 s_buffer_load_dword s0, s[0:3], 0x62 ; C2000162 v_mul_f32_e32 v15, v12, v8 ; 101E110C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v1, v15 ; 10021F01 v_mov_b32_e32 v15, 0x40e00000 ; 7E1E02FF 40E00000 v_mul_f32_e32 v16, v15, v12 ; 1020190F v_mac_f32_e32 v1, v14, v16 ; 3E02210E v_mul_f32_e32 v12, v12, v9 ; 1018130C v_mac_f32_e32 v1, v18, v12 ; 3E021912 v_mul_f32_e32 v12, v15, v8 ; 1018110F v_mac_f32_e32 v1, v11, v12 ; 3E02190B v_madmk_f32_e32 v1, v10, v1, 0x42440000 ; 4002030A 42440000 v_mul_f32_e32 v10, v15, v9 ; 1014130F v_mac_f32_e32 v1, v4, v10 ; 3E021504 v_mul_f32_e32 v4, v13, v8 ; 1008110D v_mac_f32_e32 v1, v7, v4 ; 3E020907 v_mul_f32_e32 v4, v15, v13 ; 10081B0F v_mac_f32_e32 v1, v6, v4 ; 3E020906 v_mul_f32_e32 v4, v13, v9 ; 1008130D v_mac_f32_e32 v1, v5, v4 ; 3E020905 v_mov_b32_e32 v4, 0x3be38e39 ; 7E0802FF 3BE38E39 v_mul_f32_e32 v5, v4, v1 ; 100A0304 v_mad_f32 v1, -v1, v4, 1.0 ; D2820001 23CA0901 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v0, s4, v0 ; 0A000004 v_subrev_f32_e32 v2, s5, v2 ; 0A040405 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mac_f32_e32 v0, v2, v2 ; 3E000502 v_subrev_f32_e32 v2, s0, v3 ; 0A040600 v_mac_f32_e32 v0, v2, v2 ; 3E000502 v_sqrt_f32_e32 v0, v0 ; 7E006700 v_readlane_b32 s0, v25, 2 ; 02010519 s_nop 2 ; BF800002 v_mov_b32_e32 v2, s0 ; 7E040200 v_readlane_b32 s0, v25, 1 ; 02010319 s_nop 2 ; BF800002 v_mac_f32_e32 v2, s0, v0 ; 3E040000 v_add_f32_e64 v0, 0, v2 clamp ; D2060800 00020480 v_readlane_b32 s0, v25, 0 ; 02010119 s_nop 2 ; BF800002 v_mac_f32_e32 v0, s0, v1 ; 3E000200 v_mac_f32_e32 v0, 1.0, v5 ; 3E000AF2 v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 ; 5E000100 exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 104 VGPRS: 28 Code Size: 1792 bytes LDS: 0 blocks Scratch: 3072 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..9] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.5000, -1.0000, 1.0000, 0.0000} 0: MUL TEMP[0], CONST[2], IN[0].xxxx 1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[5], IN[0].wwww, TEMP[0] 4: MUL TEMP[1].xyw, TEMP[0], IMM[0].xxxx 5: MOV TEMP[2].x, TEMP[1].xxxx 6: MUL TEMP[3].x, TEMP[1].yyyy, CONST[0].xxxx 7: MOV TEMP[2].y, TEMP[3].xxxx 8: ADD TEMP[1].xy, TEMP[2].xyyy, TEMP[1].wwww 9: MOV TEMP[1].zw, TEMP[0].wwzw 10: MUL TEMP[2], CONST[6], IN[0].xxxx 11: MAD TEMP[2], CONST[7], IN[0].yyyy, TEMP[2] 12: MAD TEMP[2], CONST[8], IN[0].zzzz, TEMP[2] 13: MAD TEMP[2].xyz, CONST[9], IN[0].wwww, TEMP[2] 14: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[0].yyzz 15: LRP TEMP[2].xyz, CONST[1].xxxx, IN[1].xyzz, TEMP[2].xyzz 16: MOV OUT[1], TEMP[1] 17: MOV OUT[2], TEMP[2] 18: MOV OUT[0], TEMP[0] 19: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = extractelement <4 x float> %49, i32 3 %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = add i32 %5, %7 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = fmul float %15, %50 %62 = fmul float %16, %50 %63 = fmul float %17, %50 %64 = fmul float %18, %50 %65 = fmul float %19, %51 %66 = fadd float %65, %61 %67 = fmul float %20, %51 %68 = fadd float %67, %62 %69 = fmul float %21, %51 %70 = fadd float %69, %63 %71 = fmul float %22, %51 %72 = fadd float %71, %64 %73 = fmul float %23, %52 %74 = fadd float %73, %66 %75 = fmul float %24, %52 %76 = fadd float %75, %68 %77 = fmul float %25, %52 %78 = fadd float %77, %70 %79 = fmul float %26, %52 %80 = fadd float %79, %72 %81 = fmul float %27, %53 %82 = fadd float %81, %74 %83 = fmul float %28, %53 %84 = fadd float %83, %76 %85 = fmul float %29, %53 %86 = fadd float %85, %78 %87 = fmul float %30, %53 %88 = fadd float %87, %80 %89 = fmul float %82, 5.000000e-01 %90 = fmul float %84, 5.000000e-01 %91 = fmul float %88, 5.000000e-01 %92 = fmul float %90, %13 %93 = fadd float %89, %91 %94 = fadd float %92, %91 %95 = fmul float %31, %50 %96 = fmul float %32, %50 %97 = fmul float %33, %50 %98 = fmul float %34, %50 %99 = fmul float %35, %51 %100 = fadd float %99, %95 %101 = fmul float %36, %51 %102 = fadd float %101, %96 %103 = fmul float %37, %51 %104 = fadd float %103, %97 %105 = fmul float %38, %51 %106 = fadd float %105, %98 %107 = fmul float %39, %52 %108 = fadd float %107, %100 %109 = fmul float %40, %52 %110 = fadd float %109, %102 %111 = fmul float %41, %52 %112 = fadd float %111, %104 %113 = fmul float %42, %52 %114 = fadd float %113, %106 %115 = fmul float %43, %53 %116 = fadd float %115, %108 %117 = fmul float %44, %53 %118 = fadd float %117, %110 %119 = fmul float %45, %53 %120 = fadd float %119, %112 %121 = fsub float -0.000000e+00, %116 %122 = fsub float -0.000000e+00, %118 %123 = call float @llvm.AMDGPU.lrp(float %14, float %58, float %121) %124 = call float @llvm.AMDGPU.lrp(float %14, float %59, float %122) %125 = call float @llvm.AMDGPU.lrp(float %14, float %60, float %120) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %93, float %94, float %86, float %88) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %123, float %124, float %125, float %114) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %82, float %84, float %86, float %88) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_buffer_load_dword s5, s[0:3], 0xc ; C202810C s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109 s_buffer_load_dword s7, s[0:3], 0xd ; C203810D s_buffer_load_dword s8, s[0:3], 0xa ; C204010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v1 ; 10000204 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_buffer_load_dword s9, s[0:3], 0xf ; C204810F v_mac_f32_e32 v0, s5, v2 ; 3E000405 v_mul_f32_e32 v8, s6, v1 ; 10100206 v_mac_f32_e32 v8, s7, v2 ; 3E100407 s_buffer_load_dword s5, s[0:3], 0xb ; C202810B v_mul_f32_e32 v9, s8, v1 ; 10120208 s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118 s_buffer_load_dword s7, s[0:3], 0x1c ; C203811C s_buffer_load_dword s8, s[0:3], 0x19 ; C2040119 s_buffer_load_dword s10, s[0:3], 0x1d ; C205011D s_buffer_load_dword s11, s[0:3], 0x1a ; C205811A s_buffer_load_dword s12, s[0:3], 0x1e ; C206011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v9, s4, v2 ; 3E120404 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B v_mul_f32_e32 v10, s5, v1 ; 10140205 v_mac_f32_e32 v10, s9, v2 ; 3E140409 v_mul_f32_e32 v11, s6, v1 ; 10160206 v_mac_f32_e32 v11, s7, v2 ; 3E160407 v_mul_f32_e32 v12, s8, v1 ; 10180208 v_mac_f32_e32 v12, s10, v2 ; 3E18040A v_mul_f32_e32 v13, s11, v1 ; 101A020B v_mac_f32_e32 v13, s12, v2 ; 3E1A040C s_buffer_load_dword s5, s[0:3], 0x1f ; C202811F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v1 ; 10020204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s6, s[0:3], 0x11 ; C2030111 s_buffer_load_dword s7, s[0:3], 0x12 ; C2038112 s_buffer_load_dword s8, s[0:3], 0x13 ; C2040113 s_buffer_load_dword s9, s[0:3], 0x20 ; C2048120 s_buffer_load_dword s10, s[0:3], 0x21 ; C2050121 s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122 s_buffer_load_dword s12, s[0:3], 0x23 ; C2060123 v_mac_f32_e32 v1, s5, v2 ; 3E020405 s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s4, v3 ; 3E000604 v_mac_f32_e32 v8, s6, v3 ; 3E100606 v_mac_f32_e32 v9, s7, v3 ; 3E120607 v_mac_f32_e32 v10, s8, v3 ; 3E140608 v_mac_f32_e32 v11, s9, v3 ; 3E160609 v_mac_f32_e32 v12, s10, v3 ; 3E18060A s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_buffer_load_dword s6, s[0:3], 0x16 ; C2030116 s_buffer_load_dword s7, s[0:3], 0x17 ; C2038117 s_buffer_load_dword s8, s[0:3], 0x24 ; C2040124 s_buffer_load_dword s9, s[0:3], 0x25 ; C2048125 s_buffer_load_dword s10, s[0:3], 0x26 ; C2050126 v_mac_f32_e32 v13, s11, v3 ; 3E1A060B s_buffer_load_dword s11, s[0:3], 0x4 ; C2058104 v_mac_f32_e32 v1, s12, v3 ; 3E02060C v_mac_f32_e32 v0, s5, v4 ; 3E000805 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v8, s4, v4 ; 3E100804 v_mac_f32_e32 v9, s6, v4 ; 3E120806 v_mac_f32_e32 v10, s7, v4 ; 3E140807 v_mac_f32_e32 v11, s8, v4 ; 3E160808 v_mac_f32_e32 v12, s9, v4 ; 3E180809 v_mac_f32_e32 v13, s10, v4 ; 3E1A080A s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 v_sub_f32_e64 v2, 1.0, s11 ; D2080002 000016F2 v_mul_f32_e32 v3, v11, v2 ; 1006050B v_mul_f32_e32 v4, v12, v2 ; 1008050C v_mul_f32_e32 v2, v13, v2 ; 1004050D v_mad_f32 v3, s11, v5, -v3 ; D2820003 840E0A0B v_mad_f32 v4, s11, v6, -v4 ; D2820004 84120C0B v_mac_f32_e32 v2, s11, v7 ; 3E040E0B v_mul_f32_e32 v5, 0.5, v8 ; 100A10F0 v_mul_f32_e32 v6, 0.5, v10 ; 100C14F0 v_mad_f32 v7, 0.5, v0, v6 ; D2820007 041A00F0 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v6, s0, v5 ; 3E0C0A00 exp 15, 32, 0, 0, 0, v7, v6, v9, v10 ; F800020F 0A090607 exp 15, 33, 0, 0, 0, v3, v4, v2, v1 ; F800021F 01020403 exp 15, 12, 0, 1, 0, v0, v8, v9, v10 ; F80008CF 0A090800 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 408 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[0..5] DCL CONST[7..13] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 1.0000, 2.0000, -1.0000, 0.0000} IMM[1] FLT32 { 128.0000, 0.0000, 0.0000, 0.0000} 0: RCP TEMP[0].x, IN[0].wwww 1: MUL TEMP[0].xy, IN[0].xyyy, TEMP[0].xxxx 2: RCP TEMP[1].x, IN[1].zzzz 3: MUL TEMP[2].x, CONST[1].zzzz, TEMP[1].xxxx 4: MUL TEMP[2].xyz, IN[1].xyzz, TEMP[2].xxxx 5: MOV TEMP[3].xy, TEMP[0].xyyy 6: TEX TEMP[3].x, TEMP[3], SAMP[0], 2D 7: MAD TEMP[3].x, CONST[2].xxxx, TEMP[3].xxxx, CONST[2].yyyy 8: RCP TEMP[3].x, TEMP[3].xxxx 9: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[3].xxxx 10: MUL TEMP[2], CONST[10], TEMP[1].xxxx 11: MAD TEMP[2], CONST[11], TEMP[1].yyyy, TEMP[2] 12: MAD TEMP[2], CONST[12], TEMP[1].zzzz, TEMP[2] 13: ADD TEMP[2].xyz, TEMP[2], CONST[13] 14: ADD TEMP[3].xyz, TEMP[2].xyzz, -CONST[4].xyzz 15: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[3].xyzz 16: SQRT TEMP[3].x, TEMP[3].xxxx 17: LRP TEMP[1].x, CONST[4].wwww, TEMP[3].xxxx, TEMP[1].zzzz 18: MOV TEMP[3].xyz, -CONST[7].xyzx 19: MOV TEMP[4].xy, TEMP[0].xyyy 20: TEX TEMP[4].x, TEMP[4], SAMP[1], 2D 21: MAD TEMP[5].x, TEMP[1].xxxx, CONST[3].zzzz, CONST[3].wwww 22: MOV_SAT TEMP[5].x, TEMP[5].xxxx 23: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 24: MOV_SAT TEMP[4].x, TEMP[4].xxxx 25: MOV TEMP[0].xy, TEMP[0].xyyy 26: TEX TEMP[0], TEMP[0], SAMP[2], 2D 27: MAD TEMP[5].xyz, TEMP[0].xyzz, IMM[0].yyyy, IMM[0].zzzz 28: DP3 TEMP[6].x, TEMP[5].xyzz, TEMP[5].xyzz 29: RSQ TEMP[6].x, TEMP[6].xxxx 30: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[6].xxxx 31: DP3 TEMP[6].x, TEMP[3].xyzz, TEMP[5].xyzz 32: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx 33: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[4].xxxx 34: MUL TEMP[6].xyz, CONST[8].xyzz, TEMP[6].xxxx 35: MUL TEMP[7].xyz, CONST[8].xyzz, CONST[5].xyzz 36: ADD TEMP[2].xyz, TEMP[2].xyzz, -CONST[0].xyzz 37: DP3 TEMP[8].x, TEMP[2].xyzz, TEMP[2].xyzz 38: RSQ TEMP[8].x, TEMP[8].xxxx 39: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[8].xxxx 40: ADD TEMP[2].xyz, TEMP[3].xyzz, -TEMP[2].xyzz 41: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 42: RSQ TEMP[3].x, TEMP[3].xxxx 43: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 44: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[5].xyzz 45: MAX TEMP[2].x, IMM[0].wwww, TEMP[2].xxxx 46: MUL TEMP[0].x, TEMP[0].wwww, IMM[1].xxxx 47: POW TEMP[0].x, TEMP[2].xxxx, TEMP[0].xxxx 48: MOV_SAT TEMP[2].x, TEMP[4].xxxx 49: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx 50: ADD TEMP[2].x, TEMP[7].xxxx, TEMP[7].zzzz 51: MUL TEMP[2].x, TEMP[7].yyyy, TEMP[2].xxxx 52: SQRT TEMP[2].x, TEMP[2].xxxx 53: MUL TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 54: ADD TEMP[3].x, TEMP[7].xxxx, TEMP[7].yyyy 55: ADD TEMP[3].x, TEMP[3].xxxx, TEMP[7].zzzz 56: MAD TEMP[2].x, TEMP[2].xxxx, CONST[5].wwww, TEMP[3].xxxx 57: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx 58: MOV TEMP[6].w, TEMP[0].xxxx 59: MAD TEMP[0].x, TEMP[1].xxxx, CONST[9].zzzz, CONST[9].wwww 60: ADD TEMP[0].x, IMM[0].xxxx, -TEMP[0].xxxx 61: MOV_SAT TEMP[0].x, TEMP[0].xxxx 62: MUL TEMP[0], TEMP[6], TEMP[0].xxxx 63: EX2 TEMP[1].x, -TEMP[0].xxxx 64: EX2 TEMP[1].y, -TEMP[0].yyyy 65: EX2 TEMP[1].z, -TEMP[0].zzzz 66: EX2 TEMP[1].w, -TEMP[0].wwww 67: MOV OUT[0], TEMP[1] 68: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %60 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %61 = load <32 x i8>, <32 x i8> addrspace(2)* %60, align 32, !tbaa !0 %62 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 %64 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %65 = bitcast <8 x i32> addrspace(2)* %64 to <32 x i8> addrspace(2)* %66 = load <32 x i8>, <32 x i8> addrspace(2)* %65, align 32, !tbaa !0 %67 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %68 = bitcast <4 x i32> addrspace(2)* %67 to <16 x i8> addrspace(2)* %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 %70 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %71 = bitcast <8 x i32> addrspace(2)* %70 to <32 x i8> addrspace(2)* %72 = load <32 x i8>, <32 x i8> addrspace(2)* %71, align 32, !tbaa !0 %73 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %74 = bitcast <4 x i32> addrspace(2)* %73 to <16 x i8> addrspace(2)* %75 = load <16 x i8>, <16 x i8> addrspace(2)* %74, align 16, !tbaa !0 %76 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %82 = fdiv float 1.000000e+00, %78 %83 = fmul float %76, %82 %84 = fmul float %77, %82 %85 = fdiv float 1.000000e+00, %81 %86 = fmul float %27, %85 %87 = fmul float %79, %86 %88 = fmul float %80, %86 %89 = fmul float %81, %86 %90 = bitcast float %83 to i32 %91 = bitcast float %84 to i32 %92 = insertelement <2 x i32> undef, i32 %90, i32 0 %93 = insertelement <2 x i32> %92, i32 %91, i32 1 %94 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %93, <32 x i8> %61, <16 x i8> %63, i32 2) %95 = extractelement <4 x float> %94, i32 0 %96 = fmul float %28, %95 %97 = fadd float %96, %29 %98 = fdiv float 1.000000e+00, %97 %99 = fmul float %87, %98 %100 = fmul float %88, %98 %101 = fmul float %89, %98 %102 = fmul float %48, %99 %103 = fmul float %49, %99 %104 = fmul float %50, %99 %105 = fmul float %51, %100 %106 = fadd float %105, %102 %107 = fmul float %52, %100 %108 = fadd float %107, %103 %109 = fmul float %53, %100 %110 = fadd float %109, %104 %111 = fmul float %54, %101 %112 = fadd float %111, %106 %113 = fmul float %55, %101 %114 = fadd float %113, %108 %115 = fmul float %56, %101 %116 = fadd float %115, %110 %117 = fadd float %112, %57 %118 = fadd float %114, %58 %119 = fadd float %116, %59 %120 = fsub float %117, %32 %121 = fsub float %118, %33 %122 = fsub float %119, %34 %123 = fmul float %120, %120 %124 = fmul float %121, %121 %125 = fadd float %124, %123 %126 = fmul float %122, %122 %127 = fadd float %125, %126 %128 = call float @llvm.sqrt.f32(float %127) %129 = call float @llvm.AMDGPU.lrp(float %35, float %128, float %101) %130 = bitcast float %83 to i32 %131 = bitcast float %84 to i32 %132 = insertelement <2 x i32> undef, i32 %130, i32 0 %133 = insertelement <2 x i32> %132, i32 %131, i32 1 %134 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %133, <32 x i8> %66, <16 x i8> %69, i32 2) %135 = extractelement <4 x float> %134, i32 0 %136 = fmul float %129, %30 %137 = fadd float %136, %31 %138 = call float @llvm.AMDIL.clamp.(float %137, float 0.000000e+00, float 1.000000e+00) %139 = fadd float %135, %138 %140 = call float @llvm.AMDIL.clamp.(float %139, float 0.000000e+00, float 1.000000e+00) %141 = bitcast float %83 to i32 %142 = bitcast float %84 to i32 %143 = insertelement <2 x i32> undef, i32 %141, i32 0 %144 = insertelement <2 x i32> %143, i32 %142, i32 1 %145 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %144, <32 x i8> %72, <16 x i8> %75, i32 2) %146 = extractelement <4 x float> %145, i32 0 %147 = extractelement <4 x float> %145, i32 1 %148 = extractelement <4 x float> %145, i32 2 %149 = extractelement <4 x float> %145, i32 3 %150 = fmul float %146, 2.000000e+00 %151 = fadd float %150, -1.000000e+00 %152 = fmul float %147, 2.000000e+00 %153 = fadd float %152, -1.000000e+00 %154 = fmul float %148, 2.000000e+00 %155 = fadd float %154, -1.000000e+00 %156 = fmul float %151, %151 %157 = fmul float %153, %153 %158 = fadd float %157, %156 %159 = fmul float %155, %155 %160 = fadd float %158, %159 %161 = call float @llvm.AMDGPU.rsq.clamped.f32(float %160) %162 = fmul float %151, %161 %163 = fmul float %153, %161 %164 = fmul float %155, %161 %165 = fmul float %40, %162 %166 = fsub float -0.000000e+00, %165 %167 = fmul float %41, %163 %168 = fsub float %166, %167 %169 = fmul float %42, %164 %170 = fsub float %168, %169 %171 = call float @llvm.maxnum.f32(float %170, float 0.000000e+00) %172 = fmul float %171, %140 %173 = fmul float %43, %172 %174 = fmul float %44, %172 %175 = fmul float %45, %172 %176 = fmul float %43, %36 %177 = fmul float %44, %37 %178 = fmul float %45, %38 %179 = fsub float %117, %24 %180 = fsub float %118, %25 %181 = fsub float %119, %26 %182 = fmul float %179, %179 %183 = fmul float %180, %180 %184 = fadd float %183, %182 %185 = fmul float %181, %181 %186 = fadd float %184, %185 %187 = call float @llvm.AMDGPU.rsq.clamped.f32(float %186) %188 = fmul float %179, %187 %189 = fmul float %180, %187 %190 = fmul float %181, %187 %191 = fsub float -0.000000e+00, %188 %192 = fsub float %191, %40 %193 = fsub float -0.000000e+00, %189 %194 = fsub float %193, %41 %195 = fsub float -0.000000e+00, %190 %196 = fsub float %195, %42 %197 = fmul float %192, %192 %198 = fmul float %194, %194 %199 = fadd float %198, %197 %200 = fmul float %196, %196 %201 = fadd float %199, %200 %202 = call float @llvm.AMDGPU.rsq.clamped.f32(float %201) %203 = fmul float %192, %202 %204 = fmul float %194, %202 %205 = fmul float %196, %202 %206 = fmul float %203, %162 %207 = fmul float %204, %163 %208 = fadd float %207, %206 %209 = fmul float %205, %164 %210 = fadd float %208, %209 %211 = call float @llvm.maxnum.f32(float %210, float 0.000000e+00) %212 = fmul float %149, 1.280000e+02 %213 = call float @llvm.pow.f32(float %211, float %212) %214 = call float @llvm.AMDIL.clamp.(float %140, float 0.000000e+00, float 1.000000e+00) %215 = fmul float %213, %214 %216 = fadd float %176, %178 %217 = fmul float %177, %216 %218 = call float @llvm.sqrt.f32(float %217) %219 = fmul float %218, 2.000000e+00 %220 = fadd float %176, %177 %221 = fadd float %220, %178 %222 = fmul float %219, %39 %223 = fadd float %222, %221 %224 = fmul float %215, %223 %225 = fmul float %129, %46 %226 = fadd float %225, %47 %227 = fsub float 1.000000e+00, %226 %228 = call float @llvm.AMDIL.clamp.(float %227, float 0.000000e+00, float 1.000000e+00) %229 = fmul float %173, %228 %230 = fmul float %174, %228 %231 = fmul float %175, %228 %232 = fmul float %224, %228 %233 = fsub float -0.000000e+00, %229 %234 = call float @llvm.AMDIL.exp.(float %233) %235 = fsub float -0.000000e+00, %230 %236 = call float @llvm.AMDIL.exp.(float %235) %237 = fsub float -0.000000e+00, %231 %238 = call float @llvm.AMDIL.exp.(float %237) %239 = fsub float -0.000000e+00, %232 %240 = call float @llvm.AMDIL.exp.(float %239) %241 = call i32 @llvm.SI.packf16(float %234, float %236) %242 = bitcast i32 %241 to float %243 = call i32 @llvm.SI.packf16(float %238, float %240) %244 = bitcast i32 %243 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %242, float %244, float %242, float %244) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_load_dwordx4 s[8:11], s[4:5], 0x8 ; C0840508 s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_load_dwordx8 s[12:19], s[6:7], 0x10 ; C0C60710 v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600 v_rcp_f32_e32 v4, v4 ; 7E085504 v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108 v_mul_f32_e32 v1, v4, v2 ; 10020504 v_mul_f32_e32 v2, v4, v3 ; 10040704 image_sample v3, 1, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[36:43], s[32:35] ; F0800100 01090301 image_sample v4, 1, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[24:31], s[20:23] ; F0800100 00A60401 image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[12:19], s[8:11] ; F0800F00 00430701 s_buffer_load_dword s6, s[0:3], 0xe ; C203010E s_buffer_load_dword s7, s[0:3], 0xf ; C203810F s_buffer_load_dword s8, s[0:3], 0x10 ; C2040110 s_buffer_load_dword s9, s[0:3], 0x11 ; C2048111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v1, s5, v3 ; 3E020605 v_rcp_f32_e32 v1, v1 ; 7E025501 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s11, s[0:3], 0x6 ; C2058106 s_buffer_load_dword s12, s[0:3], 0x12 ; C2060112 s_buffer_load_dword s13, s[0:3], 0x13 ; C2068113 s_buffer_load_dword s14, s[0:3], 0x14 ; C2070114 s_buffer_load_dword s15, s[0:3], 0x15 ; C2078115 s_buffer_load_dword s16, s[0:3], 0x16 ; C2080116 s_buffer_load_dword s17, s[0:3], 0x17 ; C2088117 s_buffer_load_dword s18, s[0:3], 0x1c ; C209011C s_buffer_load_dword s19, s[0:3], 0x1d ; C209811D s_buffer_load_dword s20, s[0:3], 0x1e ; C20A011E s_buffer_load_dword s21, s[0:3], 0x20 ; C20A8120 s_buffer_load_dword s22, s[0:3], 0x21 ; C20B0121 s_buffer_load_dword s23, s[0:3], 0x22 ; C20B8122 s_buffer_load_dword s24, s[0:3], 0x26 ; C20C0126 s_buffer_load_dword s25, s[0:3], 0x27 ; C20C8127 s_buffer_load_dword s26, s[0:3], 0x28 ; C20D0128 s_buffer_load_dword s27, s[0:3], 0x29 ; C20D8129 s_buffer_load_dword s28, s[0:3], 0x2a ; C20E012A s_buffer_load_dword s29, s[0:3], 0x2c ; C20E812C s_buffer_load_dword s30, s[0:3], 0x2d ; C20F012D s_buffer_load_dword s31, s[0:3], 0x2e ; C20F812E v_rcp_f32_e32 v2, v0 ; 7E045500 s_buffer_load_dword s32, s[0:3], 0x30 ; C2100130 s_buffer_load_dword s33, s[0:3], 0x31 ; C2108131 s_buffer_load_dword s34, s[0:3], 0x32 ; C2110132 s_buffer_load_dword s35, s[0:3], 0x34 ; C2118134 s_buffer_load_dword s36, s[0:3], 0x35 ; C2120135 s_buffer_load_dword s0, s[0:3], 0x36 ; C2000136 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v2, s11, v2 ; 1004040B v_mul_f32_e32 v3, v2, v5 ; 10060B02 v_mul_f32_e32 v3, v1, v3 ; 10060701 v_mul_f32_e32 v5, s26, v3 ; 100A061A v_mul_f32_e32 v11, s27, v3 ; 1016061B v_mul_f32_e32 v3, s28, v3 ; 1006061C v_mul_f32_e32 v6, v2, v6 ; 100C0D02 v_mul_f32_e32 v6, v1, v6 ; 100C0D01 v_mac_f32_e32 v5, s29, v6 ; 3E0A0C1D v_mac_f32_e32 v11, s30, v6 ; 3E160C1E v_mac_f32_e32 v3, s31, v6 ; 3E060C1F v_mul_f32_e32 v0, v2, v0 ; 10000102 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mac_f32_e32 v5, s32, v0 ; 3E0A0020 v_mac_f32_e32 v11, s33, v0 ; 3E160021 v_mac_f32_e32 v3, s34, v0 ; 3E060022 v_add_f32_e32 v1, s35, v5 ; 06020A23 v_add_f32_e32 v2, s36, v11 ; 06041624 v_add_f32_e32 v3, s0, v3 ; 06060600 v_subrev_f32_e32 v5, s8, v1 ; 0A0A0208 v_subrev_f32_e32 v6, s9, v2 ; 0A0C0409 v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_mac_f32_e32 v5, v6, v6 ; 3E0A0D06 v_subrev_f32_e32 v6, s12, v3 ; 0A0C060C v_mac_f32_e32 v5, v6, v6 ; 3E0A0D06 v_sub_f32_e64 v6, 1.0, s13 ; D2080006 00001AF2 v_mul_f32_e32 v0, v0, v6 ; 10000D00 v_sqrt_f32_e32 v5, v5 ; 7E0A6705 v_mac_f32_e32 v0, s13, v5 ; 3E000A0D v_subrev_f32_e32 v1, s4, v1 ; 0A020204 v_subrev_f32_e32 v2, s5, v2 ; 0A040405 v_subrev_f32_e32 v3, s10, v3 ; 0A06060A v_mad_f32 v5, 2.0, v7, -1.0 ; D2820005 03CE0EF4 v_mad_f32 v6, 2.0, v8, -1.0 ; D2820006 03CE10F4 v_mul_f32_e32 v7, v5, v5 ; 100E0B05 v_mac_f32_e32 v7, v6, v6 ; 3E0E0D06 v_mul_f32_e32 v8, v1, v1 ; 10100301 v_mac_f32_e32 v8, v2, v2 ; 3E100502 v_mac_f32_e32 v8, v3, v3 ; 3E100703 v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_mad_f32 v9, 2.0, v9, -1.0 ; D2820009 03CE12F4 v_mac_f32_e32 v7, v9, v9 ; 3E0E1309 v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_mad_f32 v1, -v1, v8, -s18 ; D2820001 A04A1101 v_mad_f32 v2, -v2, v8, -s19 ; D2820002 A04E1102 v_mad_f32 v3, -v3, v8, -s20 ; D2820003 A0521103 v_mul_f32_e32 v5, v7, v5 ; 100A0B07 v_mul_f32_e32 v8, v1, v1 ; 10100301 v_mac_f32_e32 v8, v2, v2 ; 3E100502 v_mac_f32_e32 v8, v3, v3 ; 3E100703 v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_mul_f32_e32 v11, s18, v5 ; 10160A12 v_mul_f32_e32 v6, v7, v6 ; 100C0D07 v_mad_f32 v11, -s19, v6, -v11 ; D282000B A42E0C13 v_mul_f32_e32 v1, v8, v1 ; 10020308 v_mul_f32_e32 v1, v5, v1 ; 10020305 v_mul_f32_e32 v2, v8, v2 ; 10040508 v_mac_f32_e32 v1, v6, v2 ; 3E020506 v_mov_b32_e32 v2, s7 ; 7E040207 v_mac_f32_e32 v2, s6, v0 ; 3E040006 v_mul_f32_e32 v5, v7, v9 ; 100A1307 v_mad_f32 v6, -s20, v5, v11 ; D2820006 242E0A14 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_add_f32_e32 v2, v2, v4 ; 06040902 v_mov_b32_e32 v4, s16 ; 7E080210 v_mul_f32_e32 v4, s23, v4 ; 10080817 v_mov_b32_e32 v7, s14 ; 7E0E020E v_mac_f32_e32 v4, s21, v7 ; 3E080E15 v_mov_b32_e32 v7, s14 ; 7E0E020E v_mov_b32_e32 v9, s15 ; 7E12020F v_mul_f32_e32 v9, s22, v9 ; 10121216 v_mul_f32_e32 v4, v4, v9 ; 10081304 v_mac_f32_e32 v9, s21, v7 ; 3E120E15 v_mov_b32_e32 v7, s16 ; 7E0E0210 v_mac_f32_e32 v9, s23, v7 ; 3E120E17 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_max_f32_e32 v6, 0, v6 ; 200C0C80 v_mul_f32_e32 v6, v2, v6 ; 100C0D02 v_mul_f32_e32 v7, s21, v6 ; 100E0C15 v_mul_f32_e32 v11, s22, v6 ; 10160C16 v_mul_f32_e32 v6, s23, v6 ; 100C0C17 v_mul_f32_e32 v3, v8, v3 ; 10060708 v_mac_f32_e32 v1, v5, v3 ; 3E020705 v_max_f32_e32 v1, 0, v1 ; 20020280 v_log_f32_e32 v1, v1 ; 7E024F01 v_sqrt_f32_e32 v3, v4 ; 7E066704 v_add_f32_e32 v3, v3, v3 ; 06060703 v_mac_f32_e32 v9, s17, v3 ; 3E120611 v_mov_b32_e32 v3, s25 ; 7E060219 v_mac_f32_e32 v3, s24, v0 ; 3E060018 v_mul_f32_e32 v0, 0x43000000, v10 ; 100014FF 43000000 v_mul_legacy_f32_e32 v0, v0, v1 ; 0E000300 v_exp_f32_e32 v0, v0 ; 7E004B00 v_add_f32_e64 v1, 0, v2 clamp ; D2060801 00020480 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_sub_f32_e32 v1, 1.0, v3 ; 080206F2 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mul_f32_e32 v2, v1, v7 ; 10040F01 v_mul_f32_e32 v3, v1, v11 ; 10061701 v_mul_f32_e32 v4, v1, v6 ; 10080D01 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_exp_f32_e64 v1, -v2 ; D34A0001 20000102 v_exp_f32_e64 v2, -v3 ; D34A0002 20000103 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_exp_f32_e64 v2, -v4 ; D34A0002 20000104 v_exp_f32_e64 v0, -v0 ; D34A0000 20000100 v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 12 Code Size: 812 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..17] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 0.5000, 1.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[14], IN[0].xxxx 1: MAD TEMP[0], CONST[15], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[16], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[17], IN[0].wwww, TEMP[0] 4: MUL TEMP[1].xyw, TEMP[0], IMM[0].xxxx 5: MOV TEMP[2].x, TEMP[1].xxxx 6: MUL TEMP[3].x, TEMP[1].yyyy, CONST[0].xxxx 7: MOV TEMP[2].y, TEMP[3].xxxx 8: ADD TEMP[1].xy, TEMP[2].xyyy, TEMP[1].wwww 9: MOV TEMP[1].zw, TEMP[0].wwzw 10: MOV TEMP[2].x, CONST[8].xxxx 11: MOV TEMP[2].y, CONST[9].xxxx 12: MOV TEMP[2].z, CONST[10].xxxx 13: MOV TEMP[3].x, CONST[8].yyyy 14: MOV TEMP[3].y, CONST[9].yyyy 15: MOV TEMP[3].z, CONST[10].yyyy 16: MOV TEMP[4].x, CONST[8].zzzz 17: MOV TEMP[4].y, CONST[9].zzzz 18: MOV TEMP[4].z, CONST[10].zzzz 19: MUL TEMP[2].xyz, TEMP[2].xyzz, IN[1].xxxx 20: MAD TEMP[2].xyz, TEMP[3].xyzz, IN[1].yyyy, TEMP[2].xyzz 21: MAD TEMP[2].xyz, TEMP[4].xyzz, IN[1].zzzz, TEMP[2].xyzz 22: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 23: RSQ TEMP[3].x, TEMP[3].xxxx 24: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 25: MOV TEMP[3].w, IMM[0].yyyy 26: MOV TEMP[3].xyz, TEMP[2].xyzx 27: DP4 TEMP[4].x, CONST[1], TEMP[3] 28: DP4 TEMP[5].x, CONST[2], TEMP[3] 29: MOV TEMP[4].y, TEMP[5].xxxx 30: DP4 TEMP[3].x, CONST[3], TEMP[3] 31: MOV TEMP[4].z, TEMP[3].xxxx 32: MUL TEMP[3], TEMP[2].xyzz, TEMP[2].yzzx 33: DP4 TEMP[5].x, CONST[4], TEMP[3] 34: DP4 TEMP[6].x, CONST[5], TEMP[3] 35: MOV TEMP[5].y, TEMP[6].xxxx 36: DP4 TEMP[3].x, CONST[6], TEMP[3] 37: MOV TEMP[5].z, TEMP[3].xxxx 38: MUL TEMP[3].x, TEMP[2].yyyy, TEMP[2].yyyy 39: MAD TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx, -TEMP[3].xxxx 40: MAD TEMP[2].xyz, CONST[7].xyzz, TEMP[2].xxxx, TEMP[5].xyzz 41: ADD TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xyzz 42: MAD TEMP[3].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww 43: MOV TEMP[3].w, TEMP[2].xxxx 44: MOV TEMP[2].xy, TEMP[2].yzyy 45: MAD TEMP[4].x, TEMP[0].zzzz, CONST[12].zzzz, CONST[12].wwww 46: MOV TEMP[3].z, TEMP[4].xxxx 47: MOV OUT[2], TEMP[3] 48: MOV OUT[1], TEMP[1] 49: MOV OUT[0], TEMP[0] 50: MOV OUT[3], TEMP[2] 51: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %72 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %73 = load <16 x i8>, <16 x i8> addrspace(2)* %72, align 16, !tbaa !0 %74 = add i32 %5, %7 %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %73, i32 0, i32 %74) %76 = extractelement <4 x float> %75, i32 0 %77 = extractelement <4 x float> %75, i32 1 %78 = extractelement <4 x float> %75, i32 2 %79 = extractelement <4 x float> %75, i32 3 %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %5, %7 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 %89 = add i32 %5, %7 %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %88, i32 0, i32 %89) %91 = extractelement <4 x float> %90, i32 0 %92 = extractelement <4 x float> %90, i32 1 %93 = fmul float %56, %76 %94 = fmul float %57, %76 %95 = fmul float %58, %76 %96 = fmul float %59, %76 %97 = fmul float %60, %77 %98 = fadd float %97, %93 %99 = fmul float %61, %77 %100 = fadd float %99, %94 %101 = fmul float %62, %77 %102 = fadd float %101, %95 %103 = fmul float %63, %77 %104 = fadd float %103, %96 %105 = fmul float %64, %78 %106 = fadd float %105, %98 %107 = fmul float %65, %78 %108 = fadd float %107, %100 %109 = fmul float %66, %78 %110 = fadd float %109, %102 %111 = fmul float %67, %78 %112 = fadd float %111, %104 %113 = fmul float %68, %79 %114 = fadd float %113, %106 %115 = fmul float %69, %79 %116 = fadd float %115, %108 %117 = fmul float %70, %79 %118 = fadd float %117, %110 %119 = fmul float %71, %79 %120 = fadd float %119, %112 %121 = fmul float %114, 5.000000e-01 %122 = fmul float %116, 5.000000e-01 %123 = fmul float %120, 5.000000e-01 %124 = fmul float %122, %13 %125 = fadd float %121, %123 %126 = fadd float %124, %123 %127 = fmul float %41, %84 %128 = fmul float %44, %84 %129 = fmul float %47, %84 %130 = fmul float %42, %85 %131 = fadd float %130, %127 %132 = fmul float %45, %85 %133 = fadd float %132, %128 %134 = fmul float %48, %85 %135 = fadd float %134, %129 %136 = fmul float %43, %86 %137 = fadd float %136, %131 %138 = fmul float %46, %86 %139 = fadd float %138, %133 %140 = fmul float %49, %86 %141 = fadd float %140, %135 %142 = fmul float %137, %137 %143 = fmul float %139, %139 %144 = fadd float %143, %142 %145 = fmul float %141, %141 %146 = fadd float %144, %145 %147 = call float @llvm.AMDGPU.rsq.clamped.f32(float %146) %148 = fmul float %137, %147 %149 = fmul float %139, %147 %150 = fmul float %141, %147 %151 = fmul float %14, %148 %152 = fmul float %15, %149 %153 = fadd float %151, %152 %154 = fmul float %16, %150 %155 = fadd float %153, %154 %156 = fadd float %155, %17 %157 = fmul float %18, %148 %158 = fmul float %19, %149 %159 = fadd float %157, %158 %160 = fmul float %20, %150 %161 = fadd float %159, %160 %162 = fadd float %161, %21 %163 = fmul float %22, %148 %164 = fmul float %23, %149 %165 = fadd float %163, %164 %166 = fmul float %24, %150 %167 = fadd float %165, %166 %168 = fadd float %167, %25 %169 = fmul float %148, %149 %170 = fmul float %149, %150 %171 = fmul float %150, %150 %172 = fmul float %150, %148 %173 = fmul float %26, %169 %174 = fmul float %27, %170 %175 = fadd float %173, %174 %176 = fmul float %28, %171 %177 = fadd float %175, %176 %178 = fmul float %29, %172 %179 = fadd float %177, %178 %180 = fmul float %30, %169 %181 = fmul float %31, %170 %182 = fadd float %180, %181 %183 = fmul float %32, %171 %184 = fadd float %182, %183 %185 = fmul float %33, %172 %186 = fadd float %184, %185 %187 = fmul float %34, %169 %188 = fmul float %35, %170 %189 = fadd float %187, %188 %190 = fmul float %36, %171 %191 = fadd float %189, %190 %192 = fmul float %37, %172 %193 = fadd float %191, %192 %194 = fmul float %149, %149 %195 = fmul float %148, %148 %196 = fsub float %195, %194 %197 = fmul float %38, %196 %198 = fadd float %197, %179 %199 = fmul float %39, %196 %200 = fadd float %199, %186 %201 = fmul float %40, %196 %202 = fadd float %201, %193 %203 = fadd float %198, %156 %204 = fadd float %200, %162 %205 = fadd float %202, %168 %206 = fmul float %91, %52 %207 = fadd float %206, %54 %208 = fmul float %92, %53 %209 = fadd float %208, %55 %210 = fmul float %118, %50 %211 = fadd float %210, %51 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %125, float %126, float %118, float %120) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %207, float %209, float %211, float %203) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %204, float %205, float %205, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %114, float %116, float %118, float %120) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[12:15], 0x1c ; C2000D1C buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[16:19], 0 idxen ; E00C2000 80040500 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[8:11], v0, s[8:11], 0 idxen ; E00C2000 80020800 s_buffer_load_dword s1, s[12:15], 0x1d ; C2008D1D s_buffer_load_dword s2, s[12:15], 0x1e ; C2010D1E s_buffer_load_dword s5, s[12:15], 0x20 ; C2028D20 s_buffer_load_dword s6, s[12:15], 0x21 ; C2030D21 s_buffer_load_dword s7, s[12:15], 0x22 ; C2038D22 s_buffer_load_dword s8, s[12:15], 0x24 ; C2040D24 s_buffer_load_dword s9, s[12:15], 0x25 ; C2048D25 s_buffer_load_dword s10, s[12:15], 0x26 ; C2050D26 s_buffer_load_dword s11, s[12:15], 0x28 ; C2058D28 s_buffer_load_dword s16, s[12:15], 0x29 ; C2080D29 s_buffer_load_dword s17, s[12:15], 0x2a ; C2088D2A s_buffer_load_dword s3, s[12:15], 0x32 ; C2018D32 s_buffer_load_dword s4, s[12:15], 0x33 ; C2020D33 s_buffer_load_dword s18, s[12:15], 0x34 ; C2090D34 s_buffer_load_dword s19, s[12:15], 0x35 ; C2098D35 s_buffer_load_dword s20, s[12:15], 0x36 ; C20A0D36 s_buffer_load_dword s21, s[12:15], 0x37 ; C20A8D37 s_buffer_load_dword s22, s[12:15], 0x38 ; C20B0D38 s_buffer_load_dword s23, s[12:15], 0x39 ; C20B8D39 s_buffer_load_dword s24, s[12:15], 0x3a ; C20C0D3A s_buffer_load_dword s25, s[12:15], 0x3b ; C20C8D3B s_buffer_load_dword s26, s[12:15], 0x3c ; C20D0D3C s_buffer_load_dword s27, s[12:15], 0x3d ; C20D8D3D s_buffer_load_dword s28, s[12:15], 0x3e ; C20E0D3E s_buffer_load_dword s29, s[12:15], 0x3f ; C20E8D3F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s20 ; 7E000214 s_buffer_load_dword s20, s[12:15], 0x40 ; C20A0D40 v_mov_b32_e32 v10, s21 ; 7E140215 s_buffer_load_dword s21, s[12:15], 0x41 ; C20A8D41 s_buffer_load_dword s30, s[12:15], 0x42 ; C20F0D42 s_buffer_load_dword s31, s[12:15], 0x43 ; C20F8D43 s_buffer_load_dword s32, s[12:15], 0x44 ; C2100D44 s_buffer_load_dword s33, s[12:15], 0x45 ; C2108D45 s_buffer_load_dword s34, s[12:15], 0x46 ; C2110D46 s_buffer_load_dword s35, s[12:15], 0x47 ; C2118D47 v_mul_f32_e32 v11, s22, v1 ; 10160216 v_mul_f32_e32 v12, s5, v5 ; 10180A05 v_mac_f32_e32 v0, s18, v8 ; 3E001012 v_mac_f32_e32 v10, s19, v9 ; 3E141213 v_mac_f32_e32 v12, s6, v6 ; 3E180C06 v_mul_f32_e32 v8, s8, v5 ; 10100A08 v_mac_f32_e32 v8, s9, v6 ; 3E100C09 v_mul_f32_e32 v5, s11, v5 ; 100A0A0B v_mac_f32_e32 v5, s16, v6 ; 3E0A0C10 v_mac_f32_e32 v12, s7, v7 ; 3E180E07 v_mac_f32_e32 v8, s10, v7 ; 3E100E0A v_mac_f32_e32 v5, s17, v7 ; 3E0A0E11 v_mac_f32_e32 v11, s26, v2 ; 3E16041A v_mul_f32_e32 v6, s23, v1 ; 100C0217 v_mac_f32_e32 v6, s27, v2 ; 3E0C041B v_mul_f32_e32 v7, s24, v1 ; 100E0218 v_mac_f32_e32 v7, s28, v2 ; 3E0E041C v_mul_f32_e32 v1, s25, v1 ; 10020219 v_mac_f32_e32 v1, s29, v2 ; 3E02041D s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v11, s20, v3 ; 3E160614 v_mac_f32_e32 v6, s21, v3 ; 3E0C0615 v_mac_f32_e32 v7, s30, v3 ; 3E0E061E v_mac_f32_e32 v1, s31, v3 ; 3E02061F v_mac_f32_e32 v11, s32, v4 ; 3E160820 v_mac_f32_e32 v6, s33, v4 ; 3E0C0821 v_mac_f32_e32 v7, s34, v4 ; 3E0E0822 v_mac_f32_e32 v1, s35, v4 ; 3E020823 s_buffer_load_dword s5, s[12:15], 0x0 ; C2028D00 s_buffer_load_dword s6, s[12:15], 0x4 ; C2030D04 s_buffer_load_dword s7, s[12:15], 0x5 ; C2038D05 s_buffer_load_dword s8, s[12:15], 0x6 ; C2040D06 s_buffer_load_dword s9, s[12:15], 0x7 ; C2048D07 s_buffer_load_dword s10, s[12:15], 0x8 ; C2050D08 s_buffer_load_dword s11, s[12:15], 0x9 ; C2058D09 s_buffer_load_dword s16, s[12:15], 0xa ; C2080D0A s_buffer_load_dword s17, s[12:15], 0xb ; C2088D0B s_buffer_load_dword s18, s[12:15], 0xc ; C2090D0C s_buffer_load_dword s19, s[12:15], 0xd ; C2098D0D s_buffer_load_dword s20, s[12:15], 0xe ; C20A0D0E s_buffer_load_dword s21, s[12:15], 0xf ; C20A8D0F s_buffer_load_dword s22, s[12:15], 0x10 ; C20B0D10 s_buffer_load_dword s23, s[12:15], 0x11 ; C20B8D11 s_buffer_load_dword s24, s[12:15], 0x12 ; C20C0D12 s_buffer_load_dword s25, s[12:15], 0x13 ; C20C8D13 s_buffer_load_dword s26, s[12:15], 0x14 ; C20D0D14 s_buffer_load_dword s27, s[12:15], 0x15 ; C20D8D15 s_buffer_load_dword s28, s[12:15], 0x16 ; C20E0D16 s_buffer_load_dword s29, s[12:15], 0x17 ; C20E8D17 s_buffer_load_dword s30, s[12:15], 0x18 ; C20F0D18 s_buffer_load_dword s31, s[12:15], 0x19 ; C20F8D19 s_buffer_load_dword s32, s[12:15], 0x1a ; C2100D1A s_buffer_load_dword s12, s[12:15], 0x1b ; C2060D1B v_mul_f32_e32 v2, v12, v12 ; 1004190C v_mac_f32_e32 v2, v8, v8 ; 3E041108 v_mac_f32_e32 v2, v5, v5 ; 3E040B05 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 v_mul_f32_e32 v3, 0.5, v6 ; 10060CF0 v_mul_f32_e32 v4, 0.5, v1 ; 100802F0 v_mad_f32 v9, 0.5, v11, v4 ; D2820009 041216F0 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v4, s5, v3 ; 3E080605 exp 15, 32, 0, 0, 0, v9, v4, v7, v1 ; F800020F 01070409 v_mul_f32_e32 v3, v2, v8 ; 10061102 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v4, v2, v5 ; 10080B02 v_mul_f32_e32 v5, v4, v3 ; 100A0704 v_mul_f32_e32 v8, s23, v5 ; 10100A17 v_mul_f32_e32 v9, s27, v5 ; 10120A1B v_mul_f32_e32 v5, s31, v5 ; 100A0A1F v_mul_f32_e32 v2, v2, v12 ; 10041902 v_mul_f32_e32 v12, v3, v2 ; 10180503 v_mac_f32_e32 v8, s22, v12 ; 3E101816 v_mac_f32_e32 v9, s26, v12 ; 3E12181A v_mac_f32_e32 v5, s30, v12 ; 3E0A181E v_mul_f32_e32 v12, v4, v4 ; 10180904 v_mac_f32_e32 v8, s24, v12 ; 3E101818 v_mac_f32_e32 v9, s28, v12 ; 3E12181C v_mac_f32_e32 v5, s32, v12 ; 3E0A1820 v_mul_f32_e32 v12, s7, v3 ; 10180607 v_mac_f32_e32 v12, s6, v2 ; 3E180406 v_mul_f32_e32 v13, s11, v3 ; 101A060B v_mac_f32_e32 v13, s10, v2 ; 3E1A040A v_mul_f32_e32 v14, s19, v3 ; 101C0613 v_mac_f32_e32 v14, s18, v2 ; 3E1C0412 v_mac_f32_e32 v12, s8, v4 ; 3E180808 v_mac_f32_e32 v13, s16, v4 ; 3E1A0810 v_mac_f32_e32 v14, s20, v4 ; 3E1C0814 v_mul_f32_e32 v4, v2, v4 ; 10080902 v_mac_f32_e32 v8, s25, v4 ; 3E100819 v_mac_f32_e32 v9, s29, v4 ; 3E12081D v_mac_f32_e32 v5, s12, v4 ; 3E0A080C v_mov_b32_e32 v4, s4 ; 7E080204 v_mac_f32_e32 v4, s3, v7 ; 3E080E03 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mad_f32 v2, v2, v2, -v3 ; D2820002 840E0502 v_add_f32_e32 v3, s9, v12 ; 06061809 v_mac_f32_e32 v8, s0, v2 ; 3E100400 v_mac_f32_e32 v9, s1, v2 ; 3E120401 v_mac_f32_e32 v5, s2, v2 ; 3E0A0402 v_add_f32_e32 v2, v3, v8 ; 06041103 exp 15, 33, 0, 0, 0, v0, v10, v4, v2 ; F800021F 02040A00 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v0, s17, v13 ; 06001A11 v_add_f32_e32 v2, s21, v14 ; 06041C15 v_add_f32_e32 v0, v0, v9 ; 06001300 v_add_f32_e32 v2, v2, v5 ; 06040B02 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 34, 0, 0, 0, v0, v2, v2, v3 ; F800022F 03020200 exp 15, 12, 0, 1, 0, v11, v6, v7, v1 ; F80008CF 0107060B s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 668 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0] DCL CONST[2] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].x, IN[1].wwww 1: MOV TEMP[0].yz, IN[2].yxyy 2: MOV TEMP[1].xy, IN[1].xyyy 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D 4: MUL TEMP[1].xyz, TEMP[1], CONST[2] 5: MOV TEMP[2].xy, IN[0].xyyy 6: MOV TEMP[2].w, IN[0].wwww 7: TXP TEMP[2], TEMP[2], SAMP[1], 2D 8: LG2 TEMP[3].x, TEMP[2].xxxx 9: LG2 TEMP[3].y, TEMP[2].yyyy 10: LG2 TEMP[3].z, TEMP[2].zzzz 11: LG2 TEMP[3].w, TEMP[2].wwww 12: MOV TEMP[2].xyz, -TEMP[3] 13: ADD TEMP[0].xyz, TEMP[2].xyzz, TEMP[0].xyzz 14: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[0].xyzz 15: MOV_SAT TEMP[1].x, IN[1].zzzz 16: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[0].xyzz 17: MOV TEMP[0].w, IMM[0].xxxx 18: MOV OUT[0], TEMP[0] 19: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %30 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0 %32 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 %34 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %35 = bitcast <8 x i32> addrspace(2)* %34 to <32 x i8> addrspace(2)* %36 = load <32 x i8>, <32 x i8> addrspace(2)* %35, align 32, !tbaa !0 %37 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %38 = bitcast <4 x i32> addrspace(2)* %37 to <16 x i8> addrspace(2)* %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %49 = bitcast float %43 to i32 %50 = bitcast float %44 to i32 %51 = insertelement <2 x i32> undef, i32 %49, i32 0 %52 = insertelement <2 x i32> %51, i32 %50, i32 1 %53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %52, <32 x i8> %31, <16 x i8> %33, i32 2) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = extractelement <4 x float> %53, i32 2 %57 = fmul float %54, %27 %58 = fmul float %55, %28 %59 = fmul float %56, %29 %60 = fdiv float %40, %42 %61 = fdiv float %41, %42 %62 = bitcast float %60 to i32 %63 = bitcast float %61 to i32 %64 = insertelement <2 x i32> undef, i32 %62, i32 0 %65 = insertelement <2 x i32> %64, i32 %63, i32 1 %66 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %65, <32 x i8> %36, <16 x i8> %39, i32 2) %67 = extractelement <4 x float> %66, i32 0 %68 = extractelement <4 x float> %66, i32 1 %69 = extractelement <4 x float> %66, i32 2 %70 = call float @llvm.log2.f32(float %67) %71 = call float @llvm.log2.f32(float %68) %72 = call float @llvm.log2.f32(float %69) %73 = fsub float %46, %70 %74 = fsub float %47, %71 %75 = fsub float %48, %72 %76 = fmul float %57, %73 %77 = fmul float %58, %74 %78 = fmul float %59, %75 %79 = call float @llvm.AMDIL.clamp.(float %45, float 0.000000e+00, float 1.000000e+00) %80 = call float @llvm.AMDGPU.lrp(float %79, float %76, float %24) %81 = call float @llvm.AMDGPU.lrp(float %79, float %77, float %25) %82 = call float @llvm.AMDGPU.lrp(float %79, float %78, float %26) %83 = call i32 @llvm.SI.packf16(float %80, float %81) %84 = bitcast i32 %83 to float %85 = call i32 @llvm.SI.packf16(float %82, float 1.000000e+00) %86 = bitcast i32 %85 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %84, float %86, float %84, float %86) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_mov_b32_e32 v5, 0x6f800000 ; 7E0A02FF 6F800000 v_cmp_gt_f32_e64 vcc, |v4|, v5 ; D008016A 00020B04 v_mov_b32_e32 v5, 0x2f800000 ; 7E0A02FF 2F800000 v_cndmask_b32_e32 v5, 1.0, v5 ; 000A0AF2 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v0, v0, 1, 2, [m0] ; C8000900 v_mul_f32_e32 v4, v5, v4 ; 10080905 v_rcp_f32_e32 v4, v4 ; 7E085504 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 v_interp_p2_f32 v0, [v0], v1, 1, 2, [m0] ; C8010901 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:13], 7, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[12:19], s[8:11] ; F0800700 00430B06 v_mul_f32_e32 v1, v4, v2 ; 10020504 v_mul_f32_e32 v2, v4, v3 ; 10040704 v_mul_f32_e32 v3, v1, v5 ; 10060B01 v_mul_f32_e32 v4, v2, v5 ; 10080B02 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_buffer_load_dword s5, s[0:3], 0x9 ; C2028109 s_buffer_load_dword s6, s[0:3], 0xa ; C203010A image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[24:31], s[20:23] ; F0800700 00A60103 s_buffer_load_dword s7, s[0:3], 0x0 ; C2038100 s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_log_f32_e32 v1, v1 ; 7E024F01 v_log_f32_e32 v2, v2 ; 7E044F02 v_log_f32_e32 v3, v3 ; 7E064F03 v_mul_f32_e32 v4, s4, v11 ; 10081604 v_mul_f32_e32 v5, s5, v12 ; 100A1805 v_mul_f32_e32 v6, s6, v13 ; 100C1A06 v_subrev_f32_e32 v1, v1, v9 ; 0A021301 v_subrev_f32_e32 v2, v2, v10 ; 0A041502 v_subrev_f32_e32 v0, v3, v0 ; 0A000103 v_mul_f32_e32 v1, v1, v4 ; 10020901 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_mul_f32_e32 v0, v0, v6 ; 10000D00 v_add_f32_e64 v3, 0, v8 clamp ; D2060803 00021080 v_sub_f32_e32 v4, 1.0, v3 ; 080806F2 v_mul_f32_e32 v5, s7, v4 ; 100A0807 v_mac_f32_e32 v5, v1, v3 ; 3E0A0701 v_mul_f32_e32 v1, s8, v4 ; 10020808 v_mac_f32_e32 v1, v2, v3 ; 3E020702 v_mul_f32_e32 v2, s0, v4 ; 10040800 v_mac_f32_e32 v2, v0, v3 ; 3E040700 v_cvt_pkrtz_f16_f32_e32 v0, v5, v1 ; 5E000305 v_cvt_pkrtz_f16_f32_e64 v1, v2, 1.0 ; D25E0001 0001E502 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 308 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL OUT[6], GENERIC[5] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0..15] DCL CONST[17..20] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 0.2500, -1.0000, 10.0000, 0.4999} IMM[1] INT32 {256, 0, 1, 2} IMM[2] FLT32 { 1.0000, 0.0000, 0.1000, 0.0039} IMM[3] FLT32 { 16.0000, -8.0000, 4.0000, -2.0000} IMM[4] INT32 {4, 0, 0, 0} IMM[5] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0].x, IN[2].xxxx, IMM[0].xxxx 1: F2I TEMP[0].x, TEMP[0].xxxx 2: F2I TEMP[1].x, IN[2].yyyy 3: IDIV TEMP[2].x, TEMP[1].xxxx, IMM[1].xxxx 4: I2F TEMP[3].x, TEMP[0].xxxx 5: I2F TEMP[4].x, TEMP[2].xxxx 6: MOV TEMP[3].y, TEMP[4].xxxx 7: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx 8: INEG TEMP[2].x, TEMP[2].xxxx 9: UADD TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx 10: I2F TEMP[2].x, TEMP[2].xxxx 11: MOV TEMP[3].z, TEMP[2].xxxx 12: ADD TEMP[2].xyz, TEMP[3].xyzz, IMM[0].yyyy 13: I2F TEMP[1].x, TEMP[1].xxxx 14: ADD TEMP[1].x, IN[2].yyyy, -TEMP[1].xxxx 15: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].wwww 16: F2I TEMP[1].x, TEMP[1].xxxx 17: USEQ TEMP[4].x, TEMP[1].xxxx, IMM[1].yyyy 18: AND TEMP[4].x, TEMP[4].xxxx, IMM[2].xxxx 19: USEQ TEMP[5].x, TEMP[1].xxxx, IMM[1].zzzz 20: AND TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx 21: MOV TEMP[4].y, TEMP[5].xxxx 22: USEQ TEMP[1].x, TEMP[1].xxxx, IMM[1].wwww 23: AND TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx 24: MOV TEMP[4].z, TEMP[1].xxxx 25: MOV TEMP[1].xyz, TEMP[4].xyzx 26: MOV TEMP[4].w, IMM[2].yyyy 27: MOV TEMP[4].xyz, TEMP[3].xyzx 28: MOV TEMP[3].y, IMM[2].yzyy 29: DP4 TEMP[4].x, TEMP[1], TEMP[4] 30: MUL TEMP[3].x, TEMP[4].xxxx, IMM[2].wwww 31: MOV TEMP[3].xy, TEMP[3].xyyy 32: MOV TEMP[3].w, IMM[2].yyyy 33: TXL TEMP[3].xy, TEMP[3], SAMP[0], 2D 34: MAD TEMP[4].x, TEMP[3].xxxx, IMM[3].xxxx, IMM[3].yyyy 35: MOV TEMP[2].w, TEMP[4].xxxx 36: MUL TEMP[3].x, TEMP[3].yyyy, IMM[3].zzzz 37: MOV TEMP[1].w, TEMP[3].xxxx 38: UMUL TEMP[0].x, IMM[4].xxxx, TEMP[0].xxxx 39: I2F TEMP[0].x, TEMP[0].xxxx 40: ADD TEMP[0].x, IN[2].xxxx, -TEMP[0].xxxx 41: ADD TEMP[0].x, TEMP[0].xxxx, IMM[3].wwww 42: MUL TEMP[3], CONST[17], IN[0].xxxx 43: MAD TEMP[3], CONST[18], IN[0].yyyy, TEMP[3] 44: MAD TEMP[3], CONST[19], IN[0].zzzz, TEMP[3] 45: MAD TEMP[3], CONST[20], IN[0].wwww, TEMP[3] 46: MOV TEMP[4].x, CONST[12].xxxx 47: MOV TEMP[4].y, CONST[13].xxxx 48: MOV TEMP[4].z, CONST[14].xxxx 49: MOV TEMP[5].x, CONST[12].yyyy 50: MOV TEMP[5].y, CONST[13].yyyy 51: MOV TEMP[5].z, CONST[14].yyyy 52: MOV TEMP[6].x, CONST[12].zzzz 53: MOV TEMP[6].y, CONST[13].zzzz 54: MOV TEMP[6].z, CONST[14].zzzz 55: MUL TEMP[4].xyz, TEMP[4].xyzz, IN[1].xxxx 56: MAD TEMP[4].xyz, TEMP[5].xyzz, IN[1].yyyy, TEMP[4].xyzz 57: MAD TEMP[0].xyz, TEMP[6].xyzz, TEMP[0].xxxx, TEMP[4].xyzz 58: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[0].xyzz 59: RSQ TEMP[4].x, TEMP[4].xxxx 60: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xxxx 61: MUL TEMP[4].xyw, TEMP[3], IMM[5].xxxx 62: MOV TEMP[5].x, TEMP[4].xxxx 63: MUL TEMP[6].x, TEMP[4].yyyy, CONST[0].xxxx 64: MOV TEMP[5].y, TEMP[6].xxxx 65: ADD TEMP[4].xy, TEMP[5].xyyy, TEMP[4].wwww 66: MOV TEMP[4].zw, TEMP[3].wwzw 67: MOV TEMP[5].w, IMM[2].xxxx 68: MOV TEMP[5].xyz, TEMP[0].xyzx 69: DP4 TEMP[6].x, CONST[1], TEMP[5] 70: DP4 TEMP[7].x, CONST[2], TEMP[5] 71: MOV TEMP[6].y, TEMP[7].xxxx 72: DP4 TEMP[5].x, CONST[3], TEMP[5] 73: MOV TEMP[6].z, TEMP[5].xxxx 74: MUL TEMP[5], TEMP[0].xyzz, TEMP[0].yzzx 75: DP4 TEMP[7].x, CONST[4], TEMP[5] 76: DP4 TEMP[8].x, CONST[5], TEMP[5] 77: MOV TEMP[7].y, TEMP[8].xxxx 78: DP4 TEMP[5].x, CONST[6], TEMP[5] 79: MOV TEMP[7].z, TEMP[5].xxxx 80: MUL TEMP[5].x, TEMP[0].yyyy, TEMP[0].yyyy 81: MAD TEMP[5].x, TEMP[0].xxxx, TEMP[0].xxxx, -TEMP[5].xxxx 82: MAD TEMP[5].xyz, CONST[7].xyzz, TEMP[5].xxxx, TEMP[7].xyzz 83: ADD TEMP[5].xyz, TEMP[5].xyzz, TEMP[6].xyzz 84: MOV TEMP[0].yzw, TEMP[0].yxyz 85: MUL TEMP[6], CONST[8], IN[0].xxxx 86: MAD TEMP[6], CONST[9], IN[0].yyyy, TEMP[6] 87: MAD TEMP[6], CONST[10], IN[0].zzzz, TEMP[6] 88: MAD TEMP[6].xyz, CONST[11], IN[0].wwww, TEMP[6] 89: MOV TEMP[6].xyz, TEMP[6].xyzx 90: MOV TEMP[6].w, TEMP[5].xxxx 91: MOV TEMP[5].xy, TEMP[5].yzyy 92: MOV TEMP[0].x, TEMP[3].zzzz 93: MOV OUT[5], TEMP[6] 94: MOV OUT[1], TEMP[2] 95: MOV OUT[2], TEMP[1] 96: MOV OUT[3], TEMP[4] 97: MOV OUT[4], TEMP[0] 98: MOV OUT[0], TEMP[3] 99: MOV OUT[6], TEMP[5] 100: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332) %78 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %79 = load <32 x i8>, <32 x i8> addrspace(2)* %78, align 32, !tbaa !0 %80 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0 %84 = add i32 %5, %7 %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %84) %86 = extractelement <4 x float> %85, i32 0 %87 = extractelement <4 x float> %85, i32 1 %88 = extractelement <4 x float> %85, i32 2 %89 = extractelement <4 x float> %85, i32 3 %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 %92 = add i32 %5, %7 %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %92) %94 = extractelement <4 x float> %93, i32 0 %95 = extractelement <4 x float> %93, i32 1 %96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0 %98 = add i32 %5, %7 %99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = fmul float %100, 2.500000e-01 %103 = fptosi float %102 to i32 %104 = fptosi float %101 to i32 %105 = sdiv i32 %104, 256 %106 = sitofp i32 %103 to float %107 = sitofp i32 %105 to float %108 = shl nsw i32 %105, 8 %109 = sub i32 %104, %108 %110 = sitofp i32 %109 to float %111 = fadd float %106, -1.000000e+00 %112 = fadd float %107, -1.000000e+00 %113 = fadd float %110, -1.000000e+00 %114 = sitofp i32 %104 to float %115 = fsub float %101, %114 %116 = fmul float %115, 1.000000e+01 %117 = fadd float %116, 0x3FDFFE5CA0000000 %118 = fptosi float %117 to i32 %119 = icmp eq i32 %118, 0 %120 = select i1 %119, float 1.000000e+00, float 0.000000e+00 %121 = icmp eq i32 %118, 1 %122 = select i1 %121, float 1.000000e+00, float 0.000000e+00 %123 = icmp eq i32 %118, 2 %124 = select i1 %123, float 1.000000e+00, float 0.000000e+00 %125 = fmul float %120, %106 %126 = fmul float %122, %107 %127 = fadd float %125, %126 %128 = fmul float %124, %110 %129 = fadd float %127, %128 %130 = fadd float %129, 0.000000e+00 %131 = fmul float %130, 0x3F70101020000000 %132 = bitcast float %131 to i32 %133 = insertelement <4 x i32> undef, i32 %132, i32 0 %134 = insertelement <4 x i32> %133, i32 1036831949, i32 1 %135 = insertelement <4 x i32> %134, i32 0, i32 2 %136 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %135, <32 x i8> %79, <16 x i8> %81, i32 2) %137 = extractelement <4 x float> %136, i32 0 %138 = extractelement <4 x float> %136, i32 1 %139 = fmul float %137, 1.600000e+01 %140 = fadd float %139, -8.000000e+00 %141 = fmul float %138, 4.000000e+00 %142 = shl i32 %103, 2 %143 = sitofp i32 %142 to float %144 = fsub float %100, %143 %145 = fadd float %144, -2.000000e+00 %146 = fmul float %62, %86 %147 = fmul float %63, %86 %148 = fmul float %64, %86 %149 = fmul float %65, %86 %150 = fmul float %66, %87 %151 = fadd float %150, %146 %152 = fmul float %67, %87 %153 = fadd float %152, %147 %154 = fmul float %68, %87 %155 = fadd float %154, %148 %156 = fmul float %69, %87 %157 = fadd float %156, %149 %158 = fmul float %70, %88 %159 = fadd float %158, %151 %160 = fmul float %71, %88 %161 = fadd float %160, %153 %162 = fmul float %72, %88 %163 = fadd float %162, %155 %164 = fmul float %73, %88 %165 = fadd float %164, %157 %166 = fmul float %74, %89 %167 = fadd float %166, %159 %168 = fmul float %75, %89 %169 = fadd float %168, %161 %170 = fmul float %76, %89 %171 = fadd float %170, %163 %172 = fmul float %77, %89 %173 = fadd float %172, %165 %174 = fmul float %53, %94 %175 = fmul float %56, %94 %176 = fmul float %59, %94 %177 = fmul float %54, %95 %178 = fadd float %177, %174 %179 = fmul float %57, %95 %180 = fadd float %179, %175 %181 = fmul float %60, %95 %182 = fadd float %181, %176 %183 = fmul float %55, %145 %184 = fadd float %183, %178 %185 = fmul float %58, %145 %186 = fadd float %185, %180 %187 = fmul float %61, %145 %188 = fadd float %187, %182 %189 = fmul float %184, %184 %190 = fmul float %186, %186 %191 = fadd float %190, %189 %192 = fmul float %188, %188 %193 = fadd float %191, %192 %194 = call float @llvm.AMDGPU.rsq.clamped.f32(float %193) %195 = fmul float %184, %194 %196 = fmul float %186, %194 %197 = fmul float %188, %194 %198 = fmul float %167, 5.000000e-01 %199 = fmul float %169, 5.000000e-01 %200 = fmul float %173, 5.000000e-01 %201 = fmul float %199, %13 %202 = fadd float %198, %200 %203 = fadd float %201, %200 %204 = fmul float %14, %195 %205 = fmul float %15, %196 %206 = fadd float %204, %205 %207 = fmul float %16, %197 %208 = fadd float %206, %207 %209 = fadd float %208, %17 %210 = fmul float %18, %195 %211 = fmul float %19, %196 %212 = fadd float %210, %211 %213 = fmul float %20, %197 %214 = fadd float %212, %213 %215 = fadd float %214, %21 %216 = fmul float %22, %195 %217 = fmul float %23, %196 %218 = fadd float %216, %217 %219 = fmul float %24, %197 %220 = fadd float %218, %219 %221 = fadd float %220, %25 %222 = fmul float %195, %196 %223 = fmul float %196, %197 %224 = fmul float %197, %197 %225 = fmul float %197, %195 %226 = fmul float %26, %222 %227 = fmul float %27, %223 %228 = fadd float %226, %227 %229 = fmul float %28, %224 %230 = fadd float %228, %229 %231 = fmul float %29, %225 %232 = fadd float %230, %231 %233 = fmul float %30, %222 %234 = fmul float %31, %223 %235 = fadd float %233, %234 %236 = fmul float %32, %224 %237 = fadd float %235, %236 %238 = fmul float %33, %225 %239 = fadd float %237, %238 %240 = fmul float %34, %222 %241 = fmul float %35, %223 %242 = fadd float %240, %241 %243 = fmul float %36, %224 %244 = fadd float %242, %243 %245 = fmul float %37, %225 %246 = fadd float %244, %245 %247 = fmul float %196, %196 %248 = fmul float %195, %195 %249 = fsub float %248, %247 %250 = fmul float %38, %249 %251 = fadd float %250, %232 %252 = fmul float %39, %249 %253 = fadd float %252, %239 %254 = fmul float %40, %249 %255 = fadd float %254, %246 %256 = fadd float %251, %209 %257 = fadd float %253, %215 %258 = fadd float %255, %221 %259 = fmul float %41, %86 %260 = fmul float %42, %86 %261 = fmul float %43, %86 %262 = fmul float %44, %87 %263 = fadd float %262, %259 %264 = fmul float %45, %87 %265 = fadd float %264, %260 %266 = fmul float %46, %87 %267 = fadd float %266, %261 %268 = fmul float %47, %88 %269 = fadd float %268, %263 %270 = fmul float %48, %88 %271 = fadd float %270, %265 %272 = fmul float %49, %88 %273 = fadd float %272, %267 %274 = fmul float %50, %89 %275 = fadd float %274, %269 %276 = fmul float %51, %89 %277 = fadd float %276, %271 %278 = fmul float %52, %89 %279 = fadd float %278, %273 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %111, float %112, float %113, float %140) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %120, float %122, float %124, float %141) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %202, float %203, float %171, float %173) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %171, float %195, float %196, float %197) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %275, float %277, float %279, float %256) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %257, float %258, float %258, float %225) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %167, float %169, float %171, float %173) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0x3efff2e5 ; 7E0202FF 3EFFF2E5 v_mov_b32_e32 v2, 0xc1000000 ; 7E0402FF C1000000 v_mov_b32_e32 v5, 0 ; 7E0A0280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[72:75], s[2:3], 0x0 ; C0A40300 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s70, s[72:75], 0x30 ; C2234930 s_buffer_load_dword s48, s[72:75], 0x31 ; C2184931 buffer_load_format_xyzw v[6:9], v0, s[0:3], 0 idxen ; E00C2000 80000600 buffer_load_format_xyzw v[10:13], v0, s[12:15], 0 idxen ; E00C2000 80030A00 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[12:15], v0, s[8:11], 0 idxen ; E00C2000 80020C00 s_buffer_load_dword s14, s[72:75], 0x32 ; C2074932 s_buffer_load_dword s71, s[72:75], 0x34 ; C223C934 s_buffer_load_dword s52, s[72:75], 0x35 ; C21A4935 s_buffer_load_dword s16, s[72:75], 0x36 ; C2084936 s_buffer_load_dword s76, s[72:75], 0x38 ; C2264938 s_buffer_load_dword s53, s[72:75], 0x39 ; C21AC939 s_buffer_load_dword s19, s[72:75], 0x3a ; C209C93A s_buffer_load_dword s77, s[72:75], 0x44 ; C226C944 s_buffer_load_dword s78, s[72:75], 0x45 ; C2274945 s_buffer_load_dword s79, s[72:75], 0x46 ; C227C946 s_buffer_load_dword s80, s[72:75], 0x47 ; C2284947 s_buffer_load_dword s56, s[72:75], 0x48 ; C21C4948 s_buffer_load_dword s57, s[72:75], 0x49 ; C21CC949 s_buffer_load_dword s54, s[72:75], 0x4a ; C21B494A s_buffer_load_dword s55, s[72:75], 0x4b ; C21BC94B s_buffer_load_dword s49, s[72:75], 0x4c ; C218C94C s_buffer_load_dword s50, s[72:75], 0x4d ; C219494D s_buffer_load_dword s47, s[72:75], 0x4e ; C217C94E s_buffer_load_dword s2, s[72:75], 0xd ; C201490D s_buffer_load_dword s1, s[72:75], 0xe ; C200C90E s_buffer_load_dword s0, s[72:75], 0xf ; C200490F s_buffer_load_dword s12, s[72:75], 0x10 ; C2064910 s_buffer_load_dword s17, s[72:75], 0x11 ; C208C911 s_buffer_load_dword s51, s[72:75], 0x4f ; C219C94F s_buffer_load_dword s46, s[72:75], 0x50 ; C2174950 s_buffer_load_dword s45, s[72:75], 0x51 ; C216C951 s_buffer_load_dword s44, s[72:75], 0x52 ; C2164952 s_buffer_load_dword s43, s[72:75], 0x53 ; C215C953 s_buffer_load_dword s37, s[72:75], 0x0 ; C212C900 s_buffer_load_dword s5, s[72:75], 0x4 ; C202C904 s_buffer_load_dword s6, s[72:75], 0x5 ; C2034905 s_buffer_load_dword s4, s[72:75], 0x6 ; C2024906 s_buffer_load_dword s3, s[72:75], 0x7 ; C201C907 s_buffer_load_dword s18, s[72:75], 0x12 ; C2094912 s_buffer_load_dword s13, s[72:75], 0x13 ; C206C913 s_buffer_load_dword s27, s[72:75], 0x14 ; C20DC914 s_buffer_load_dword s38, s[72:75], 0x15 ; C2134915 s_buffer_load_dword s25, s[72:75], 0x16 ; C20CC916 s_buffer_load_dword s15, s[72:75], 0x17 ; C207C917 s_buffer_load_dword s36, s[72:75], 0x18 ; C2124918 s_buffer_load_dword s39, s[72:75], 0x19 ; C213C919 s_buffer_load_dword s26, s[72:75], 0x1a ; C20D491A s_buffer_load_dword s24, s[72:75], 0x1b ; C20C491B s_buffer_load_dword s9, s[72:75], 0x8 ; C204C908 s_buffer_load_dword s10, s[72:75], 0x9 ; C2054909 s_buffer_load_dword s8, s[72:75], 0xa ; C204490A s_buffer_load_dword s7, s[72:75], 0xb ; C203C90B s_buffer_load_dword s11, s[72:75], 0xc ; C205C90C s_buffer_load_dword s40, s[72:75], 0x1c ; C214491C s_buffer_load_dword s41, s[72:75], 0x1d ; C214C91D s_buffer_load_dword s42, s[72:75], 0x1e ; C215491E s_buffer_load_dword s69, s[72:75], 0x20 ; C222C920 s_buffer_load_dword s68, s[72:75], 0x21 ; C2224921 s_buffer_load_dword s67, s[72:75], 0x22 ; C221C922 s_buffer_load_dword s66, s[72:75], 0x24 ; C2214924 s_buffer_load_dword s65, s[72:75], 0x25 ; C220C925 s_buffer_load_dword s64, s[72:75], 0x26 ; C2204926 s_buffer_load_dword s63, s[72:75], 0x28 ; C21FC928 s_buffer_load_dword s62, s[72:75], 0x29 ; C21F4929 s_buffer_load_dword s61, s[72:75], 0x2a ; C21EC92A s_buffer_load_dword s58, s[72:75], 0x2c ; C21D492C s_buffer_load_dword s59, s[72:75], 0x2d ; C21DC92D s_buffer_load_dword s60, s[72:75], 0x2e ; C21E492E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s77, v6 ; 10000C4D v_mul_f32_e32 v14, s78, v6 ; 101C0C4E v_mul_f32_e32 v15, s79, v6 ; 101E0C4F v_mul_f32_e32 v16, s80, v6 ; 10200C50 v_mul_f32_e32 v17, s70, v10 ; 10221446 v_mul_f32_e32 v18, s71, v10 ; 10241447 v_mul_f32_e32 v10, s76, v10 ; 1014144C v_mul_f32_e32 v19, s69, v6 ; 10260C45 v_mul_f32_e32 v20, s68, v6 ; 10280C44 v_mac_f32_e32 v17, s48, v11 ; 3E221630 v_mac_f32_e32 v18, s52, v11 ; 3E241634 v_mac_f32_e32 v10, s53, v11 ; 3E141635 v_mul_f32_e32 v6, s67, v6 ; 100C0C43 v_mac_f32_e32 v0, s56, v7 ; 3E000E38 v_mac_f32_e32 v14, s57, v7 ; 3E1C0E39 v_mac_f32_e32 v15, s54, v7 ; 3E1E0E36 v_mac_f32_e32 v16, s55, v7 ; 3E200E37 v_mac_f32_e32 v19, s66, v7 ; 3E260E42 v_mac_f32_e32 v20, s65, v7 ; 3E280E41 v_mac_f32_e32 v6, s64, v7 ; 3E0C0E40 v_cvt_i32_f32_e32 v3, v13 ; 7E06110D v_mac_f32_e32 v0, s49, v8 ; 3E001031 v_mac_f32_e32 v14, s50, v8 ; 3E1C1032 v_mac_f32_e32 v15, s47, v8 ; 3E1E102F v_cvt_f32_i32_e32 v4, v3 ; 7E080B03 v_mac_f32_e32 v16, s51, v8 ; 3E201033 v_mac_f32_e32 v19, s63, v8 ; 3E26103F v_mac_f32_e32 v20, s62, v8 ; 3E28103E v_subrev_f32_e32 v4, v4, v13 ; 0A081B04 v_madmk_f32_e32 v1, v4, v1, 0x41200000 ; 40020304 41200000 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_mac_f32_e32 v6, s61, v8 ; 3E0C103D v_mac_f32_e32 v0, s46, v9 ; 3E00122E v_mac_f32_e32 v14, s45, v9 ; 3E1C122D v_cmp_eq_i32_e32 vcc, 0, v1 ; 7D040280 v_cndmask_b32_e64 v7, 0, 1.0, vcc ; D2000007 01A9E480 v_cmp_eq_i32_e32 vcc, 1, v1 ; 7D040281 v_cndmask_b32_e64 v8, 0, 1.0, vcc ; D2000008 01A9E480 v_cmp_eq_i32_e32 vcc, 2, v1 ; 7D040282 v_cndmask_b32_e64 v1, 0, 1.0, vcc ; D2000001 01A9E480 v_mac_f32_e32 v15, s44, v9 ; 3E1E122C v_mac_f32_e32 v16, s43, v9 ; 3E20122B v_mac_f32_e32 v19, s58, v9 ; 3E26123A v_mac_f32_e32 v20, s59, v9 ; 3E28123B v_mac_f32_e32 v6, s60, v9 ; 3E0C123C v_mul_f32_e32 v4, 0x3e800000, v12 ; 100818FF 3E800000 v_cvt_i32_f32_e32 v4, v4 ; 7E081104 v_ashrrev_i32_e32 v9, 31, v3 ; 3012069F v_lshrrev_b32_e32 v9, 24, v9 ; 2C121298 v_add_i32_e32 v9, v3, v9 ; 4A121303 v_and_b32_e32 v11, 0xffffff00, v9 ; 361612FF FFFFFF00 v_sub_i32_e32 v3, v3, v11 ; 4C061703 v_lshlrev_b32_e32 v11, 2, v4 ; 34160882 v_cvt_f32_i32_e32 v11, v11 ; 7E160B0B v_ashrrev_i32_e32 v9, 8, v9 ; 30121288 v_cvt_f32_i32_e32 v9, v9 ; 7E120B09 v_cvt_f32_i32_e32 v13, v4 ; 7E1A0B04 v_cvt_f32_i32_e32 v21, v3 ; 7E2A0B03 v_subrev_f32_e32 v11, v11, v12 ; 0A16190B v_mul_f32_e32 v3, v9, v8 ; 10061109 v_mac_f32_e32 v3, v13, v7 ; 3E060F0D v_mac_f32_e32 v3, v21, v1 ; 3E060315 v_add_f32_e32 v3, 0, v3 ; 06060680 v_mul_f32_e32 v3, 0x3b808081, v3 ; 100606FF 3B808081 v_mov_b32_e32 v4, 0x3dcccccd ; 7E0802FF 3DCCCCCD image_sample_l v[3:4], 3, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[28:35], s[20:23] ; F0900300 00A70303 v_add_f32_e32 v5, -1.0, v13 ; 060A1AF3 v_add_f32_e32 v9, -1.0, v9 ; 061212F3 v_add_f32_e32 v12, -1.0, v21 ; 06182AF3 s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v2, v3, v2, 0x41800000 ; 40040503 41800000 exp 15, 32, 0, 0, 0, v5, v9, v12, v2 ; F800020F 020C0905 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, 4.0, v4 ; 100408F6 exp 15, 33, 0, 0, 0, v7, v8, v1, v2 ; F800021F 02010807 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v1, -2.0, v11 ; 060216F5 v_mac_f32_e32 v17, s14, v1 ; 3E22020E v_mac_f32_e32 v18, s16, v1 ; 3E240210 v_mac_f32_e32 v10, s19, v1 ; 3E140213 v_mul_f32_e32 v1, v17, v17 ; 10022311 v_mac_f32_e32 v1, v18, v18 ; 3E022512 v_mac_f32_e32 v1, v10, v10 ; 3E02150A v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 v_mul_f32_e32 v2, 0.5, v14 ; 10041CF0 v_mul_f32_e32 v3, 0.5, v16 ; 100620F0 v_mad_f32 v4, 0.5, v0, v3 ; D2820004 040E00F0 v_mac_f32_e32 v3, s37, v2 ; 3E060425 v_mul_f32_e32 v2, v1, v17 ; 10042301 v_mul_f32_e32 v5, v1, v18 ; 100A2501 v_mul_f32_e32 v1, v1, v10 ; 10021501 v_mul_f32_e32 v7, v1, v5 ; 100E0B01 v_mul_f32_e32 v8, s17, v7 ; 10100E11 v_mul_f32_e32 v9, s38, v7 ; 10120E26 v_mul_f32_e32 v7, s39, v7 ; 100E0E27 v_mul_f32_e32 v10, v5, v2 ; 10140505 v_mac_f32_e32 v8, s12, v10 ; 3E10140C v_mac_f32_e32 v9, s27, v10 ; 3E12141B v_mac_f32_e32 v7, s36, v10 ; 3E0E1424 v_mul_f32_e32 v10, v1, v1 ; 10140301 v_mac_f32_e32 v8, s18, v10 ; 3E101412 v_mac_f32_e32 v9, s25, v10 ; 3E121419 v_mac_f32_e32 v7, s26, v10 ; 3E0E141A v_mul_f32_e32 v10, v2, v1 ; 10140302 v_mac_f32_e32 v8, s13, v10 ; 3E10140D v_mac_f32_e32 v9, s15, v10 ; 3E12140F v_mac_f32_e32 v7, s24, v10 ; 3E0E1418 v_mul_f32_e32 v11, v5, v5 ; 10160B05 v_mad_f32 v11, v2, v2, -v11 ; D282000B 842E0502 v_mac_f32_e32 v8, s40, v11 ; 3E101628 v_mac_f32_e32 v9, s41, v11 ; 3E121629 v_mac_f32_e32 v7, s42, v11 ; 3E0E162A exp 15, 34, 0, 0, 0, v4, v3, v15, v16 ; F800022F 100F0304 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, s6, v5 ; 10060A06 v_mac_f32_e32 v3, s5, v2 ; 3E060405 v_mul_f32_e32 v4, s10, v5 ; 10080A0A v_mac_f32_e32 v4, s9, v2 ; 3E080409 v_mul_f32_e32 v11, s2, v5 ; 10160A02 v_mac_f32_e32 v11, s11, v2 ; 3E16040B v_mac_f32_e32 v3, s4, v1 ; 3E060204 v_mac_f32_e32 v4, s8, v1 ; 3E080208 v_mac_f32_e32 v11, s1, v1 ; 3E160201 v_add_f32_e32 v3, s3, v3 ; 06060603 v_add_f32_e32 v3, v3, v8 ; 06061103 exp 15, 35, 0, 0, 0, v15, v2, v5, v1 ; F800023F 0105020F exp 15, 36, 0, 0, 0, v19, v20, v6, v3 ; F800024F 03061413 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v1, s7, v4 ; 06020807 v_add_f32_e32 v1, v1, v9 ; 06021301 v_add_f32_e32 v2, s0, v11 ; 06041600 v_add_f32_e32 v2, v2, v7 ; 06040F02 exp 15, 37, 0, 0, 0, v1, v2, v2, v10 ; F800025F 0A020201 exp 15, 12, 0, 1, 0, v0, v14, v15, v16 ; F80008CF 100F0E00 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 88 VGPRS: 24 Code Size: 960 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL CONST[0..2] DCL CONST[8..15] DCL CONST[17] DCL TEMP[0..21], LOCAL IMM[0] FLT32 { -0.2000, 7.0000, 0.0000, 0.5000} IMM[1] FLT32 { 64.0000, -64.0000, 4.0000, 0.6931} IMM[2] FLT32 { 0.0039, 0.0020, 1.0000, 2.0000} IMM[3] FLT32 { 3.0000, 0.6600, 0.0000, 0.0000} 0: MOV TEMP[0].x, IN[4].wwww 1: MOV TEMP[0].yz, IN[5].yxyy 2: ABS TEMP[1].xyz, IN[3].yzww 3: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 4: RSQ TEMP[2].x, TEMP[2].xxxx 5: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx, IMM[0].xxxx 6: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[0].yyyy 7: MAX TEMP[1].xyz, TEMP[1].xyzz, IMM[0].zzzz 8: ADD TEMP[2].x, TEMP[1].xxxx, TEMP[1].yyyy 9: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[1].zzzz 10: RCP TEMP[2].xyz, TEMP[2].xxxx 11: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz 12: ADD TEMP[2], IN[0], IMM[0].wwww 13: FLR TEMP[2].xyz, TEMP[2] 14: MOV TEMP[3].x, CONST[8].xxxx 15: MUL TEMP[4].x, TEMP[2].xxxx, CONST[8].xxxx 16: MOV TEMP[5].x, TEMP[4].xxxx 17: FLR TEMP[4].x, TEMP[4].xxxx 18: MUL TEMP[4].x, TEMP[4].xxxx, CONST[8].xxxx 19: FSGE TEMP[6].x, TEMP[2].xxxx, IMM[1].xxxx 20: UIF TEMP[6].xxxx :0 21: MOV TEMP[3].x, CONST[9].xxxx 22: ADD TEMP[6].x, TEMP[2].xxxx, IMM[1].yyyy 23: MUL TEMP[6].x, TEMP[6].xxxx, CONST[9].xxxx 24: MOV TEMP[5].x, TEMP[6].xxxx 25: FLR TEMP[7].x, TEMP[6].xxxx 26: MUL TEMP[7].x, TEMP[7].xxxx, CONST[9].xxxx 27: MOV TEMP[4].x, TEMP[7].xxxx 28: FRC TEMP[6].x, TEMP[6].xxxx 29: FRC TEMP[8].x, TEMP[7].xxxx 30: MOV TEMP[6].y, TEMP[8].xxxx 31: FLR TEMP[7].x, TEMP[7].xxxx 32: ADD TEMP[7].x, TEMP[7].xxxx, IMM[1].zzzz 33: MOV TEMP[6].z, TEMP[7].xxxx 34: MOV TEMP[6].xyz, TEMP[6].xyzx 35: ELSE :0 36: FRC TEMP[5].x, TEMP[5].xxxx 37: FRC TEMP[7].x, TEMP[4].xxxx 38: MOV TEMP[5].y, TEMP[7].xxxx 39: FLR TEMP[4].x, TEMP[4].xxxx 40: MOV TEMP[5].z, TEMP[4].xxxx 41: MOV TEMP[6].xyz, TEMP[5].xyzx 42: ENDIF 43: MOV TEMP[4].x, CONST[8].xxxx 44: MUL TEMP[5].x, TEMP[2].yyyy, CONST[8].xxxx 45: MOV TEMP[7].x, TEMP[5].xxxx 46: FLR TEMP[5].x, TEMP[5].xxxx 47: MUL TEMP[5].x, TEMP[5].xxxx, CONST[8].xxxx 48: FSGE TEMP[8].x, TEMP[2].yyyy, IMM[1].xxxx 49: UIF TEMP[8].xxxx :0 50: MOV TEMP[4].x, CONST[9].xxxx 51: ADD TEMP[8].x, TEMP[2].yyyy, IMM[1].yyyy 52: MUL TEMP[8].x, TEMP[8].xxxx, CONST[9].xxxx 53: MOV TEMP[7].x, TEMP[8].xxxx 54: FLR TEMP[9].x, TEMP[8].xxxx 55: MUL TEMP[9].x, TEMP[9].xxxx, CONST[9].xxxx 56: MOV TEMP[5].x, TEMP[9].xxxx 57: FRC TEMP[8].x, TEMP[8].xxxx 58: FRC TEMP[10].x, TEMP[9].xxxx 59: MOV TEMP[8].y, TEMP[10].xxxx 60: FLR TEMP[9].x, TEMP[9].xxxx 61: ADD TEMP[9].x, TEMP[9].xxxx, IMM[1].zzzz 62: MOV TEMP[8].z, TEMP[9].xxxx 63: MOV TEMP[8].xyz, TEMP[8].xyzx 64: ELSE :0 65: FRC TEMP[7].x, TEMP[7].xxxx 66: FRC TEMP[9].x, TEMP[5].xxxx 67: MOV TEMP[7].y, TEMP[9].xxxx 68: FLR TEMP[5].x, TEMP[5].xxxx 69: MOV TEMP[7].z, TEMP[5].xxxx 70: MOV TEMP[8].xyz, TEMP[7].xyzx 71: ENDIF 72: MOV TEMP[5].x, CONST[8].xxxx 73: MUL TEMP[7].x, TEMP[2].zzzz, CONST[8].xxxx 74: MOV TEMP[9].x, TEMP[7].xxxx 75: FLR TEMP[7].x, TEMP[7].xxxx 76: MUL TEMP[7].x, TEMP[7].xxxx, CONST[8].xxxx 77: FSGE TEMP[10].x, TEMP[2].zzzz, IMM[1].xxxx 78: UIF TEMP[10].xxxx :0 79: MOV TEMP[5].x, CONST[9].xxxx 80: ADD TEMP[2].x, TEMP[2].zzzz, IMM[1].yyyy 81: MUL TEMP[2].x, TEMP[2].xxxx, CONST[9].xxxx 82: MOV TEMP[9].x, TEMP[2].xxxx 83: FLR TEMP[10].x, TEMP[2].xxxx 84: MUL TEMP[10].x, TEMP[10].xxxx, CONST[9].xxxx 85: MOV TEMP[7].x, TEMP[10].xxxx 86: FRC TEMP[2].x, TEMP[2].xxxx 87: FRC TEMP[11].x, TEMP[10].xxxx 88: MOV TEMP[2].y, TEMP[11].xxxx 89: FLR TEMP[10].x, TEMP[10].xxxx 90: ADD TEMP[10].x, TEMP[10].xxxx, IMM[1].zzzz 91: MOV TEMP[2].z, TEMP[10].xxxx 92: MOV TEMP[2].xyz, TEMP[2].xyzx 93: ELSE :0 94: FRC TEMP[9].x, TEMP[9].xxxx 95: FRC TEMP[10].x, TEMP[7].xxxx 96: MOV TEMP[9].y, TEMP[10].xxxx 97: FLR TEMP[7].x, TEMP[7].xxxx 98: MOV TEMP[9].z, TEMP[7].xxxx 99: MOV TEMP[2].xyz, TEMP[9].xyzx 100: ENDIF 101: ADD TEMP[7].xyz, IN[4].xyzz, -CONST[0].xyzz 102: DP3 TEMP[7].x, TEMP[7].xyzz, TEMP[7].xyzz 103: MUL TEMP[7].x, CONST[14].xxxx, TEMP[7].xxxx 104: LG2 TEMP[7].x, TEMP[7].xxxx 105: MUL TEMP[7].x, TEMP[7].xxxx, IMM[1].wwww 106: MUL TEMP[7].x, TEMP[7].xxxx, CONST[13].xxxx 107: MOV TEMP[9].xy, IN[4].xyxx 108: MOV TEMP[10].x, IMM[2].xxxx 109: FSNE TEMP[11].x, CONST[8].xxxx, TEMP[3].xxxx 110: UIF TEMP[11].xxxx :0 111: MOV TEMP[10].x, IMM[2].yyyy 112: RCP TEMP[11].x, CONST[11].xxxx 113: MUL TEMP[9].xy, IN[4].xyyy, TEMP[11].xxxx 114: ELSE :0 115: RCP TEMP[11].x, CONST[10].xxxx 116: MUL TEMP[9].xy, TEMP[9].xyyy, TEMP[11].xxxx 117: ENDIF 118: FRC TEMP[9].xy, TEMP[9].xyyy 119: MUL TEMP[11].x, CONST[12].xxxx, IMM[2].wwww 120: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[10].xxxx 121: ADD TEMP[11].x, IMM[2].zzzz, -TEMP[11].xxxx 122: MUL TEMP[10].x, TEMP[10].xxxx, CONST[12].xxxx 123: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[11].xxxx, TEMP[10].xxxx 124: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[3].xxxx, TEMP[6].xyyy 125: MOV TEMP[10].xy, TEMP[9].xyyy 126: MOV TEMP[10].w, TEMP[7].xxxx 127: TXL TEMP[10], TEMP[10], SAMP[4], 2D 128: FSEQ TEMP[11].x, TEMP[6].zzzz, IMM[1].zzzz 129: AND TEMP[11].x, TEMP[11].xxxx, IMM[2].zzzz 130: MOV TEMP[12].xy, TEMP[9].xyyy 131: MOV TEMP[12].w, TEMP[7].xxxx 132: TXL TEMP[12], TEMP[12], SAMP[3], 2D 133: FSEQ TEMP[13].x, TEMP[6].zzzz, IMM[3].xxxx 134: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 135: MOV TEMP[14].xy, TEMP[9].xyyy 136: MOV TEMP[14].w, TEMP[7].xxxx 137: TXL TEMP[14], TEMP[14], SAMP[2], 2D 138: FSEQ TEMP[15].x, TEMP[6].zzzz, IMM[2].wwww 139: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 140: MOV TEMP[16].xy, TEMP[9].xyyy 141: MOV TEMP[16].w, TEMP[7].xxxx 142: TXL TEMP[16], TEMP[16], SAMP[1], 2D 143: FSEQ TEMP[17].x, TEMP[6].zzzz, IMM[2].zzzz 144: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 145: MOV TEMP[9].xy, TEMP[9].xyyy 146: MOV TEMP[9].w, TEMP[7].xxxx 147: TXL TEMP[9], TEMP[9], SAMP[0], 2D 148: FSEQ TEMP[18].x, TEMP[6].zzzz, IMM[0].zzzz 149: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 150: MUL TEMP[9], TEMP[9], TEMP[18].xxxx 151: MAD TEMP[9], TEMP[16], TEMP[17].xxxx, TEMP[9] 152: MAD TEMP[9], TEMP[14], TEMP[15].xxxx, TEMP[9] 153: MAD TEMP[9], TEMP[12], TEMP[13].xxxx, TEMP[9] 154: MAD TEMP[9], TEMP[10], TEMP[11].xxxx, TEMP[9] 155: MOV TEMP[10].xy, IN[4].zyzz 156: MOV TEMP[11].x, IMM[2].xxxx 157: FSNE TEMP[12].x, CONST[8].xxxx, TEMP[3].xxxx 158: UIF TEMP[12].xxxx :0 159: MOV TEMP[11].x, IMM[2].yyyy 160: RCP TEMP[12].x, CONST[11].xxxx 161: MUL TEMP[10].xy, IN[4].zyyy, TEMP[12].xxxx 162: ELSE :0 163: RCP TEMP[12].x, CONST[10].xxxx 164: MUL TEMP[10].xy, TEMP[10].xyyy, TEMP[12].xxxx 165: ENDIF 166: FRC TEMP[10].xy, TEMP[10].xyyy 167: MUL TEMP[12].x, CONST[12].xxxx, IMM[2].wwww 168: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx 169: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx 170: MUL TEMP[11].x, TEMP[11].xxxx, CONST[12].xxxx 171: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[12].xxxx, TEMP[11].xxxx 172: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[3].xxxx, TEMP[6].xyyy 173: MOV TEMP[11].xy, TEMP[10].xyyy 174: MOV TEMP[11].w, TEMP[7].xxxx 175: TXL TEMP[11], TEMP[11], SAMP[4], 2D 176: FSEQ TEMP[12].x, TEMP[6].zzzz, IMM[1].zzzz 177: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz 178: MOV TEMP[13].xy, TEMP[10].xyyy 179: MOV TEMP[13].w, TEMP[7].xxxx 180: TXL TEMP[13], TEMP[13], SAMP[3], 2D 181: FSEQ TEMP[14].x, TEMP[6].zzzz, IMM[3].xxxx 182: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 183: MOV TEMP[15].xy, TEMP[10].xyyy 184: MOV TEMP[15].w, TEMP[7].xxxx 185: TXL TEMP[15], TEMP[15], SAMP[2], 2D 186: FSEQ TEMP[16].x, TEMP[6].zzzz, IMM[2].wwww 187: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 188: MOV TEMP[17].xy, TEMP[10].xyyy 189: MOV TEMP[17].w, TEMP[7].xxxx 190: TXL TEMP[17], TEMP[17], SAMP[1], 2D 191: FSEQ TEMP[18].x, TEMP[6].zzzz, IMM[2].zzzz 192: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 193: MOV TEMP[10].xy, TEMP[10].xyyy 194: MOV TEMP[10].w, TEMP[7].xxxx 195: TXL TEMP[10], TEMP[10], SAMP[0], 2D 196: FSEQ TEMP[19].x, TEMP[6].zzzz, IMM[0].zzzz 197: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 198: MUL TEMP[10], TEMP[10], TEMP[19].xxxx 199: MAD TEMP[10], TEMP[17], TEMP[18].xxxx, TEMP[10] 200: MAD TEMP[10], TEMP[15], TEMP[16].xxxx, TEMP[10] 201: MAD TEMP[10], TEMP[13], TEMP[14].xxxx, TEMP[10] 202: MAD TEMP[10], TEMP[11], TEMP[12].xxxx, TEMP[10] 203: MOV TEMP[11].xy, IN[4].zxzz 204: MOV TEMP[12].x, IMM[2].xxxx 205: FSNE TEMP[13].x, CONST[8].xxxx, TEMP[3].xxxx 206: UIF TEMP[13].xxxx :0 207: MOV TEMP[12].x, IMM[2].yyyy 208: RCP TEMP[13].x, CONST[11].xxxx 209: MUL TEMP[11].xy, IN[4].zxxx, TEMP[13].xxxx 210: ELSE :0 211: RCP TEMP[13].x, CONST[10].xxxx 212: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx 213: ENDIF 214: FRC TEMP[11].xy, TEMP[11].xyyy 215: MUL TEMP[13].x, CONST[12].xxxx, IMM[2].wwww 216: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx 217: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx 218: MUL TEMP[12].x, TEMP[12].xxxx, CONST[12].xxxx 219: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx 220: MAD TEMP[3].xy, TEMP[11].xyyy, TEMP[3].xxxx, TEMP[6].xyyy 221: MOV TEMP[11].xy, TEMP[3].xyyy 222: MOV TEMP[11].w, TEMP[7].xxxx 223: TXL TEMP[11], TEMP[11], SAMP[4], 2D 224: FSEQ TEMP[12].x, TEMP[6].zzzz, IMM[1].zzzz 225: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz 226: MOV TEMP[13].xy, TEMP[3].xyyy 227: MOV TEMP[13].w, TEMP[7].xxxx 228: TXL TEMP[13], TEMP[13], SAMP[3], 2D 229: FSEQ TEMP[14].x, TEMP[6].zzzz, IMM[3].xxxx 230: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 231: MOV TEMP[15].xy, TEMP[3].xyyy 232: MOV TEMP[15].w, TEMP[7].xxxx 233: TXL TEMP[15], TEMP[15], SAMP[2], 2D 234: FSEQ TEMP[16].x, TEMP[6].zzzz, IMM[2].wwww 235: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 236: MOV TEMP[17].xy, TEMP[3].xyyy 237: MOV TEMP[17].w, TEMP[7].xxxx 238: TXL TEMP[17], TEMP[17], SAMP[1], 2D 239: FSEQ TEMP[18].x, TEMP[6].zzzz, IMM[2].zzzz 240: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 241: MOV TEMP[3].xy, TEMP[3].xyyy 242: MOV TEMP[3].w, TEMP[7].xxxx 243: TXL TEMP[3], TEMP[3], SAMP[0], 2D 244: FSEQ TEMP[6].x, TEMP[6].zzzz, IMM[0].zzzz 245: AND TEMP[6].x, TEMP[6].xxxx, IMM[2].zzzz 246: MUL TEMP[3], TEMP[3], TEMP[6].xxxx 247: MAD TEMP[3], TEMP[17], TEMP[18].xxxx, TEMP[3] 248: MAD TEMP[3], TEMP[15], TEMP[16].xxxx, TEMP[3] 249: MAD TEMP[3], TEMP[13], TEMP[14].xxxx, TEMP[3] 250: MAD TEMP[3], TEMP[11], TEMP[12].xxxx, TEMP[3] 251: MOV TEMP[6].xy, IN[4].xyxx 252: MOV TEMP[11].x, IMM[2].xxxx 253: FSNE TEMP[12].x, CONST[8].xxxx, TEMP[4].xxxx 254: UIF TEMP[12].xxxx :0 255: MOV TEMP[11].x, IMM[2].yyyy 256: RCP TEMP[12].x, CONST[11].xxxx 257: MUL TEMP[6].xy, IN[4].xyyy, TEMP[12].xxxx 258: ELSE :0 259: RCP TEMP[12].x, CONST[10].xxxx 260: MUL TEMP[6].xy, TEMP[6].xyyy, TEMP[12].xxxx 261: ENDIF 262: FRC TEMP[6].xy, TEMP[6].xyyy 263: MUL TEMP[12].x, CONST[12].xxxx, IMM[2].wwww 264: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx 265: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx 266: MUL TEMP[11].x, TEMP[11].xxxx, CONST[12].xxxx 267: MAD TEMP[6].xy, TEMP[6].xyyy, TEMP[12].xxxx, TEMP[11].xxxx 268: MAD TEMP[6].xy, TEMP[6].xyyy, TEMP[4].xxxx, TEMP[8].xyyy 269: MOV TEMP[11].xy, TEMP[6].xyyy 270: MOV TEMP[11].w, TEMP[7].xxxx 271: TXL TEMP[11], TEMP[11], SAMP[4], 2D 272: FSEQ TEMP[12].x, TEMP[8].zzzz, IMM[1].zzzz 273: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz 274: MOV TEMP[13].xy, TEMP[6].xyyy 275: MOV TEMP[13].w, TEMP[7].xxxx 276: TXL TEMP[13], TEMP[13], SAMP[3], 2D 277: FSEQ TEMP[14].x, TEMP[8].zzzz, IMM[3].xxxx 278: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 279: MOV TEMP[15].xy, TEMP[6].xyyy 280: MOV TEMP[15].w, TEMP[7].xxxx 281: TXL TEMP[15], TEMP[15], SAMP[2], 2D 282: FSEQ TEMP[16].x, TEMP[8].zzzz, IMM[2].wwww 283: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 284: MOV TEMP[17].xy, TEMP[6].xyyy 285: MOV TEMP[17].w, TEMP[7].xxxx 286: TXL TEMP[17], TEMP[17], SAMP[1], 2D 287: FSEQ TEMP[18].x, TEMP[8].zzzz, IMM[2].zzzz 288: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 289: MOV TEMP[6].xy, TEMP[6].xyyy 290: MOV TEMP[6].w, TEMP[7].xxxx 291: TXL TEMP[6], TEMP[6], SAMP[0], 2D 292: FSEQ TEMP[19].x, TEMP[8].zzzz, IMM[0].zzzz 293: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 294: MUL TEMP[6], TEMP[6], TEMP[19].xxxx 295: MAD TEMP[6], TEMP[17], TEMP[18].xxxx, TEMP[6] 296: MAD TEMP[6], TEMP[15], TEMP[16].xxxx, TEMP[6] 297: MAD TEMP[6], TEMP[13], TEMP[14].xxxx, TEMP[6] 298: MAD TEMP[6], TEMP[11], TEMP[12].xxxx, TEMP[6] 299: MOV TEMP[11].xy, IN[4].zyzz 300: MOV TEMP[12].x, IMM[2].xxxx 301: FSNE TEMP[13].x, CONST[8].xxxx, TEMP[4].xxxx 302: UIF TEMP[13].xxxx :0 303: MOV TEMP[12].x, IMM[2].yyyy 304: RCP TEMP[13].x, CONST[11].xxxx 305: MUL TEMP[11].xy, IN[4].zyyy, TEMP[13].xxxx 306: ELSE :0 307: RCP TEMP[13].x, CONST[10].xxxx 308: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx 309: ENDIF 310: FRC TEMP[11].xy, TEMP[11].xyyy 311: MUL TEMP[13].x, CONST[12].xxxx, IMM[2].wwww 312: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx 313: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx 314: MUL TEMP[12].x, TEMP[12].xxxx, CONST[12].xxxx 315: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx 316: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[4].xxxx, TEMP[8].xyyy 317: MOV TEMP[12].xy, TEMP[11].xyyy 318: MOV TEMP[12].w, TEMP[7].xxxx 319: TXL TEMP[12], TEMP[12], SAMP[4], 2D 320: FSEQ TEMP[13].x, TEMP[8].zzzz, IMM[1].zzzz 321: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 322: MOV TEMP[14].xy, TEMP[11].xyyy 323: MOV TEMP[14].w, TEMP[7].xxxx 324: TXL TEMP[14], TEMP[14], SAMP[3], 2D 325: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[3].xxxx 326: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 327: MOV TEMP[16].xy, TEMP[11].xyyy 328: MOV TEMP[16].w, TEMP[7].xxxx 329: TXL TEMP[16], TEMP[16], SAMP[2], 2D 330: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[2].wwww 331: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 332: MOV TEMP[18].xy, TEMP[11].xyyy 333: MOV TEMP[18].w, TEMP[7].xxxx 334: TXL TEMP[18], TEMP[18], SAMP[1], 2D 335: FSEQ TEMP[19].x, TEMP[8].zzzz, IMM[2].zzzz 336: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 337: MOV TEMP[11].xy, TEMP[11].xyyy 338: MOV TEMP[11].w, TEMP[7].xxxx 339: TXL TEMP[11], TEMP[11], SAMP[0], 2D 340: FSEQ TEMP[20].x, TEMP[8].zzzz, IMM[0].zzzz 341: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 342: MUL TEMP[11], TEMP[11], TEMP[20].xxxx 343: MAD TEMP[11], TEMP[18], TEMP[19].xxxx, TEMP[11] 344: MAD TEMP[11], TEMP[16], TEMP[17].xxxx, TEMP[11] 345: MAD TEMP[11], TEMP[14], TEMP[15].xxxx, TEMP[11] 346: MAD TEMP[11], TEMP[12], TEMP[13].xxxx, TEMP[11] 347: MOV TEMP[12].xy, IN[4].zxzz 348: MOV TEMP[13].x, IMM[2].xxxx 349: FSNE TEMP[14].x, CONST[8].xxxx, TEMP[4].xxxx 350: UIF TEMP[14].xxxx :0 351: MOV TEMP[13].x, IMM[2].yyyy 352: RCP TEMP[14].x, CONST[11].xxxx 353: MUL TEMP[12].xy, IN[4].zxxx, TEMP[14].xxxx 354: ELSE :0 355: RCP TEMP[14].x, CONST[10].xxxx 356: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx 357: ENDIF 358: FRC TEMP[12].xy, TEMP[12].xyyy 359: MUL TEMP[14].x, CONST[12].xxxx, IMM[2].wwww 360: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx 361: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx 362: MUL TEMP[13].x, TEMP[13].xxxx, CONST[12].xxxx 363: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx 364: MAD TEMP[4].xy, TEMP[12].xyyy, TEMP[4].xxxx, TEMP[8].xyyy 365: MOV TEMP[12].xy, TEMP[4].xyyy 366: MOV TEMP[12].w, TEMP[7].xxxx 367: TXL TEMP[12], TEMP[12], SAMP[4], 2D 368: FSEQ TEMP[13].x, TEMP[8].zzzz, IMM[1].zzzz 369: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 370: MOV TEMP[14].xy, TEMP[4].xyyy 371: MOV TEMP[14].w, TEMP[7].xxxx 372: TXL TEMP[14], TEMP[14], SAMP[3], 2D 373: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[3].xxxx 374: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 375: MOV TEMP[16].xy, TEMP[4].xyyy 376: MOV TEMP[16].w, TEMP[7].xxxx 377: TXL TEMP[16], TEMP[16], SAMP[2], 2D 378: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[2].wwww 379: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 380: MOV TEMP[18].xy, TEMP[4].xyyy 381: MOV TEMP[18].w, TEMP[7].xxxx 382: TXL TEMP[18], TEMP[18], SAMP[1], 2D 383: FSEQ TEMP[19].x, TEMP[8].zzzz, IMM[2].zzzz 384: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 385: MOV TEMP[4].xy, TEMP[4].xyyy 386: MOV TEMP[4].w, TEMP[7].xxxx 387: TXL TEMP[4], TEMP[4], SAMP[0], 2D 388: FSEQ TEMP[8].x, TEMP[8].zzzz, IMM[0].zzzz 389: AND TEMP[8].x, TEMP[8].xxxx, IMM[2].zzzz 390: MUL TEMP[4], TEMP[4], TEMP[8].xxxx 391: MAD TEMP[4], TEMP[18], TEMP[19].xxxx, TEMP[4] 392: MAD TEMP[4], TEMP[16], TEMP[17].xxxx, TEMP[4] 393: MAD TEMP[4], TEMP[14], TEMP[15].xxxx, TEMP[4] 394: MAD TEMP[4], TEMP[12], TEMP[13].xxxx, TEMP[4] 395: MOV TEMP[8].xy, IN[4].xyxx 396: MOV TEMP[12].x, IMM[2].xxxx 397: FSNE TEMP[13].x, CONST[8].xxxx, TEMP[5].xxxx 398: UIF TEMP[13].xxxx :0 399: MOV TEMP[12].x, IMM[2].yyyy 400: RCP TEMP[13].x, CONST[11].xxxx 401: MUL TEMP[8].xy, IN[4].xyyy, TEMP[13].xxxx 402: ELSE :0 403: RCP TEMP[13].x, CONST[10].xxxx 404: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[13].xxxx 405: ENDIF 406: FRC TEMP[8].xy, TEMP[8].xyyy 407: MUL TEMP[13].x, CONST[12].xxxx, IMM[2].wwww 408: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx 409: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx 410: MUL TEMP[12].x, TEMP[12].xxxx, CONST[12].xxxx 411: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[13].xxxx, TEMP[12].xxxx 412: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[5].xxxx, TEMP[2].xyyy 413: MOV TEMP[12].xy, TEMP[8].xyyy 414: MOV TEMP[12].w, TEMP[7].xxxx 415: TXL TEMP[12], TEMP[12], SAMP[4], 2D 416: FSEQ TEMP[13].x, TEMP[2].zzzz, IMM[1].zzzz 417: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 418: MOV TEMP[14].xy, TEMP[8].xyyy 419: MOV TEMP[14].w, TEMP[7].xxxx 420: TXL TEMP[14], TEMP[14], SAMP[3], 2D 421: FSEQ TEMP[15].x, TEMP[2].zzzz, IMM[3].xxxx 422: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 423: MOV TEMP[16].xy, TEMP[8].xyyy 424: MOV TEMP[16].w, TEMP[7].xxxx 425: TXL TEMP[16], TEMP[16], SAMP[2], 2D 426: FSEQ TEMP[17].x, TEMP[2].zzzz, IMM[2].wwww 427: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 428: MOV TEMP[18].xy, TEMP[8].xyyy 429: MOV TEMP[18].w, TEMP[7].xxxx 430: TXL TEMP[18], TEMP[18], SAMP[1], 2D 431: FSEQ TEMP[19].x, TEMP[2].zzzz, IMM[2].zzzz 432: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 433: MOV TEMP[8].xy, TEMP[8].xyyy 434: MOV TEMP[8].w, TEMP[7].xxxx 435: TXL TEMP[8], TEMP[8], SAMP[0], 2D 436: FSEQ TEMP[20].x, TEMP[2].zzzz, IMM[0].zzzz 437: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 438: MUL TEMP[8], TEMP[8], TEMP[20].xxxx 439: MAD TEMP[8], TEMP[18], TEMP[19].xxxx, TEMP[8] 440: MAD TEMP[8], TEMP[16], TEMP[17].xxxx, TEMP[8] 441: MAD TEMP[8], TEMP[14], TEMP[15].xxxx, TEMP[8] 442: MAD TEMP[8], TEMP[12], TEMP[13].xxxx, TEMP[8] 443: MOV TEMP[12].xy, IN[4].zyzz 444: MOV TEMP[13].x, IMM[2].xxxx 445: FSNE TEMP[14].x, CONST[8].xxxx, TEMP[5].xxxx 446: UIF TEMP[14].xxxx :0 447: MOV TEMP[13].x, IMM[2].yyyy 448: RCP TEMP[14].x, CONST[11].xxxx 449: MUL TEMP[12].xy, IN[4].zyyy, TEMP[14].xxxx 450: ELSE :0 451: RCP TEMP[14].x, CONST[10].xxxx 452: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx 453: ENDIF 454: FRC TEMP[12].xy, TEMP[12].xyyy 455: MUL TEMP[14].x, CONST[12].xxxx, IMM[2].wwww 456: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx 457: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx 458: MUL TEMP[13].x, TEMP[13].xxxx, CONST[12].xxxx 459: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx 460: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[5].xxxx, TEMP[2].xyyy 461: MOV TEMP[13].xy, TEMP[12].xyyy 462: MOV TEMP[13].w, TEMP[7].xxxx 463: TXL TEMP[13], TEMP[13], SAMP[4], 2D 464: FSEQ TEMP[14].x, TEMP[2].zzzz, IMM[1].zzzz 465: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 466: MOV TEMP[15].xy, TEMP[12].xyyy 467: MOV TEMP[15].w, TEMP[7].xxxx 468: TXL TEMP[15], TEMP[15], SAMP[3], 2D 469: FSEQ TEMP[16].x, TEMP[2].zzzz, IMM[3].xxxx 470: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 471: MOV TEMP[17].xy, TEMP[12].xyyy 472: MOV TEMP[17].w, TEMP[7].xxxx 473: TXL TEMP[17], TEMP[17], SAMP[2], 2D 474: FSEQ TEMP[18].x, TEMP[2].zzzz, IMM[2].wwww 475: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 476: MOV TEMP[19].xy, TEMP[12].xyyy 477: MOV TEMP[19].w, TEMP[7].xxxx 478: TXL TEMP[19], TEMP[19], SAMP[1], 2D 479: FSEQ TEMP[20].x, TEMP[2].zzzz, IMM[2].zzzz 480: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 481: MOV TEMP[12].xy, TEMP[12].xyyy 482: MOV TEMP[12].w, TEMP[7].xxxx 483: TXL TEMP[12], TEMP[12], SAMP[0], 2D 484: FSEQ TEMP[21].x, TEMP[2].zzzz, IMM[0].zzzz 485: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 486: MUL TEMP[12], TEMP[12], TEMP[21].xxxx 487: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12] 488: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12] 489: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12] 490: MAD TEMP[12], TEMP[13], TEMP[14].xxxx, TEMP[12] 491: MOV TEMP[13].xy, IN[4].zxzz 492: MOV TEMP[14].x, IMM[2].xxxx 493: FSNE TEMP[15].x, CONST[8].xxxx, TEMP[5].xxxx 494: UIF TEMP[15].xxxx :0 495: MOV TEMP[14].x, IMM[2].yyyy 496: RCP TEMP[15].x, CONST[11].xxxx 497: MUL TEMP[13].xy, IN[4].zxxx, TEMP[15].xxxx 498: ELSE :0 499: RCP TEMP[15].x, CONST[10].xxxx 500: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx 501: ENDIF 502: FRC TEMP[13].xy, TEMP[13].xyyy 503: MUL TEMP[15].x, CONST[12].xxxx, IMM[2].wwww 504: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx 505: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx 506: MUL TEMP[14].x, TEMP[14].xxxx, CONST[12].xxxx 507: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx 508: MAD TEMP[5].xy, TEMP[13].xyyy, TEMP[5].xxxx, TEMP[2].xyyy 509: MOV TEMP[13].xy, TEMP[5].xyyy 510: MOV TEMP[13].w, TEMP[7].xxxx 511: TXL TEMP[13], TEMP[13], SAMP[4], 2D 512: FSEQ TEMP[14].x, TEMP[2].zzzz, IMM[1].zzzz 513: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 514: MOV TEMP[15].xy, TEMP[5].xyyy 515: MOV TEMP[15].w, TEMP[7].xxxx 516: TXL TEMP[15], TEMP[15], SAMP[3], 2D 517: FSEQ TEMP[16].x, TEMP[2].zzzz, IMM[3].xxxx 518: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 519: MOV TEMP[17].xy, TEMP[5].xyyy 520: MOV TEMP[17].w, TEMP[7].xxxx 521: TXL TEMP[17], TEMP[17], SAMP[2], 2D 522: FSEQ TEMP[18].x, TEMP[2].zzzz, IMM[2].wwww 523: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 524: MOV TEMP[19].xy, TEMP[5].xyyy 525: MOV TEMP[19].w, TEMP[7].xxxx 526: TXL TEMP[19], TEMP[19], SAMP[1], 2D 527: FSEQ TEMP[20].x, TEMP[2].zzzz, IMM[2].zzzz 528: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 529: MOV TEMP[5].xy, TEMP[5].xyyy 530: MOV TEMP[5].w, TEMP[7].xxxx 531: TXL TEMP[5], TEMP[5], SAMP[0], 2D 532: FSEQ TEMP[2].x, TEMP[2].zzzz, IMM[0].zzzz 533: AND TEMP[2].x, TEMP[2].xxxx, IMM[2].zzzz 534: MUL TEMP[2], TEMP[5], TEMP[2].xxxx 535: MAD TEMP[2], TEMP[19], TEMP[20].xxxx, TEMP[2] 536: MAD TEMP[2], TEMP[17], TEMP[18].xxxx, TEMP[2] 537: MAD TEMP[2], TEMP[15], TEMP[16].xxxx, TEMP[2] 538: MAD TEMP[2], TEMP[13], TEMP[14].xxxx, TEMP[2] 539: MUL TEMP[5], TEMP[8], TEMP[1].zzzz 540: MAD TEMP[5], TEMP[12], TEMP[1].xxxx, TEMP[5] 541: MAD TEMP[2], TEMP[2], TEMP[1].yyyy, TEMP[5] 542: MUL TEMP[5], TEMP[6], TEMP[1].zzzz 543: MAD TEMP[5], TEMP[11], TEMP[1].xxxx, TEMP[5] 544: MAD TEMP[4], TEMP[4], TEMP[1].yyyy, TEMP[5] 545: MUL TEMP[5], TEMP[9], TEMP[1].zzzz 546: MAD TEMP[5], TEMP[10], TEMP[1].xxxx, TEMP[5] 547: MAD TEMP[1], TEMP[3], TEMP[1].yyyy, TEMP[5] 548: MUL TEMP[1], IN[1].xxxx, TEMP[1] 549: MAD TEMP[1], IN[1].yyyy, TEMP[4], TEMP[1] 550: MAD TEMP[1].xyz, IN[1].zzzz, TEMP[2], TEMP[1] 551: MOV TEMP[2].xy, IN[2].xyyy 552: MOV TEMP[2].w, IN[2].wwww 553: TXP TEMP[2], TEMP[2], SAMP[5], 2D 554: LG2 TEMP[3].x, TEMP[2].xxxx 555: LG2 TEMP[3].y, TEMP[2].yyyy 556: LG2 TEMP[3].z, TEMP[2].zzzz 557: LG2 TEMP[3].w, TEMP[2].wwww 558: MOV TEMP[2], -TEMP[3] 559: ADD TEMP[0].xyz, TEMP[2].xyzz, TEMP[0].xyzz 560: MUL TEMP[3].xyz, TEMP[0].xyzz, IMM[3].yyyy 561: MUL TEMP[4].xyz, CONST[17], IMM[2].wwww 562: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[3].xyzz 563: MUL TEMP[3].xyz, TEMP[4].xyzz, TEMP[3].xyzz 564: MAX TEMP[3].xyz, TEMP[3].xyzz, CONST[15].xyzz 565: MIN TEMP[3].xyz, TEMP[3].xyzz, IMM[2].zzzz 566: MOV_SAT TEMP[2].x, TEMP[2].wwww 567: MUL TEMP[2].x, TEMP[2].xxxx, IN[1].wwww 568: MUL TEMP[4].xyz, TEMP[0].xyzz, IMM[0].wwww 569: MUL TEMP[4].xyz, TEMP[1].xyzz, TEMP[4].xyzz 570: MAD TEMP[2].xyz, TEMP[0].xyzz, TEMP[2].xxxx, TEMP[4].xyzz 571: MAD TEMP[0].xyz, TEMP[3].xyzz, TEMP[1].xyzz, TEMP[2].xyzz 572: MAD TEMP[1].x, IN[3].xxxx, CONST[2].zzzz, CONST[2].wwww 573: MOV_SAT TEMP[1].x, TEMP[1].xxxx 574: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[1].xyzz 575: MOV TEMP[0].w, IMM[2].zzzz 576: MOV OUT[0], TEMP[0] 577: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 248) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %45 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %46 = load <8 x i32>, <8 x i32> addrspace(2)* %45, align 32, !tbaa !0 %47 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %48 = load <4 x i32>, <4 x i32> addrspace(2)* %47, align 16, !tbaa !0 %49 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %50 = load <8 x i32>, <8 x i32> addrspace(2)* %49, align 32, !tbaa !0 %51 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %52 = load <4 x i32>, <4 x i32> addrspace(2)* %51, align 16, !tbaa !0 %53 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %54 = load <8 x i32>, <8 x i32> addrspace(2)* %53, align 32, !tbaa !0 %55 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0 %57 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %58 = load <8 x i32>, <8 x i32> addrspace(2)* %57, align 32, !tbaa !0 %59 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %60 = load <4 x i32>, <4 x i32> addrspace(2)* %59, align 16, !tbaa !0 %61 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %62 = load <8 x i32>, <8 x i32> addrspace(2)* %61, align 32, !tbaa !0 %63 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %64 = load <4 x i32>, <4 x i32> addrspace(2)* %63, align 16, !tbaa !0 %65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %66 = bitcast <8 x i32> addrspace(2)* %65 to <32 x i8> addrspace(2)* %67 = load <32 x i8>, <32 x i8> addrspace(2)* %66, align 32, !tbaa !0 %68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %69 = bitcast <4 x i32> addrspace(2)* %68 to <16 x i8> addrspace(2)* %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %91 = call float @llvm.fabs.f32(float %82) %92 = call float @llvm.fabs.f32(float %83) %93 = call float @llvm.fabs.f32(float %84) %94 = fmul float %91, %91 %95 = fmul float %92, %92 %96 = fadd float %95, %94 %97 = fmul float %93, %93 %98 = fadd float %96, %97 %99 = call float @llvm.AMDGPU.rsq.clamped.f32(float %98) %100 = fmul float %91, %99 %101 = fadd float %100, 0xBFC99999A0000000 %102 = fmul float %92, %99 %103 = fadd float %102, 0xBFC99999A0000000 %104 = fmul float %93, %99 %105 = fadd float %104, 0xBFC99999A0000000 %106 = fmul float %101, 7.000000e+00 %107 = fmul float %103, 7.000000e+00 %108 = fmul float %105, 7.000000e+00 %109 = call float @llvm.maxnum.f32(float %106, float 0.000000e+00) %110 = call float @llvm.maxnum.f32(float %107, float 0.000000e+00) %111 = call float @llvm.maxnum.f32(float %108, float 0.000000e+00) %112 = fadd float %109, %110 %113 = fadd float %112, %111 %114 = fdiv float 1.000000e+00, %113 %115 = fmul float %109, %114 %116 = fmul float %110, %114 %117 = fmul float %111, %114 %118 = fadd float %71, 5.000000e-01 %119 = fadd float %72, 5.000000e-01 %120 = fadd float %73, 5.000000e-01 %121 = call float @llvm.floor.f32(float %118) %122 = call float @llvm.floor.f32(float %119) %123 = call float @llvm.floor.f32(float %120) %124 = fmul float %121, %32 %125 = call float @llvm.floor.f32(float %124) %126 = fmul float %125, %32 %127 = fcmp ult float %121, 6.400000e+01 br i1 %127, label %ELSE, label %IF IF: ; preds = %main_body %128 = fadd float %121, -6.400000e+01 %129 = fmul float %128, %33 %130 = call float @llvm.floor.f32(float %129) %131 = fmul float %130, %33 %132 = call float @llvm.floor.f32(float %129) %133 = fsub float %129, %132 %134 = call float @llvm.floor.f32(float %131) %135 = fsub float %131, %134 %136 = call float @llvm.floor.f32(float %131) %137 = fadd float %136, 4.000000e+00 br label %ENDIF ELSE: ; preds = %main_body %138 = call float @llvm.floor.f32(float %124) %139 = fsub float %124, %138 %140 = call float @llvm.floor.f32(float %126) %141 = fsub float %126, %140 %142 = call float @llvm.floor.f32(float %126) br label %ENDIF ENDIF: ; preds = %ELSE, %IF %temp12.0 = phi float [ %33, %IF ], [ %32, %ELSE ] %temp24.0 = phi float [ %133, %IF ], [ %139, %ELSE ] %temp25.0 = phi float [ %135, %IF ], [ %141, %ELSE ] %temp26.0 = phi float [ %137, %IF ], [ %142, %ELSE ] %143 = fmul float %122, %32 %144 = call float @llvm.floor.f32(float %143) %145 = fmul float %144, %32 %146 = fcmp ult float %122, 6.400000e+01 br i1 %146, label %ELSE90, label %IF89 IF89: ; preds = %ENDIF %147 = fadd float %122, -6.400000e+01 %148 = fmul float %147, %33 %149 = call float @llvm.floor.f32(float %148) %150 = fmul float %149, %33 %151 = call float @llvm.floor.f32(float %148) %152 = fsub float %148, %151 %153 = call float @llvm.floor.f32(float %150) %154 = fsub float %150, %153 %155 = call float @llvm.floor.f32(float %150) %156 = fadd float %155, 4.000000e+00 br label %ENDIF88 ELSE90: ; preds = %ENDIF %157 = call float @llvm.floor.f32(float %143) %158 = fsub float %143, %157 %159 = call float @llvm.floor.f32(float %145) %160 = fsub float %145, %159 %161 = call float @llvm.floor.f32(float %145) br label %ENDIF88 ENDIF88: ; preds = %ELSE90, %IF89 %temp16.0 = phi float [ %33, %IF89 ], [ %32, %ELSE90 ] %temp32.0 = phi float [ %152, %IF89 ], [ %158, %ELSE90 ] %temp33.0 = phi float [ %154, %IF89 ], [ %160, %ELSE90 ] %temp34.0 = phi float [ %156, %IF89 ], [ %161, %ELSE90 ] %162 = fmul float %123, %32 %163 = call float @llvm.floor.f32(float %162) %164 = fmul float %163, %32 %165 = fcmp ult float %123, 6.400000e+01 br i1 %165, label %ELSE93, label %IF92 IF92: ; preds = %ENDIF88 %166 = fadd float %123, -6.400000e+01 %167 = fmul float %166, %33 %168 = call float @llvm.floor.f32(float %167) %169 = fmul float %168, %33 %170 = call float @llvm.floor.f32(float %167) %171 = fsub float %167, %170 %172 = call float @llvm.floor.f32(float %169) %173 = fsub float %169, %172 %174 = call float @llvm.floor.f32(float %169) %175 = fadd float %174, 4.000000e+00 br label %ENDIF91 ELSE93: ; preds = %ENDIF88 %176 = call float @llvm.floor.f32(float %162) %177 = fsub float %162, %176 %178 = call float @llvm.floor.f32(float %164) %179 = fsub float %164, %178 %180 = call float @llvm.floor.f32(float %164) br label %ENDIF91 ENDIF91: ; preds = %ELSE93, %IF92 %temp8.0 = phi float [ %171, %IF92 ], [ %177, %ELSE93 ] %temp9.0 = phi float [ %173, %IF92 ], [ %179, %ELSE93 ] %temp10.0 = phi float [ %175, %IF92 ], [ %180, %ELSE93 ] %temp20.0 = phi float [ %33, %IF92 ], [ %32, %ELSE93 ] %181 = fsub float %85, %24 %182 = fsub float %86, %25 %183 = fsub float %87, %26 %184 = fmul float %181, %181 %185 = fmul float %182, %182 %186 = fadd float %185, %184 %187 = fmul float %183, %183 %188 = fadd float %186, %187 %189 = fmul float %38, %188 %190 = call float @llvm.log2.f32(float %189) %191 = fmul float %190, 0x3FE62E4300000000 %192 = fmul float %191, %37 %193 = fcmp une float %32, %temp12.0 %.sink121 = select i1 %193, float %35, float %34 %temp40.0 = select i1 %193, float 1.953125e-03, float 3.906250e-03 %194 = fdiv float 1.000000e+00, %.sink121 %195 = fmul float %85, %194 %196 = fmul float %86, %194 %197 = call float @llvm.floor.f32(float %195) %198 = fsub float %195, %197 %199 = call float @llvm.floor.f32(float %196) %200 = fsub float %196, %199 %201 = fmul float %36, 2.000000e+00 %202 = fmul float %201, %temp40.0 %203 = fsub float 1.000000e+00, %202 %204 = fmul float %temp40.0, %36 %205 = fmul float %198, %203 %206 = fadd float %205, %204 %207 = fmul float %200, %203 %208 = fadd float %207, %204 %209 = fmul float %206, %temp12.0 %210 = fadd float %209, %temp24.0 %211 = fmul float %208, %temp12.0 %212 = fadd float %211, %temp25.0 %213 = bitcast float %210 to i32 %214 = bitcast float %212 to i32 %215 = bitcast float %192 to i32 %216 = insertelement <4 x i32> undef, i32 %213, i32 0 %217 = insertelement <4 x i32> %216, i32 %214, i32 1 %218 = insertelement <4 x i32> %217, i32 %215, i32 2 %219 = bitcast <8 x i32> %62 to <32 x i8> %220 = bitcast <4 x i32> %64 to <16 x i8> %221 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %218, <32 x i8> %219, <16 x i8> %220, i32 2) %222 = extractelement <4 x float> %221, i32 0 %223 = extractelement <4 x float> %221, i32 1 %224 = extractelement <4 x float> %221, i32 2 %225 = fcmp oeq float %temp26.0, 4.000000e+00 %226 = select i1 %225, float 1.000000e+00, float 0.000000e+00 %227 = bitcast float %210 to i32 %228 = bitcast float %212 to i32 %229 = bitcast float %192 to i32 %230 = insertelement <4 x i32> undef, i32 %227, i32 0 %231 = insertelement <4 x i32> %230, i32 %228, i32 1 %232 = insertelement <4 x i32> %231, i32 %229, i32 2 %233 = bitcast <8 x i32> %58 to <32 x i8> %234 = bitcast <4 x i32> %60 to <16 x i8> %235 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %232, <32 x i8> %233, <16 x i8> %234, i32 2) %236 = extractelement <4 x float> %235, i32 0 %237 = extractelement <4 x float> %235, i32 1 %238 = extractelement <4 x float> %235, i32 2 %239 = fcmp oeq float %temp26.0, 3.000000e+00 %240 = select i1 %239, float 1.000000e+00, float 0.000000e+00 %241 = bitcast float %210 to i32 %242 = bitcast float %212 to i32 %243 = bitcast float %192 to i32 %244 = insertelement <4 x i32> undef, i32 %241, i32 0 %245 = insertelement <4 x i32> %244, i32 %242, i32 1 %246 = insertelement <4 x i32> %245, i32 %243, i32 2 %247 = bitcast <8 x i32> %54 to <32 x i8> %248 = bitcast <4 x i32> %56 to <16 x i8> %249 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %246, <32 x i8> %247, <16 x i8> %248, i32 2) %250 = extractelement <4 x float> %249, i32 0 %251 = extractelement <4 x float> %249, i32 1 %252 = extractelement <4 x float> %249, i32 2 %253 = fcmp oeq float %temp26.0, 2.000000e+00 %254 = select i1 %253, float 1.000000e+00, float 0.000000e+00 %255 = bitcast float %210 to i32 %256 = bitcast float %212 to i32 %257 = bitcast float %192 to i32 %258 = insertelement <4 x i32> undef, i32 %255, i32 0 %259 = insertelement <4 x i32> %258, i32 %256, i32 1 %260 = insertelement <4 x i32> %259, i32 %257, i32 2 %261 = bitcast <8 x i32> %50 to <32 x i8> %262 = bitcast <4 x i32> %52 to <16 x i8> %263 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %260, <32 x i8> %261, <16 x i8> %262, i32 2) %264 = extractelement <4 x float> %263, i32 0 %265 = extractelement <4 x float> %263, i32 1 %266 = extractelement <4 x float> %263, i32 2 %267 = fcmp oeq float %temp26.0, 1.000000e+00 %268 = select i1 %267, float 1.000000e+00, float 0.000000e+00 %269 = bitcast float %210 to i32 %270 = bitcast float %212 to i32 %271 = bitcast float %192 to i32 %272 = insertelement <4 x i32> undef, i32 %269, i32 0 %273 = insertelement <4 x i32> %272, i32 %270, i32 1 %274 = insertelement <4 x i32> %273, i32 %271, i32 2 %275 = bitcast <8 x i32> %46 to <32 x i8> %276 = bitcast <4 x i32> %48 to <16 x i8> %277 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %274, <32 x i8> %275, <16 x i8> %276, i32 2) %278 = extractelement <4 x float> %277, i32 0 %279 = extractelement <4 x float> %277, i32 1 %280 = extractelement <4 x float> %277, i32 2 %281 = fcmp oeq float %temp26.0, 0.000000e+00 %282 = select i1 %281, float 1.000000e+00, float 0.000000e+00 %283 = fmul float %278, %282 %284 = fmul float %279, %282 %285 = fmul float %280, %282 %286 = fmul float %264, %268 %287 = fadd float %286, %283 %288 = fmul float %265, %268 %289 = fadd float %288, %284 %290 = fmul float %266, %268 %291 = fadd float %290, %285 %292 = fmul float %250, %254 %293 = fadd float %292, %287 %294 = fmul float %251, %254 %295 = fadd float %294, %289 %296 = fmul float %252, %254 %297 = fadd float %296, %291 %298 = fmul float %236, %240 %299 = fadd float %298, %293 %300 = fmul float %237, %240 %301 = fadd float %300, %295 %302 = fmul float %238, %240 %303 = fadd float %302, %297 %304 = fmul float %222, %226 %305 = fadd float %304, %299 %306 = fmul float %223, %226 %307 = fadd float %306, %301 %308 = fmul float %224, %226 %309 = fadd float %308, %303 %310 = fcmp une float %32, %temp12.0 %.sink122 = select i1 %310, float %35, float %34 %temp44.0 = select i1 %310, float 1.953125e-03, float 3.906250e-03 %311 = fdiv float 1.000000e+00, %.sink122 %312 = fmul float %87, %311 %313 = fmul float %86, %311 %314 = call float @llvm.floor.f32(float %312) %315 = fsub float %312, %314 %316 = call float @llvm.floor.f32(float %313) %317 = fsub float %313, %316 %318 = fmul float %36, 2.000000e+00 %319 = fmul float %318, %temp44.0 %320 = fsub float 1.000000e+00, %319 %321 = fmul float %temp44.0, %36 %322 = fmul float %315, %320 %323 = fadd float %322, %321 %324 = fmul float %317, %320 %325 = fadd float %324, %321 %326 = fmul float %323, %temp12.0 %327 = fadd float %326, %temp24.0 %328 = fmul float %325, %temp12.0 %329 = fadd float %328, %temp25.0 %330 = bitcast float %327 to i32 %331 = bitcast float %329 to i32 %332 = bitcast float %192 to i32 %333 = insertelement <4 x i32> undef, i32 %330, i32 0 %334 = insertelement <4 x i32> %333, i32 %331, i32 1 %335 = insertelement <4 x i32> %334, i32 %332, i32 2 %336 = bitcast <8 x i32> %62 to <32 x i8> %337 = bitcast <4 x i32> %64 to <16 x i8> %338 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %335, <32 x i8> %336, <16 x i8> %337, i32 2) %339 = extractelement <4 x float> %338, i32 0 %340 = extractelement <4 x float> %338, i32 1 %341 = extractelement <4 x float> %338, i32 2 %342 = fcmp oeq float %temp26.0, 4.000000e+00 %343 = select i1 %342, float 1.000000e+00, float 0.000000e+00 %344 = bitcast float %327 to i32 %345 = bitcast float %329 to i32 %346 = bitcast float %192 to i32 %347 = insertelement <4 x i32> undef, i32 %344, i32 0 %348 = insertelement <4 x i32> %347, i32 %345, i32 1 %349 = insertelement <4 x i32> %348, i32 %346, i32 2 %350 = bitcast <8 x i32> %58 to <32 x i8> %351 = bitcast <4 x i32> %60 to <16 x i8> %352 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %349, <32 x i8> %350, <16 x i8> %351, i32 2) %353 = extractelement <4 x float> %352, i32 0 %354 = extractelement <4 x float> %352, i32 1 %355 = extractelement <4 x float> %352, i32 2 %356 = fcmp oeq float %temp26.0, 3.000000e+00 %357 = select i1 %356, float 1.000000e+00, float 0.000000e+00 %358 = bitcast float %327 to i32 %359 = bitcast float %329 to i32 %360 = bitcast float %192 to i32 %361 = insertelement <4 x i32> undef, i32 %358, i32 0 %362 = insertelement <4 x i32> %361, i32 %359, i32 1 %363 = insertelement <4 x i32> %362, i32 %360, i32 2 %364 = bitcast <8 x i32> %54 to <32 x i8> %365 = bitcast <4 x i32> %56 to <16 x i8> %366 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %363, <32 x i8> %364, <16 x i8> %365, i32 2) %367 = extractelement <4 x float> %366, i32 0 %368 = extractelement <4 x float> %366, i32 1 %369 = extractelement <4 x float> %366, i32 2 %370 = fcmp oeq float %temp26.0, 2.000000e+00 %371 = select i1 %370, float 1.000000e+00, float 0.000000e+00 %372 = bitcast float %327 to i32 %373 = bitcast float %329 to i32 %374 = bitcast float %192 to i32 %375 = insertelement <4 x i32> undef, i32 %372, i32 0 %376 = insertelement <4 x i32> %375, i32 %373, i32 1 %377 = insertelement <4 x i32> %376, i32 %374, i32 2 %378 = bitcast <8 x i32> %50 to <32 x i8> %379 = bitcast <4 x i32> %52 to <16 x i8> %380 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %377, <32 x i8> %378, <16 x i8> %379, i32 2) %381 = extractelement <4 x float> %380, i32 0 %382 = extractelement <4 x float> %380, i32 1 %383 = extractelement <4 x float> %380, i32 2 %384 = fcmp oeq float %temp26.0, 1.000000e+00 %385 = select i1 %384, float 1.000000e+00, float 0.000000e+00 %386 = bitcast float %327 to i32 %387 = bitcast float %329 to i32 %388 = bitcast float %192 to i32 %389 = insertelement <4 x i32> undef, i32 %386, i32 0 %390 = insertelement <4 x i32> %389, i32 %387, i32 1 %391 = insertelement <4 x i32> %390, i32 %388, i32 2 %392 = bitcast <8 x i32> %46 to <32 x i8> %393 = bitcast <4 x i32> %48 to <16 x i8> %394 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %391, <32 x i8> %392, <16 x i8> %393, i32 2) %395 = extractelement <4 x float> %394, i32 0 %396 = extractelement <4 x float> %394, i32 1 %397 = extractelement <4 x float> %394, i32 2 %398 = fcmp oeq float %temp26.0, 0.000000e+00 %399 = select i1 %398, float 1.000000e+00, float 0.000000e+00 %400 = fmul float %395, %399 %401 = fmul float %396, %399 %402 = fmul float %397, %399 %403 = fmul float %381, %385 %404 = fadd float %403, %400 %405 = fmul float %382, %385 %406 = fadd float %405, %401 %407 = fmul float %383, %385 %408 = fadd float %407, %402 %409 = fmul float %367, %371 %410 = fadd float %409, %404 %411 = fmul float %368, %371 %412 = fadd float %411, %406 %413 = fmul float %369, %371 %414 = fadd float %413, %408 %415 = fmul float %353, %357 %416 = fadd float %415, %410 %417 = fmul float %354, %357 %418 = fadd float %417, %412 %419 = fmul float %355, %357 %420 = fadd float %419, %414 %421 = fmul float %339, %343 %422 = fadd float %421, %416 %423 = fmul float %340, %343 %424 = fadd float %423, %418 %425 = fmul float %341, %343 %426 = fadd float %425, %420 %427 = fcmp une float %32, %temp12.0 %.sink123 = select i1 %427, float %35, float %34 %temp48.0 = select i1 %427, float 1.953125e-03, float 3.906250e-03 %428 = fdiv float 1.000000e+00, %.sink123 %429 = fmul float %87, %428 %430 = fmul float %85, %428 %431 = call float @llvm.floor.f32(float %429) %432 = fsub float %429, %431 %433 = call float @llvm.floor.f32(float %430) %434 = fsub float %430, %433 %435 = fmul float %36, 2.000000e+00 %436 = fmul float %435, %temp48.0 %437 = fsub float 1.000000e+00, %436 %438 = fmul float %temp48.0, %36 %439 = fmul float %432, %437 %440 = fadd float %439, %438 %441 = fmul float %434, %437 %442 = fadd float %441, %438 %443 = fmul float %440, %temp12.0 %444 = fadd float %443, %temp24.0 %445 = fmul float %442, %temp12.0 %446 = fadd float %445, %temp25.0 %447 = bitcast float %444 to i32 %448 = bitcast float %446 to i32 %449 = bitcast float %192 to i32 %450 = insertelement <4 x i32> undef, i32 %447, i32 0 %451 = insertelement <4 x i32> %450, i32 %448, i32 1 %452 = insertelement <4 x i32> %451, i32 %449, i32 2 %453 = bitcast <8 x i32> %62 to <32 x i8> %454 = bitcast <4 x i32> %64 to <16 x i8> %455 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %452, <32 x i8> %453, <16 x i8> %454, i32 2) %456 = extractelement <4 x float> %455, i32 0 %457 = extractelement <4 x float> %455, i32 1 %458 = extractelement <4 x float> %455, i32 2 %459 = fcmp oeq float %temp26.0, 4.000000e+00 %460 = select i1 %459, float 1.000000e+00, float 0.000000e+00 %461 = bitcast float %444 to i32 %462 = bitcast float %446 to i32 %463 = bitcast float %192 to i32 %464 = insertelement <4 x i32> undef, i32 %461, i32 0 %465 = insertelement <4 x i32> %464, i32 %462, i32 1 %466 = insertelement <4 x i32> %465, i32 %463, i32 2 %467 = bitcast <8 x i32> %58 to <32 x i8> %468 = bitcast <4 x i32> %60 to <16 x i8> %469 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %466, <32 x i8> %467, <16 x i8> %468, i32 2) %470 = extractelement <4 x float> %469, i32 0 %471 = extractelement <4 x float> %469, i32 1 %472 = extractelement <4 x float> %469, i32 2 %473 = fcmp oeq float %temp26.0, 3.000000e+00 %474 = select i1 %473, float 1.000000e+00, float 0.000000e+00 %475 = bitcast float %444 to i32 %476 = bitcast float %446 to i32 %477 = bitcast float %192 to i32 %478 = insertelement <4 x i32> undef, i32 %475, i32 0 %479 = insertelement <4 x i32> %478, i32 %476, i32 1 %480 = insertelement <4 x i32> %479, i32 %477, i32 2 %481 = bitcast <8 x i32> %54 to <32 x i8> %482 = bitcast <4 x i32> %56 to <16 x i8> %483 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %480, <32 x i8> %481, <16 x i8> %482, i32 2) %484 = extractelement <4 x float> %483, i32 0 %485 = extractelement <4 x float> %483, i32 1 %486 = extractelement <4 x float> %483, i32 2 %487 = fcmp oeq float %temp26.0, 2.000000e+00 %488 = select i1 %487, float 1.000000e+00, float 0.000000e+00 %489 = bitcast float %444 to i32 %490 = bitcast float %446 to i32 %491 = bitcast float %192 to i32 %492 = insertelement <4 x i32> undef, i32 %489, i32 0 %493 = insertelement <4 x i32> %492, i32 %490, i32 1 %494 = insertelement <4 x i32> %493, i32 %491, i32 2 %495 = bitcast <8 x i32> %50 to <32 x i8> %496 = bitcast <4 x i32> %52 to <16 x i8> %497 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %494, <32 x i8> %495, <16 x i8> %496, i32 2) %498 = extractelement <4 x float> %497, i32 0 %499 = extractelement <4 x float> %497, i32 1 %500 = extractelement <4 x float> %497, i32 2 %501 = fcmp oeq float %temp26.0, 1.000000e+00 %502 = select i1 %501, float 1.000000e+00, float 0.000000e+00 %503 = bitcast float %444 to i32 %504 = bitcast float %446 to i32 %505 = bitcast float %192 to i32 %506 = insertelement <4 x i32> undef, i32 %503, i32 0 %507 = insertelement <4 x i32> %506, i32 %504, i32 1 %508 = insertelement <4 x i32> %507, i32 %505, i32 2 %509 = bitcast <8 x i32> %46 to <32 x i8> %510 = bitcast <4 x i32> %48 to <16 x i8> %511 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %508, <32 x i8> %509, <16 x i8> %510, i32 2) %512 = extractelement <4 x float> %511, i32 0 %513 = extractelement <4 x float> %511, i32 1 %514 = extractelement <4 x float> %511, i32 2 %515 = fcmp oeq float %temp26.0, 0.000000e+00 %516 = select i1 %515, float 1.000000e+00, float 0.000000e+00 %517 = fmul float %512, %516 %518 = fmul float %513, %516 %519 = fmul float %514, %516 %520 = fmul float %498, %502 %521 = fadd float %520, %517 %522 = fmul float %499, %502 %523 = fadd float %522, %518 %524 = fmul float %500, %502 %525 = fadd float %524, %519 %526 = fmul float %484, %488 %527 = fadd float %526, %521 %528 = fmul float %485, %488 %529 = fadd float %528, %523 %530 = fmul float %486, %488 %531 = fadd float %530, %525 %532 = fmul float %470, %474 %533 = fadd float %532, %527 %534 = fmul float %471, %474 %535 = fadd float %534, %529 %536 = fmul float %472, %474 %537 = fadd float %536, %531 %538 = fmul float %456, %460 %539 = fadd float %538, %533 %540 = fmul float %457, %460 %541 = fadd float %540, %535 %542 = fmul float %458, %460 %543 = fadd float %542, %537 %544 = fcmp une float %32, %temp16.0 %.sink124 = select i1 %544, float %35, float %34 %temp44.2 = select i1 %544, float 1.953125e-03, float 3.906250e-03 %545 = fdiv float 1.000000e+00, %.sink124 %546 = fmul float %85, %545 %547 = fmul float %86, %545 %548 = call float @llvm.floor.f32(float %546) %549 = fsub float %546, %548 %550 = call float @llvm.floor.f32(float %547) %551 = fsub float %547, %550 %552 = fmul float %36, 2.000000e+00 %553 = fmul float %552, %temp44.2 %554 = fsub float 1.000000e+00, %553 %555 = fmul float %temp44.2, %36 %556 = fmul float %549, %554 %557 = fadd float %556, %555 %558 = fmul float %551, %554 %559 = fadd float %558, %555 %560 = fmul float %557, %temp16.0 %561 = fadd float %560, %temp32.0 %562 = fmul float %559, %temp16.0 %563 = fadd float %562, %temp33.0 %564 = bitcast float %561 to i32 %565 = bitcast float %563 to i32 %566 = bitcast float %192 to i32 %567 = insertelement <4 x i32> undef, i32 %564, i32 0 %568 = insertelement <4 x i32> %567, i32 %565, i32 1 %569 = insertelement <4 x i32> %568, i32 %566, i32 2 %570 = bitcast <8 x i32> %62 to <32 x i8> %571 = bitcast <4 x i32> %64 to <16 x i8> %572 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %569, <32 x i8> %570, <16 x i8> %571, i32 2) %573 = extractelement <4 x float> %572, i32 0 %574 = extractelement <4 x float> %572, i32 1 %575 = extractelement <4 x float> %572, i32 2 %576 = fcmp oeq float %temp34.0, 4.000000e+00 %577 = select i1 %576, float 1.000000e+00, float 0.000000e+00 %578 = bitcast float %561 to i32 %579 = bitcast float %563 to i32 %580 = bitcast float %192 to i32 %581 = insertelement <4 x i32> undef, i32 %578, i32 0 %582 = insertelement <4 x i32> %581, i32 %579, i32 1 %583 = insertelement <4 x i32> %582, i32 %580, i32 2 %584 = bitcast <8 x i32> %58 to <32 x i8> %585 = bitcast <4 x i32> %60 to <16 x i8> %586 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %583, <32 x i8> %584, <16 x i8> %585, i32 2) %587 = extractelement <4 x float> %586, i32 0 %588 = extractelement <4 x float> %586, i32 1 %589 = extractelement <4 x float> %586, i32 2 %590 = fcmp oeq float %temp34.0, 3.000000e+00 %591 = select i1 %590, float 1.000000e+00, float 0.000000e+00 %592 = bitcast float %561 to i32 %593 = bitcast float %563 to i32 %594 = bitcast float %192 to i32 %595 = insertelement <4 x i32> undef, i32 %592, i32 0 %596 = insertelement <4 x i32> %595, i32 %593, i32 1 %597 = insertelement <4 x i32> %596, i32 %594, i32 2 %598 = bitcast <8 x i32> %54 to <32 x i8> %599 = bitcast <4 x i32> %56 to <16 x i8> %600 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %597, <32 x i8> %598, <16 x i8> %599, i32 2) %601 = extractelement <4 x float> %600, i32 0 %602 = extractelement <4 x float> %600, i32 1 %603 = extractelement <4 x float> %600, i32 2 %604 = fcmp oeq float %temp34.0, 2.000000e+00 %605 = select i1 %604, float 1.000000e+00, float 0.000000e+00 %606 = bitcast float %561 to i32 %607 = bitcast float %563 to i32 %608 = bitcast float %192 to i32 %609 = insertelement <4 x i32> undef, i32 %606, i32 0 %610 = insertelement <4 x i32> %609, i32 %607, i32 1 %611 = insertelement <4 x i32> %610, i32 %608, i32 2 %612 = bitcast <8 x i32> %50 to <32 x i8> %613 = bitcast <4 x i32> %52 to <16 x i8> %614 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %611, <32 x i8> %612, <16 x i8> %613, i32 2) %615 = extractelement <4 x float> %614, i32 0 %616 = extractelement <4 x float> %614, i32 1 %617 = extractelement <4 x float> %614, i32 2 %618 = fcmp oeq float %temp34.0, 1.000000e+00 %619 = select i1 %618, float 1.000000e+00, float 0.000000e+00 %620 = bitcast float %561 to i32 %621 = bitcast float %563 to i32 %622 = bitcast float %192 to i32 %623 = insertelement <4 x i32> undef, i32 %620, i32 0 %624 = insertelement <4 x i32> %623, i32 %621, i32 1 %625 = insertelement <4 x i32> %624, i32 %622, i32 2 %626 = bitcast <8 x i32> %46 to <32 x i8> %627 = bitcast <4 x i32> %48 to <16 x i8> %628 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %625, <32 x i8> %626, <16 x i8> %627, i32 2) %629 = extractelement <4 x float> %628, i32 0 %630 = extractelement <4 x float> %628, i32 1 %631 = extractelement <4 x float> %628, i32 2 %632 = fcmp oeq float %temp34.0, 0.000000e+00 %633 = select i1 %632, float 1.000000e+00, float 0.000000e+00 %634 = fmul float %629, %633 %635 = fmul float %630, %633 %636 = fmul float %631, %633 %637 = fmul float %615, %619 %638 = fadd float %637, %634 %639 = fmul float %616, %619 %640 = fadd float %639, %635 %641 = fmul float %617, %619 %642 = fadd float %641, %636 %643 = fmul float %601, %605 %644 = fadd float %643, %638 %645 = fmul float %602, %605 %646 = fadd float %645, %640 %647 = fmul float %603, %605 %648 = fadd float %647, %642 %649 = fmul float %587, %591 %650 = fadd float %649, %644 %651 = fmul float %588, %591 %652 = fadd float %651, %646 %653 = fmul float %589, %591 %654 = fadd float %653, %648 %655 = fmul float %573, %577 %656 = fadd float %655, %650 %657 = fmul float %574, %577 %658 = fadd float %657, %652 %659 = fmul float %575, %577 %660 = fadd float %659, %654 %661 = fcmp une float %32, %temp16.0 %.sink125 = select i1 %661, float %35, float %34 %temp48.1 = select i1 %661, float 1.953125e-03, float 3.906250e-03 %662 = fdiv float 1.000000e+00, %.sink125 %663 = fmul float %87, %662 %664 = fmul float %86, %662 %665 = call float @llvm.floor.f32(float %663) %666 = fsub float %663, %665 %667 = call float @llvm.floor.f32(float %664) %668 = fsub float %664, %667 %669 = fmul float %36, 2.000000e+00 %670 = fmul float %669, %temp48.1 %671 = fsub float 1.000000e+00, %670 %672 = fmul float %temp48.1, %36 %673 = fmul float %666, %671 %674 = fadd float %673, %672 %675 = fmul float %668, %671 %676 = fadd float %675, %672 %677 = fmul float %674, %temp16.0 %678 = fadd float %677, %temp32.0 %679 = fmul float %676, %temp16.0 %680 = fadd float %679, %temp33.0 %681 = bitcast float %678 to i32 %682 = bitcast float %680 to i32 %683 = bitcast float %192 to i32 %684 = insertelement <4 x i32> undef, i32 %681, i32 0 %685 = insertelement <4 x i32> %684, i32 %682, i32 1 %686 = insertelement <4 x i32> %685, i32 %683, i32 2 %687 = bitcast <8 x i32> %62 to <32 x i8> %688 = bitcast <4 x i32> %64 to <16 x i8> %689 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %686, <32 x i8> %687, <16 x i8> %688, i32 2) %690 = extractelement <4 x float> %689, i32 0 %691 = extractelement <4 x float> %689, i32 1 %692 = extractelement <4 x float> %689, i32 2 %693 = fcmp oeq float %temp34.0, 4.000000e+00 %694 = select i1 %693, float 1.000000e+00, float 0.000000e+00 %695 = bitcast float %678 to i32 %696 = bitcast float %680 to i32 %697 = bitcast float %192 to i32 %698 = insertelement <4 x i32> undef, i32 %695, i32 0 %699 = insertelement <4 x i32> %698, i32 %696, i32 1 %700 = insertelement <4 x i32> %699, i32 %697, i32 2 %701 = bitcast <8 x i32> %58 to <32 x i8> %702 = bitcast <4 x i32> %60 to <16 x i8> %703 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %700, <32 x i8> %701, <16 x i8> %702, i32 2) %704 = extractelement <4 x float> %703, i32 0 %705 = extractelement <4 x float> %703, i32 1 %706 = extractelement <4 x float> %703, i32 2 %707 = fcmp oeq float %temp34.0, 3.000000e+00 %708 = select i1 %707, float 1.000000e+00, float 0.000000e+00 %709 = bitcast float %678 to i32 %710 = bitcast float %680 to i32 %711 = bitcast float %192 to i32 %712 = insertelement <4 x i32> undef, i32 %709, i32 0 %713 = insertelement <4 x i32> %712, i32 %710, i32 1 %714 = insertelement <4 x i32> %713, i32 %711, i32 2 %715 = bitcast <8 x i32> %54 to <32 x i8> %716 = bitcast <4 x i32> %56 to <16 x i8> %717 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %714, <32 x i8> %715, <16 x i8> %716, i32 2) %718 = extractelement <4 x float> %717, i32 0 %719 = extractelement <4 x float> %717, i32 1 %720 = extractelement <4 x float> %717, i32 2 %721 = fcmp oeq float %temp34.0, 2.000000e+00 %722 = select i1 %721, float 1.000000e+00, float 0.000000e+00 %723 = bitcast float %678 to i32 %724 = bitcast float %680 to i32 %725 = bitcast float %192 to i32 %726 = insertelement <4 x i32> undef, i32 %723, i32 0 %727 = insertelement <4 x i32> %726, i32 %724, i32 1 %728 = insertelement <4 x i32> %727, i32 %725, i32 2 %729 = bitcast <8 x i32> %50 to <32 x i8> %730 = bitcast <4 x i32> %52 to <16 x i8> %731 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %728, <32 x i8> %729, <16 x i8> %730, i32 2) %732 = extractelement <4 x float> %731, i32 0 %733 = extractelement <4 x float> %731, i32 1 %734 = extractelement <4 x float> %731, i32 2 %735 = fcmp oeq float %temp34.0, 1.000000e+00 %736 = select i1 %735, float 1.000000e+00, float 0.000000e+00 %737 = bitcast float %678 to i32 %738 = bitcast float %680 to i32 %739 = bitcast float %192 to i32 %740 = insertelement <4 x i32> undef, i32 %737, i32 0 %741 = insertelement <4 x i32> %740, i32 %738, i32 1 %742 = insertelement <4 x i32> %741, i32 %739, i32 2 %743 = bitcast <8 x i32> %46 to <32 x i8> %744 = bitcast <4 x i32> %48 to <16 x i8> %745 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %742, <32 x i8> %743, <16 x i8> %744, i32 2) %746 = extractelement <4 x float> %745, i32 0 %747 = extractelement <4 x float> %745, i32 1 %748 = extractelement <4 x float> %745, i32 2 %749 = fcmp oeq float %temp34.0, 0.000000e+00 %750 = select i1 %749, float 1.000000e+00, float 0.000000e+00 %751 = fmul float %746, %750 %752 = fmul float %747, %750 %753 = fmul float %748, %750 %754 = fmul float %732, %736 %755 = fadd float %754, %751 %756 = fmul float %733, %736 %757 = fadd float %756, %752 %758 = fmul float %734, %736 %759 = fadd float %758, %753 %760 = fmul float %718, %722 %761 = fadd float %760, %755 %762 = fmul float %719, %722 %763 = fadd float %762, %757 %764 = fmul float %720, %722 %765 = fadd float %764, %759 %766 = fmul float %704, %708 %767 = fadd float %766, %761 %768 = fmul float %705, %708 %769 = fadd float %768, %763 %770 = fmul float %706, %708 %771 = fadd float %770, %765 %772 = fmul float %690, %694 %773 = fadd float %772, %767 %774 = fmul float %691, %694 %775 = fadd float %774, %769 %776 = fmul float %692, %694 %777 = fadd float %776, %771 %778 = fcmp une float %32, %temp16.0 %.sink126 = select i1 %778, float %35, float %34 %temp52.0 = select i1 %778, float 1.953125e-03, float 3.906250e-03 %779 = fdiv float 1.000000e+00, %.sink126 %780 = fmul float %87, %779 %781 = fmul float %85, %779 %782 = call float @llvm.floor.f32(float %780) %783 = fsub float %780, %782 %784 = call float @llvm.floor.f32(float %781) %785 = fsub float %781, %784 %786 = fmul float %36, 2.000000e+00 %787 = fmul float %786, %temp52.0 %788 = fsub float 1.000000e+00, %787 %789 = fmul float %temp52.0, %36 %790 = fmul float %783, %788 %791 = fadd float %790, %789 %792 = fmul float %785, %788 %793 = fadd float %792, %789 %794 = fmul float %791, %temp16.0 %795 = fadd float %794, %temp32.0 %796 = fmul float %793, %temp16.0 %797 = fadd float %796, %temp33.0 %798 = bitcast float %795 to i32 %799 = bitcast float %797 to i32 %800 = bitcast float %192 to i32 %801 = insertelement <4 x i32> undef, i32 %798, i32 0 %802 = insertelement <4 x i32> %801, i32 %799, i32 1 %803 = insertelement <4 x i32> %802, i32 %800, i32 2 %804 = bitcast <8 x i32> %62 to <32 x i8> %805 = bitcast <4 x i32> %64 to <16 x i8> %806 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %803, <32 x i8> %804, <16 x i8> %805, i32 2) %807 = extractelement <4 x float> %806, i32 0 %808 = extractelement <4 x float> %806, i32 1 %809 = extractelement <4 x float> %806, i32 2 %810 = fcmp oeq float %temp34.0, 4.000000e+00 %811 = select i1 %810, float 1.000000e+00, float 0.000000e+00 %812 = bitcast float %795 to i32 %813 = bitcast float %797 to i32 %814 = bitcast float %192 to i32 %815 = insertelement <4 x i32> undef, i32 %812, i32 0 %816 = insertelement <4 x i32> %815, i32 %813, i32 1 %817 = insertelement <4 x i32> %816, i32 %814, i32 2 %818 = bitcast <8 x i32> %58 to <32 x i8> %819 = bitcast <4 x i32> %60 to <16 x i8> %820 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %817, <32 x i8> %818, <16 x i8> %819, i32 2) %821 = extractelement <4 x float> %820, i32 0 %822 = extractelement <4 x float> %820, i32 1 %823 = extractelement <4 x float> %820, i32 2 %824 = fcmp oeq float %temp34.0, 3.000000e+00 %825 = select i1 %824, float 1.000000e+00, float 0.000000e+00 %826 = bitcast float %795 to i32 %827 = bitcast float %797 to i32 %828 = bitcast float %192 to i32 %829 = insertelement <4 x i32> undef, i32 %826, i32 0 %830 = insertelement <4 x i32> %829, i32 %827, i32 1 %831 = insertelement <4 x i32> %830, i32 %828, i32 2 %832 = bitcast <8 x i32> %54 to <32 x i8> %833 = bitcast <4 x i32> %56 to <16 x i8> %834 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %831, <32 x i8> %832, <16 x i8> %833, i32 2) %835 = extractelement <4 x float> %834, i32 0 %836 = extractelement <4 x float> %834, i32 1 %837 = extractelement <4 x float> %834, i32 2 %838 = fcmp oeq float %temp34.0, 2.000000e+00 %839 = select i1 %838, float 1.000000e+00, float 0.000000e+00 %840 = bitcast float %795 to i32 %841 = bitcast float %797 to i32 %842 = bitcast float %192 to i32 %843 = insertelement <4 x i32> undef, i32 %840, i32 0 %844 = insertelement <4 x i32> %843, i32 %841, i32 1 %845 = insertelement <4 x i32> %844, i32 %842, i32 2 %846 = bitcast <8 x i32> %50 to <32 x i8> %847 = bitcast <4 x i32> %52 to <16 x i8> %848 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %845, <32 x i8> %846, <16 x i8> %847, i32 2) %849 = extractelement <4 x float> %848, i32 0 %850 = extractelement <4 x float> %848, i32 1 %851 = extractelement <4 x float> %848, i32 2 %852 = fcmp oeq float %temp34.0, 1.000000e+00 %853 = select i1 %852, float 1.000000e+00, float 0.000000e+00 %854 = bitcast float %795 to i32 %855 = bitcast float %797 to i32 %856 = bitcast float %192 to i32 %857 = insertelement <4 x i32> undef, i32 %854, i32 0 %858 = insertelement <4 x i32> %857, i32 %855, i32 1 %859 = insertelement <4 x i32> %858, i32 %856, i32 2 %860 = bitcast <8 x i32> %46 to <32 x i8> %861 = bitcast <4 x i32> %48 to <16 x i8> %862 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %859, <32 x i8> %860, <16 x i8> %861, i32 2) %863 = extractelement <4 x float> %862, i32 0 %864 = extractelement <4 x float> %862, i32 1 %865 = extractelement <4 x float> %862, i32 2 %866 = fcmp oeq float %temp34.0, 0.000000e+00 %867 = select i1 %866, float 1.000000e+00, float 0.000000e+00 %868 = fmul float %863, %867 %869 = fmul float %864, %867 %870 = fmul float %865, %867 %871 = fmul float %849, %853 %872 = fadd float %871, %868 %873 = fmul float %850, %853 %874 = fadd float %873, %869 %875 = fmul float %851, %853 %876 = fadd float %875, %870 %877 = fmul float %835, %839 %878 = fadd float %877, %872 %879 = fmul float %836, %839 %880 = fadd float %879, %874 %881 = fmul float %837, %839 %882 = fadd float %881, %876 %883 = fmul float %821, %825 %884 = fadd float %883, %878 %885 = fmul float %822, %825 %886 = fadd float %885, %880 %887 = fmul float %823, %825 %888 = fadd float %887, %882 %889 = fmul float %807, %811 %890 = fadd float %889, %884 %891 = fmul float %808, %811 %892 = fadd float %891, %886 %893 = fmul float %809, %811 %894 = fadd float %893, %888 %895 = fcmp une float %32, %temp20.0 %.sink127 = select i1 %895, float %35, float %34 %temp48.3 = select i1 %895, float 1.953125e-03, float 3.906250e-03 %896 = fdiv float 1.000000e+00, %.sink127 %897 = fmul float %85, %896 %898 = fmul float %86, %896 %899 = call float @llvm.floor.f32(float %897) %900 = fsub float %897, %899 %901 = call float @llvm.floor.f32(float %898) %902 = fsub float %898, %901 %903 = fmul float %36, 2.000000e+00 %904 = fmul float %903, %temp48.3 %905 = fsub float 1.000000e+00, %904 %906 = fmul float %temp48.3, %36 %907 = fmul float %900, %905 %908 = fadd float %907, %906 %909 = fmul float %902, %905 %910 = fadd float %909, %906 %911 = fmul float %908, %temp20.0 %912 = fadd float %911, %temp8.0 %913 = fmul float %910, %temp20.0 %914 = fadd float %913, %temp9.0 %915 = bitcast float %912 to i32 %916 = bitcast float %914 to i32 %917 = bitcast float %192 to i32 %918 = insertelement <4 x i32> undef, i32 %915, i32 0 %919 = insertelement <4 x i32> %918, i32 %916, i32 1 %920 = insertelement <4 x i32> %919, i32 %917, i32 2 %921 = bitcast <8 x i32> %62 to <32 x i8> %922 = bitcast <4 x i32> %64 to <16 x i8> %923 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %920, <32 x i8> %921, <16 x i8> %922, i32 2) %924 = extractelement <4 x float> %923, i32 0 %925 = extractelement <4 x float> %923, i32 1 %926 = extractelement <4 x float> %923, i32 2 %927 = fcmp oeq float %temp10.0, 4.000000e+00 %928 = select i1 %927, float 1.000000e+00, float 0.000000e+00 %929 = bitcast float %912 to i32 %930 = bitcast float %914 to i32 %931 = bitcast float %192 to i32 %932 = insertelement <4 x i32> undef, i32 %929, i32 0 %933 = insertelement <4 x i32> %932, i32 %930, i32 1 %934 = insertelement <4 x i32> %933, i32 %931, i32 2 %935 = bitcast <8 x i32> %58 to <32 x i8> %936 = bitcast <4 x i32> %60 to <16 x i8> %937 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %934, <32 x i8> %935, <16 x i8> %936, i32 2) %938 = extractelement <4 x float> %937, i32 0 %939 = extractelement <4 x float> %937, i32 1 %940 = extractelement <4 x float> %937, i32 2 %941 = fcmp oeq float %temp10.0, 3.000000e+00 %942 = select i1 %941, float 1.000000e+00, float 0.000000e+00 %943 = bitcast float %912 to i32 %944 = bitcast float %914 to i32 %945 = bitcast float %192 to i32 %946 = insertelement <4 x i32> undef, i32 %943, i32 0 %947 = insertelement <4 x i32> %946, i32 %944, i32 1 %948 = insertelement <4 x i32> %947, i32 %945, i32 2 %949 = bitcast <8 x i32> %54 to <32 x i8> %950 = bitcast <4 x i32> %56 to <16 x i8> %951 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %948, <32 x i8> %949, <16 x i8> %950, i32 2) %952 = extractelement <4 x float> %951, i32 0 %953 = extractelement <4 x float> %951, i32 1 %954 = extractelement <4 x float> %951, i32 2 %955 = fcmp oeq float %temp10.0, 2.000000e+00 %956 = select i1 %955, float 1.000000e+00, float 0.000000e+00 %957 = bitcast float %912 to i32 %958 = bitcast float %914 to i32 %959 = bitcast float %192 to i32 %960 = insertelement <4 x i32> undef, i32 %957, i32 0 %961 = insertelement <4 x i32> %960, i32 %958, i32 1 %962 = insertelement <4 x i32> %961, i32 %959, i32 2 %963 = bitcast <8 x i32> %50 to <32 x i8> %964 = bitcast <4 x i32> %52 to <16 x i8> %965 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %962, <32 x i8> %963, <16 x i8> %964, i32 2) %966 = extractelement <4 x float> %965, i32 0 %967 = extractelement <4 x float> %965, i32 1 %968 = extractelement <4 x float> %965, i32 2 %969 = fcmp oeq float %temp10.0, 1.000000e+00 %970 = select i1 %969, float 1.000000e+00, float 0.000000e+00 %971 = bitcast float %912 to i32 %972 = bitcast float %914 to i32 %973 = bitcast float %192 to i32 %974 = insertelement <4 x i32> undef, i32 %971, i32 0 %975 = insertelement <4 x i32> %974, i32 %972, i32 1 %976 = insertelement <4 x i32> %975, i32 %973, i32 2 %977 = bitcast <8 x i32> %46 to <32 x i8> %978 = bitcast <4 x i32> %48 to <16 x i8> %979 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %976, <32 x i8> %977, <16 x i8> %978, i32 2) %980 = extractelement <4 x float> %979, i32 0 %981 = extractelement <4 x float> %979, i32 1 %982 = extractelement <4 x float> %979, i32 2 %983 = fcmp oeq float %temp10.0, 0.000000e+00 %984 = select i1 %983, float 1.000000e+00, float 0.000000e+00 %985 = fmul float %980, %984 %986 = fmul float %981, %984 %987 = fmul float %982, %984 %988 = fmul float %966, %970 %989 = fadd float %988, %985 %990 = fmul float %967, %970 %991 = fadd float %990, %986 %992 = fmul float %968, %970 %993 = fadd float %992, %987 %994 = fmul float %952, %956 %995 = fadd float %994, %989 %996 = fmul float %953, %956 %997 = fadd float %996, %991 %998 = fmul float %954, %956 %999 = fadd float %998, %993 %1000 = fmul float %938, %942 %1001 = fadd float %1000, %995 %1002 = fmul float %939, %942 %1003 = fadd float %1002, %997 %1004 = fmul float %940, %942 %1005 = fadd float %1004, %999 %1006 = fmul float %924, %928 %1007 = fadd float %1006, %1001 %1008 = fmul float %925, %928 %1009 = fadd float %1008, %1003 %1010 = fmul float %926, %928 %1011 = fadd float %1010, %1005 %1012 = fcmp une float %32, %temp20.0 %.sink128 = select i1 %1012, float %35, float %34 %temp52.1 = select i1 %1012, float 1.953125e-03, float 3.906250e-03 %1013 = fdiv float 1.000000e+00, %.sink128 %1014 = fmul float %87, %1013 %1015 = fmul float %86, %1013 %1016 = call float @llvm.floor.f32(float %1014) %1017 = fsub float %1014, %1016 %1018 = call float @llvm.floor.f32(float %1015) %1019 = fsub float %1015, %1018 %1020 = fmul float %36, 2.000000e+00 %1021 = fmul float %1020, %temp52.1 %1022 = fsub float 1.000000e+00, %1021 %1023 = fmul float %temp52.1, %36 %1024 = fmul float %1017, %1022 %1025 = fadd float %1024, %1023 %1026 = fmul float %1019, %1022 %1027 = fadd float %1026, %1023 %1028 = fmul float %1025, %temp20.0 %1029 = fadd float %1028, %temp8.0 %1030 = fmul float %1027, %temp20.0 %1031 = fadd float %1030, %temp9.0 %1032 = bitcast float %1029 to i32 %1033 = bitcast float %1031 to i32 %1034 = bitcast float %192 to i32 %1035 = insertelement <4 x i32> undef, i32 %1032, i32 0 %1036 = insertelement <4 x i32> %1035, i32 %1033, i32 1 %1037 = insertelement <4 x i32> %1036, i32 %1034, i32 2 %1038 = bitcast <8 x i32> %62 to <32 x i8> %1039 = bitcast <4 x i32> %64 to <16 x i8> %1040 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1037, <32 x i8> %1038, <16 x i8> %1039, i32 2) %1041 = extractelement <4 x float> %1040, i32 0 %1042 = extractelement <4 x float> %1040, i32 1 %1043 = extractelement <4 x float> %1040, i32 2 %1044 = fcmp oeq float %temp10.0, 4.000000e+00 %1045 = select i1 %1044, float 1.000000e+00, float 0.000000e+00 %1046 = bitcast float %1029 to i32 %1047 = bitcast float %1031 to i32 %1048 = bitcast float %192 to i32 %1049 = insertelement <4 x i32> undef, i32 %1046, i32 0 %1050 = insertelement <4 x i32> %1049, i32 %1047, i32 1 %1051 = insertelement <4 x i32> %1050, i32 %1048, i32 2 %1052 = bitcast <8 x i32> %58 to <32 x i8> %1053 = bitcast <4 x i32> %60 to <16 x i8> %1054 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1051, <32 x i8> %1052, <16 x i8> %1053, i32 2) %1055 = extractelement <4 x float> %1054, i32 0 %1056 = extractelement <4 x float> %1054, i32 1 %1057 = extractelement <4 x float> %1054, i32 2 %1058 = fcmp oeq float %temp10.0, 3.000000e+00 %1059 = select i1 %1058, float 1.000000e+00, float 0.000000e+00 %1060 = bitcast float %1029 to i32 %1061 = bitcast float %1031 to i32 %1062 = bitcast float %192 to i32 %1063 = insertelement <4 x i32> undef, i32 %1060, i32 0 %1064 = insertelement <4 x i32> %1063, i32 %1061, i32 1 %1065 = insertelement <4 x i32> %1064, i32 %1062, i32 2 %1066 = bitcast <8 x i32> %54 to <32 x i8> %1067 = bitcast <4 x i32> %56 to <16 x i8> %1068 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1065, <32 x i8> %1066, <16 x i8> %1067, i32 2) %1069 = extractelement <4 x float> %1068, i32 0 %1070 = extractelement <4 x float> %1068, i32 1 %1071 = extractelement <4 x float> %1068, i32 2 %1072 = fcmp oeq float %temp10.0, 2.000000e+00 %1073 = select i1 %1072, float 1.000000e+00, float 0.000000e+00 %1074 = bitcast float %1029 to i32 %1075 = bitcast float %1031 to i32 %1076 = bitcast float %192 to i32 %1077 = insertelement <4 x i32> undef, i32 %1074, i32 0 %1078 = insertelement <4 x i32> %1077, i32 %1075, i32 1 %1079 = insertelement <4 x i32> %1078, i32 %1076, i32 2 %1080 = bitcast <8 x i32> %50 to <32 x i8> %1081 = bitcast <4 x i32> %52 to <16 x i8> %1082 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1079, <32 x i8> %1080, <16 x i8> %1081, i32 2) %1083 = extractelement <4 x float> %1082, i32 0 %1084 = extractelement <4 x float> %1082, i32 1 %1085 = extractelement <4 x float> %1082, i32 2 %1086 = fcmp oeq float %temp10.0, 1.000000e+00 %1087 = select i1 %1086, float 1.000000e+00, float 0.000000e+00 %1088 = bitcast float %1029 to i32 %1089 = bitcast float %1031 to i32 %1090 = bitcast float %192 to i32 %1091 = insertelement <4 x i32> undef, i32 %1088, i32 0 %1092 = insertelement <4 x i32> %1091, i32 %1089, i32 1 %1093 = insertelement <4 x i32> %1092, i32 %1090, i32 2 %1094 = bitcast <8 x i32> %46 to <32 x i8> %1095 = bitcast <4 x i32> %48 to <16 x i8> %1096 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1093, <32 x i8> %1094, <16 x i8> %1095, i32 2) %1097 = extractelement <4 x float> %1096, i32 0 %1098 = extractelement <4 x float> %1096, i32 1 %1099 = extractelement <4 x float> %1096, i32 2 %1100 = fcmp oeq float %temp10.0, 0.000000e+00 %1101 = select i1 %1100, float 1.000000e+00, float 0.000000e+00 %1102 = fmul float %1097, %1101 %1103 = fmul float %1098, %1101 %1104 = fmul float %1099, %1101 %1105 = fmul float %1083, %1087 %1106 = fadd float %1105, %1102 %1107 = fmul float %1084, %1087 %1108 = fadd float %1107, %1103 %1109 = fmul float %1085, %1087 %1110 = fadd float %1109, %1104 %1111 = fmul float %1069, %1073 %1112 = fadd float %1111, %1106 %1113 = fmul float %1070, %1073 %1114 = fadd float %1113, %1108 %1115 = fmul float %1071, %1073 %1116 = fadd float %1115, %1110 %1117 = fmul float %1055, %1059 %1118 = fadd float %1117, %1112 %1119 = fmul float %1056, %1059 %1120 = fadd float %1119, %1114 %1121 = fmul float %1057, %1059 %1122 = fadd float %1121, %1116 %1123 = fmul float %1041, %1045 %1124 = fadd float %1123, %1118 %1125 = fmul float %1042, %1045 %1126 = fadd float %1125, %1120 %1127 = fmul float %1043, %1045 %1128 = fadd float %1127, %1122 %1129 = fcmp une float %32, %temp20.0 %.sink129 = select i1 %1129, float %35, float %34 %temp56.0 = select i1 %1129, float 1.953125e-03, float 3.906250e-03 %1130 = fdiv float 1.000000e+00, %.sink129 %1131 = fmul float %87, %1130 %1132 = fmul float %85, %1130 %1133 = call float @llvm.floor.f32(float %1131) %1134 = fsub float %1131, %1133 %1135 = call float @llvm.floor.f32(float %1132) %1136 = fsub float %1132, %1135 %1137 = fmul float %36, 2.000000e+00 %1138 = fmul float %1137, %temp56.0 %1139 = fsub float 1.000000e+00, %1138 %1140 = fmul float %temp56.0, %36 %1141 = fmul float %1134, %1139 %1142 = fadd float %1141, %1140 %1143 = fmul float %1136, %1139 %1144 = fadd float %1143, %1140 %1145 = fmul float %1142, %temp20.0 %1146 = fadd float %1145, %temp8.0 %1147 = fmul float %1144, %temp20.0 %1148 = fadd float %1147, %temp9.0 %1149 = bitcast float %1146 to i32 %1150 = bitcast float %1148 to i32 %1151 = bitcast float %192 to i32 %1152 = insertelement <4 x i32> undef, i32 %1149, i32 0 %1153 = insertelement <4 x i32> %1152, i32 %1150, i32 1 %1154 = insertelement <4 x i32> %1153, i32 %1151, i32 2 %1155 = bitcast <8 x i32> %62 to <32 x i8> %1156 = bitcast <4 x i32> %64 to <16 x i8> %1157 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1154, <32 x i8> %1155, <16 x i8> %1156, i32 2) %1158 = extractelement <4 x float> %1157, i32 0 %1159 = extractelement <4 x float> %1157, i32 1 %1160 = extractelement <4 x float> %1157, i32 2 %1161 = fcmp oeq float %temp10.0, 4.000000e+00 %1162 = select i1 %1161, float 1.000000e+00, float 0.000000e+00 %1163 = bitcast float %1146 to i32 %1164 = bitcast float %1148 to i32 %1165 = bitcast float %192 to i32 %1166 = insertelement <4 x i32> undef, i32 %1163, i32 0 %1167 = insertelement <4 x i32> %1166, i32 %1164, i32 1 %1168 = insertelement <4 x i32> %1167, i32 %1165, i32 2 %1169 = bitcast <8 x i32> %58 to <32 x i8> %1170 = bitcast <4 x i32> %60 to <16 x i8> %1171 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1168, <32 x i8> %1169, <16 x i8> %1170, i32 2) %1172 = extractelement <4 x float> %1171, i32 0 %1173 = extractelement <4 x float> %1171, i32 1 %1174 = extractelement <4 x float> %1171, i32 2 %1175 = fcmp oeq float %temp10.0, 3.000000e+00 %1176 = select i1 %1175, float 1.000000e+00, float 0.000000e+00 %1177 = bitcast float %1146 to i32 %1178 = bitcast float %1148 to i32 %1179 = bitcast float %192 to i32 %1180 = insertelement <4 x i32> undef, i32 %1177, i32 0 %1181 = insertelement <4 x i32> %1180, i32 %1178, i32 1 %1182 = insertelement <4 x i32> %1181, i32 %1179, i32 2 %1183 = bitcast <8 x i32> %54 to <32 x i8> %1184 = bitcast <4 x i32> %56 to <16 x i8> %1185 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1182, <32 x i8> %1183, <16 x i8> %1184, i32 2) %1186 = extractelement <4 x float> %1185, i32 0 %1187 = extractelement <4 x float> %1185, i32 1 %1188 = extractelement <4 x float> %1185, i32 2 %1189 = fcmp oeq float %temp10.0, 2.000000e+00 %1190 = select i1 %1189, float 1.000000e+00, float 0.000000e+00 %1191 = bitcast float %1146 to i32 %1192 = bitcast float %1148 to i32 %1193 = bitcast float %192 to i32 %1194 = insertelement <4 x i32> undef, i32 %1191, i32 0 %1195 = insertelement <4 x i32> %1194, i32 %1192, i32 1 %1196 = insertelement <4 x i32> %1195, i32 %1193, i32 2 %1197 = bitcast <8 x i32> %50 to <32 x i8> %1198 = bitcast <4 x i32> %52 to <16 x i8> %1199 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1196, <32 x i8> %1197, <16 x i8> %1198, i32 2) %1200 = extractelement <4 x float> %1199, i32 0 %1201 = extractelement <4 x float> %1199, i32 1 %1202 = extractelement <4 x float> %1199, i32 2 %1203 = fcmp oeq float %temp10.0, 1.000000e+00 %1204 = select i1 %1203, float 1.000000e+00, float 0.000000e+00 %1205 = bitcast float %1146 to i32 %1206 = bitcast float %1148 to i32 %1207 = bitcast float %192 to i32 %1208 = insertelement <4 x i32> undef, i32 %1205, i32 0 %1209 = insertelement <4 x i32> %1208, i32 %1206, i32 1 %1210 = insertelement <4 x i32> %1209, i32 %1207, i32 2 %1211 = bitcast <8 x i32> %46 to <32 x i8> %1212 = bitcast <4 x i32> %48 to <16 x i8> %1213 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1210, <32 x i8> %1211, <16 x i8> %1212, i32 2) %1214 = extractelement <4 x float> %1213, i32 0 %1215 = extractelement <4 x float> %1213, i32 1 %1216 = extractelement <4 x float> %1213, i32 2 %1217 = fcmp oeq float %temp10.0, 0.000000e+00 %1218 = select i1 %1217, float 1.000000e+00, float 0.000000e+00 %1219 = fmul float %1214, %1218 %1220 = fmul float %1215, %1218 %1221 = fmul float %1216, %1218 %1222 = fmul float %1200, %1204 %1223 = fadd float %1222, %1219 %1224 = fmul float %1201, %1204 %1225 = fadd float %1224, %1220 %1226 = fmul float %1202, %1204 %1227 = fadd float %1226, %1221 %1228 = fmul float %1186, %1190 %1229 = fadd float %1228, %1223 %1230 = fmul float %1187, %1190 %1231 = fadd float %1230, %1225 %1232 = fmul float %1188, %1190 %1233 = fadd float %1232, %1227 %1234 = fmul float %1172, %1176 %1235 = fadd float %1234, %1229 %1236 = fmul float %1173, %1176 %1237 = fadd float %1236, %1231 %1238 = fmul float %1174, %1176 %1239 = fadd float %1238, %1233 %1240 = fmul float %1158, %1162 %1241 = fadd float %1240, %1235 %1242 = fmul float %1159, %1162 %1243 = fadd float %1242, %1237 %1244 = fmul float %1160, %1162 %1245 = fadd float %1244, %1239 %1246 = fmul float %1007, %117 %1247 = fmul float %1009, %117 %1248 = fmul float %1011, %117 %1249 = fmul float %1124, %115 %1250 = fadd float %1249, %1246 %1251 = fmul float %1126, %115 %1252 = fadd float %1251, %1247 %1253 = fmul float %1128, %115 %1254 = fadd float %1253, %1248 %1255 = fmul float %1241, %116 %1256 = fadd float %1255, %1250 %1257 = fmul float %1243, %116 %1258 = fadd float %1257, %1252 %1259 = fmul float %1245, %116 %1260 = fadd float %1259, %1254 %1261 = fmul float %656, %117 %1262 = fmul float %658, %117 %1263 = fmul float %660, %117 %1264 = fmul float %773, %115 %1265 = fadd float %1264, %1261 %1266 = fmul float %775, %115 %1267 = fadd float %1266, %1262 %1268 = fmul float %777, %115 %1269 = fadd float %1268, %1263 %1270 = fmul float %890, %116 %1271 = fadd float %1270, %1265 %1272 = fmul float %892, %116 %1273 = fadd float %1272, %1267 %1274 = fmul float %894, %116 %1275 = fadd float %1274, %1269 %1276 = fmul float %305, %117 %1277 = fmul float %307, %117 %1278 = fmul float %309, %117 %1279 = fmul float %422, %115 %1280 = fadd float %1279, %1276 %1281 = fmul float %424, %115 %1282 = fadd float %1281, %1277 %1283 = fmul float %426, %115 %1284 = fadd float %1283, %1278 %1285 = fmul float %539, %116 %1286 = fadd float %1285, %1280 %1287 = fmul float %541, %116 %1288 = fadd float %1287, %1282 %1289 = fmul float %543, %116 %1290 = fadd float %1289, %1284 %1291 = fmul float %74, %1286 %1292 = fmul float %74, %1288 %1293 = fmul float %74, %1290 %1294 = fmul float %75, %1271 %1295 = fadd float %1294, %1291 %1296 = fmul float %75, %1273 %1297 = fadd float %1296, %1292 %1298 = fmul float %75, %1275 %1299 = fadd float %1298, %1293 %1300 = fmul float %76, %1256 %1301 = fadd float %1300, %1295 %1302 = fmul float %76, %1258 %1303 = fadd float %1302, %1297 %1304 = fmul float %76, %1260 %1305 = fadd float %1304, %1299 %1306 = fdiv float %78, %80 %1307 = fdiv float %79, %80 %1308 = bitcast float %1306 to i32 %1309 = bitcast float %1307 to i32 %1310 = insertelement <2 x i32> undef, i32 %1308, i32 0 %1311 = insertelement <2 x i32> %1310, i32 %1309, i32 1 %1312 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1311, <32 x i8> %67, <16 x i8> %70, i32 2) %1313 = extractelement <4 x float> %1312, i32 0 %1314 = extractelement <4 x float> %1312, i32 1 %1315 = extractelement <4 x float> %1312, i32 2 %1316 = extractelement <4 x float> %1312, i32 3 %1317 = call float @llvm.log2.f32(float %1313) %1318 = call float @llvm.log2.f32(float %1314) %1319 = call float @llvm.log2.f32(float %1315) %1320 = call float @llvm.log2.f32(float %1316) %1321 = fsub float -0.000000e+00, %1320 %1322 = fsub float %88, %1317 %1323 = fsub float %89, %1318 %1324 = fsub float %90, %1319 %1325 = fmul float %1322, 0x3FE51EB860000000 %1326 = fmul float %1323, 0x3FE51EB860000000 %1327 = fmul float %1324, 0x3FE51EB860000000 %1328 = fmul float %42, 2.000000e+00 %1329 = fmul float %43, 2.000000e+00 %1330 = fmul float %44, 2.000000e+00 %1331 = fmul float %1325, %1325 %1332 = fmul float %1326, %1326 %1333 = fmul float %1327, %1327 %1334 = fmul float %1328, %1331 %1335 = fmul float %1329, %1332 %1336 = fmul float %1330, %1333 %1337 = call float @llvm.maxnum.f32(float %1334, float %39) %1338 = call float @llvm.maxnum.f32(float %1335, float %40) %1339 = call float @llvm.maxnum.f32(float %1336, float %41) %1340 = call float @llvm.minnum.f32(float %1337, float 1.000000e+00) %1341 = call float @llvm.minnum.f32(float %1338, float 1.000000e+00) %1342 = call float @llvm.minnum.f32(float %1339, float 1.000000e+00) %1343 = call float @llvm.AMDIL.clamp.(float %1321, float 0.000000e+00, float 1.000000e+00) %1344 = fmul float %1343, %77 %1345 = fmul float %1322, 5.000000e-01 %1346 = fmul float %1323, 5.000000e-01 %1347 = fmul float %1324, 5.000000e-01 %1348 = fmul float %1301, %1345 %1349 = fmul float %1303, %1346 %1350 = fmul float %1305, %1347 %1351 = fmul float %1322, %1344 %1352 = fadd float %1351, %1348 %1353 = fmul float %1323, %1344 %1354 = fadd float %1353, %1349 %1355 = fmul float %1324, %1344 %1356 = fadd float %1355, %1350 %1357 = fmul float %1340, %1301 %1358 = fadd float %1357, %1352 %1359 = fmul float %1341, %1303 %1360 = fadd float %1359, %1354 %1361 = fmul float %1342, %1305 %1362 = fadd float %1361, %1356 %1363 = fmul float %81, %30 %1364 = fadd float %1363, %31 %1365 = call float @llvm.AMDIL.clamp.(float %1364, float 0.000000e+00, float 1.000000e+00) %1366 = call float @llvm.AMDGPU.lrp(float %1365, float %1358, float %27) %1367 = call float @llvm.AMDGPU.lrp(float %1365, float %1360, float %28) %1368 = call float @llvm.AMDGPU.lrp(float %1365, float %1362, float %29) %1369 = call i32 @llvm.SI.packf16(float %1366, float %1367) %1370 = bitcast i32 %1369 to float %1371 = call i32 @llvm.SI.packf16(float %1368, float 1.000000e+00) %1372 = bitcast i32 %1371 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %1370, float %1372, float %1370, float %1372) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v11, v0, 0, 0, [m0] ; C82C0000 v_interp_p2_f32 v11, [v11], v1, 0, 0, [m0] ; C82D0001 v_interp_p1_f32 v12, v0, 1, 0, [m0] ; C8300100 v_interp_p2_f32 v12, [v12], v1, 1, 0, [m0] ; C8310101 v_interp_p1_f32 v14, v0, 2, 0, [m0] ; C8380200 v_interp_p2_f32 v14, [v14], v1, 2, 0, [m0] ; C8390201 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 v_interp_p1_f32 v2, v0, 3, 1, [m0] ; C8080700 v_interp_p2_f32 v2, [v2], v1, 3, 1, [m0] ; C8090701 v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800 v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v15, v0, 0, 3, [m0] ; C83C0C00 v_interp_p2_f32 v15, [v15], v1, 0, 3, [m0] ; C83D0C01 v_interp_p1_f32 v17, v0, 1, 3, [m0] ; C8440D00 v_interp_p2_f32 v17, [v17], v1, 1, 3, [m0] ; C8450D01 v_interp_p1_f32 v18, v0, 2, 3, [m0] ; C8480E00 v_interp_p2_f32 v18, [v18], v1, 2, 3, [m0] ; C8490E01 v_interp_p1_f32 v19, v0, 3, 3, [m0] ; C84C0F00 v_interp_p2_f32 v19, [v19], v1, 3, 3, [m0] ; C84D0F01 v_interp_p1_f32 v20, v0, 0, 4, [m0] ; C8501000 v_interp_p2_f32 v20, [v20], v1, 0, 4, [m0] ; C8511001 v_interp_p1_f32 v27, v0, 1, 4, [m0] ; C86C1100 v_interp_p2_f32 v27, [v27], v1, 1, 4, [m0] ; C86D1101 v_interp_p1_f32 v13, v0, 2, 4, [m0] ; C8341200 v_interp_p2_f32 v13, [v13], v1, 2, 4, [m0] ; C8351201 v_interp_p1_f32 v8, v0, 3, 4, [m0] ; C8201300 v_interp_p2_f32 v8, [v8], v1, 3, 4, [m0] ; C8211301 v_interp_p1_f32 v9, v0, 0, 5, [m0] ; C8241400 v_interp_p2_f32 v9, [v9], v1, 0, 5, [m0] ; C8251401 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p1_f32 v0, v0, 1, 5, [m0] ; C8001500 v_interp_p2_f32 v0, [v0], v1, 1, 5, [m0] ; C8011501 v_mov_b32_e32 v1, 0x7fffffff ; 7E0202FF 7FFFFFFF v_and_b32_e32 v16, v17, v1 ; 36200311 v_mul_f32_e64 v17, |v17|, |v17| ; D2100311 00022311 v_mad_f32 v17, |v18|, |v18|, v17 ; D2820311 04462512 v_and_b32_e32 v32, v18, v1 ; 36400312 v_and_b32_e32 v31, v19, v1 ; 363E0313 v_mad_f32 v1, |v19|, |v19|, v17 ; D2820301 04462713 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s1, s[8:11], 0x24 ; C2008924 s_buffer_load_dword s0, s[8:11], 0x20 ; C2000920 v_rsq_clamp_f32_e32 v33, v1 ; 7E425901 v_add_f32_e32 v1, 0.5, v11 ; 060216F0 v_add_f32_e32 v11, 0.5, v12 ; 061618F0 v_add_f32_e32 v12, 0.5, v14 ; 06181CF0 v_floor_f32_e32 v1, v1 ; 7E024901 v_floor_f32_e32 v14, v11 ; 7E1C490B v_mov_b32_e32 v11, 0x42800000 ; 7E1602FF 42800000 v_cmp_le_f32_e32 vcc, v11, v1 ; 7C06030B v_floor_f32_e32 v17, v12 ; 7E22490C s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v12, s1 ; 7E180201 v_mul_f32_e32 v19, s0, v1 ; 10260200 v_floor_f32_e32 v18, v19 ; 7E244913 s_and_saveexec_b64 s[2:3], vcc ; BE82246A s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E v_mov_b32_e32 v11, 0xc2800000 ; 7E1602FF C2800000 v_add_f32_e32 v1, v1, v11 ; 06021701 v_mul_f32_e32 v11, s1, v1 ; 10160201 v_floor_f32_e32 v11, v11 ; 7E16490B v_mul_f32_e32 v21, s1, v11 ; 102A1601 v_mad_f32 v24, v1, s1, -v11 ; D2820018 842C0301 v_floor_f32_e32 v1, v21 ; 7E024915 v_mad_f32 v25, v11, s1, -v1 ; D2820019 8404030B v_add_f32_e32 v11, 4.0, v1 ; 061602F6 s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502 v_mov_b32_e32 v1, s0 ; 7E020200 v_mov_b32_e32 v30, v12 ; 7E3C030C s_xor_b64 exec, exec, s[2:3] ; 89FE027E v_mul_f32_e32 v11, s0, v18 ; 10162400 v_floor_f32_e32 v21, v19 ; 7E2A4913 v_subrev_f32_e32 v24, v21, v19 ; 0A302715 v_floor_f32_e32 v11, v11 ; 7E16490B v_mad_f32 v25, v18, s0, -v11 ; D2820019 842C0112 v_mov_b32_e32 v30, v1 ; 7E3C0301 s_or_b64 exec, exec, s[2:3] ; 88FE027E v_mul_f32_e32 v19, s0, v14 ; 10261C00 v_floor_f32_e32 v18, v19 ; 7E244913 v_mov_b32_e32 v21, 0x42800000 ; 7E2A02FF 42800000 v_cmp_le_f32_e32 vcc, v21, v14 ; 7C061D15 s_and_saveexec_b64 s[2:3], vcc ; BE82246A s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E v_mov_b32_e32 v21, 0xc2800000 ; 7E2A02FF C2800000 v_add_f32_e32 v14, v14, v21 ; 061C2B0E v_mul_f32_e32 v21, s1, v14 ; 102A1C01 v_floor_f32_e32 v21, v21 ; 7E2A4915 v_mul_f32_e32 v23, s1, v21 ; 102E2A01 v_mad_f32 v22, v14, s1, -v21 ; D2820016 8454030E v_floor_f32_e32 v14, v23 ; 7E1C4917 v_mad_f32 v23, v21, s1, -v14 ; D2820017 84380315 v_add_f32_e32 v21, 4.0, v14 ; 062A1CF6 s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502 v_mov_b32_e32 v28, v12 ; 7E38030C s_xor_b64 exec, exec, s[2:3] ; 89FE027E v_mul_f32_e32 v14, s0, v18 ; 101C2400 v_floor_f32_e32 v21, v19 ; 7E2A4913 v_subrev_f32_e32 v22, v21, v19 ; 0A2C2715 v_floor_f32_e32 v21, v14 ; 7E2A490E v_mad_f32 v23, v18, s0, -v21 ; D2820017 84540112 v_mov_b32_e32 v28, v1 ; 7E380301 s_or_b64 exec, exec, s[2:3] ; 88FE027E s_buffer_load_dword s13, s[8:11], 0xb ; C206890B s_buffer_load_dword s14, s[8:11], 0x28 ; C2070928 s_buffer_load_dword s12, s[8:11], 0x2c ; C206092C v_mul_f32_e32 v35, s0, v17 ; 10462200 v_floor_f32_e32 v34, v35 ; 7E444923 v_mov_b32_e32 v14, 0x42800000 ; 7E1C02FF 42800000 v_cmp_le_f32_e32 vcc, v14, v17 ; 7C06230E s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[2:3], vcc ; BE82246A s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E v_mov_b32_e32 v14, 0xc2800000 ; 7E1C02FF C2800000 v_add_f32_e32 v14, v17, v14 ; 061C1D11 v_mul_f32_e32 v17, s1, v14 ; 10221C01 v_floor_f32_e32 v17, v17 ; 7E224911 v_mul_f32_e32 v19, s1, v17 ; 10262201 v_mad_f32 v18, v14, s1, -v17 ; D2820012 8444030E v_floor_f32_e32 v14, v19 ; 7E1C4913 v_mad_f32 v19, v17, s1, -v14 ; D2820013 84380311 v_add_f32_e32 v14, 4.0, v14 ; 061C1CF6 s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502 v_mov_b32_e32 v17, s13 ; 7E22020D v_mov_b32_e32 v26, s14 ; 7E34020E s_buffer_load_dword s43, s[8:11], 0x0 ; C2158900 s_buffer_load_dword s41, s[8:11], 0x1 ; C2148901 s_buffer_load_dword s42, s[8:11], 0x2 ; C2150902 s_buffer_load_dword s32, s[8:11], 0x4 ; C2100904 s_buffer_load_dword s31, s[8:11], 0x5 ; C20F8905 s_buffer_load_dword s30, s[8:11], 0x6 ; C20F0906 s_buffer_load_dword s13, s[8:11], 0xa ; C206890A s_buffer_load_dword s39, s[8:11], 0x30 ; C2138930 s_buffer_load_dword s40, s[8:11], 0x34 ; C2140934 s_buffer_load_dword s44, s[8:11], 0x38 ; C2160938 s_buffer_load_dword s33, s[8:11], 0x3c ; C210893C s_buffer_load_dword s34, s[8:11], 0x3d ; C211093D s_buffer_load_dword s35, s[8:11], 0x3e ; C211893E s_buffer_load_dword s38, s[8:11], 0x44 ; C2130944 s_buffer_load_dword s37, s[8:11], 0x45 ; C2128945 s_buffer_load_dword s36, s[8:11], 0x46 ; C2120946 v_mov_b32_e32 v29, s12 ; 7E3A020C s_waitcnt lgkmcnt(0) ; BF8C007F s_xor_b64 exec, exec, s[2:3] ; 89FE027E v_mul_f32_e32 v12, s0, v34 ; 10184400 v_floor_f32_e32 v14, v35 ; 7E1C4923 v_subrev_f32_e32 v18, v14, v35 ; 0A24470E v_floor_f32_e32 v14, v12 ; 7E1C490C v_mad_f32 v19, v34, s0, -v14 ; D2820013 84380122 v_mov_b32_e32 v12, v1 ; 7E180301 s_or_b64 exec, exec, s[2:3] ; 88FE027E v_mac_f32_e32 v17, s13, v15 ; 3E221E0D v_add_f32_e64 v1, 0, v17 clamp ; D2060801 00022280 v_cmp_neq_f32_e64 s[28:29], s0, v30 ; D01A001C 00023C00 v_cmp_eq_f32_e64 s[22:23], 4.0, v11 ; D0040016 000216F6 v_cmp_eq_f32_e64 s[18:19], 2.0, v11 ; D0040012 000216F4 v_cmp_eq_f32_e64 s[14:15], 1.0, v11 ; D004000E 000216F2 v_cmp_eq_f32_e64 s[16:17], 0, v11 ; D0040010 00021680 v_cmp_neq_f32_e64 s[24:25], s0, v28 ; D01A0018 00023800 v_cmp_neq_f32_e64 s[26:27], s0, v12 ; D01A001A 00021800 v_cmp_eq_f32_e64 s[20:21], 4.0, v21 ; D0040014 00022AF6 v_cmp_eq_f32_e64 s[12:13], 2.0, v21 ; D004000C 00022AF4 v_cmp_eq_f32_e64 s[8:9], 1.0, v21 ; D0040008 00022AF2 v_cmp_eq_f32_e64 s[10:11], 0, v21 ; D004000A 00022A80 v_cmp_eq_f32_e32 vcc, 4.0, v14 ; 7C041CF6 v_subrev_f32_e32 v15, s43, v20 ; 0A1E282B v_mul_f32_e32 v34, v15, v15 ; 10441F0F v_cmp_eq_f32_e64 s[0:1], 2.0, v14 ; D0040000 00021CF4 v_mov_b32_e32 v15, 0xbe4ccccd ; 7E1E02FF BE4CCCCD v_mad_f32 v17, v33, v16, v15 ; D2820011 043E2121 v_mad_f32 v16, v33, v32, v15 ; D2820010 043E4121 v_mac_f32_e32 v15, v33, v31 ; 3E1E3F21 v_subrev_f32_e32 v31, s41, v27 ; 0A3E3629 v_mac_f32_e32 v34, v31, v31 ; 3E443F1F v_subrev_f32_e32 v31, s42, v13 ; 0A3E1A2A v_mac_f32_e32 v34, v31, v31 ; 3E443F1F v_mul_f32_e32 v31, s44, v34 ; 103E442C v_log_f32_e32 v31, v31 ; 7E3E4F1F v_cmp_eq_f32_e64 s[2:3], 1.0, v14 ; D0040002 00021CF2 v_cndmask_b32_e64 v32, v26, v29, s[28:29] ; D2000020 00723B1A v_rcp_f32_e32 v32, v32 ; 7E405520 v_mul_f32_e32 v31, 0x3f317218, v31 ; 103E3EFF 3F317218 v_mov_b32_e32 v33, 0x3b000000 ; 7E4202FF 3B000000 v_mov_b32_e32 v34, 0x3b800000 ; 7E4402FF 3B800000 v_cndmask_b32_e64 v35, v34, v33, s[28:29] ; D2000023 00724322 v_mul_f32_e32 v36, v32, v20 ; 10482920 v_floor_f32_e32 v36, v36 ; 7E484924 v_mad_f32 v36, v20, v32, -v36 ; D2820024 84924114 v_mul_f32_e32 v37, v32, v27 ; 104A3720 v_floor_f32_e32 v37, v37 ; 7E4A4925 v_mad_f32 v37, v27, v32, -v37 ; D2820025 8496411B v_add_f32_e64 v38, s39, s39 ; D2060026 00004E27 v_mad_f32 v39, -v38, v35, 1.0 ; D2820027 23CA4726 v_mul_f32_e32 v35, s39, v35 ; 10464627 v_mul_f32_e32 v40, v32, v13 ; 10501B20 v_floor_f32_e32 v40, v40 ; 7E504928 v_mad_f32 v32, v13, v32, -v40 ; D2820020 84A2410D v_mad_f32 v36, v39, v36, v35 ; D2820024 048E4927 v_mad_f32 v37, v39, v37, v35 ; D2820025 048E4B27 v_mac_f32_e32 v35, v39, v32 ; 3E464127 v_mul_f32_e32 v41, s40, v31 ; 10523E28 v_mad_f32 v39, v30, v36, v24 ; D2820027 0462491E v_mad_f32 v40, v30, v37, v25 ; D2820028 04664B1E v_cndmask_b32_e64 v31, v26, v29, s[24:25] ; D200001F 00623B1A v_rcp_f32_e32 v31, v31 ; 7E3E551F v_mac_f32_e32 v24, v30, v35 ; 3E30471E v_mov_b32_e32 v42, v24 ; 7E540318 v_mov_b32_e32 v43, v25 ; 7E560319 v_mov_b32_e32 v44, v26 ; 7E58031A v_mov_b32_e32 v45, v27 ; 7E5A031B v_mac_f32_e32 v25, v30, v36 ; 3E32491E v_mul_f32_e32 v30, v31, v20 ; 103C291F v_floor_f32_e32 v30, v30 ; 7E3C491E v_mad_f32 v30, v20, v31, -v30 ; D282001E 847A3F14 v_mul_f32_e32 v32, v31, v27 ; 1040371F v_floor_f32_e32 v32, v32 ; 7E404920 v_mad_f32 v32, v27, v31, -v32 ; D2820020 84823F1B v_mul_f32_e32 v35, v31, v13 ; 10461B1F v_floor_f32_e32 v35, v35 ; 7E464923 v_mad_f32 v31, v13, v31, -v35 ; D282001F 848E3F0D v_cndmask_b32_e64 v29, v26, v29, s[26:27] ; D200001D 006A3B1A v_cndmask_b32_e64 v26, v34, v33, s[24:25] ; D200001A 00624322 v_cndmask_b32_e64 v33, v34, v33, s[26:27] ; D2000021 006A4322 v_mov_b32_e32 v43, v40 ; 7E560328 s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 s_load_dwordx4 s[40:43], s[4:5], 0x4 ; C0940504 s_load_dwordx4 s[60:63], s[4:5], 0x8 ; C09E0508 s_load_dwordx4 s[72:75], s[4:5], 0xc ; C0A4050C s_load_dwordx4 s[84:87], s[4:5], 0x10 ; C0AA0510 s_load_dwordx8 s[88:95], s[6:7], 0x20 ; C0EC0720 s_load_dwordx8 s[76:83], s[6:7], 0x18 ; C0E60718 s_load_dwordx8 s[64:71], s[6:7], 0x10 ; C0E00710 s_load_dwordx8 s[52:59], s[6:7], 0x8 ; C0DA0708 s_load_dwordx8 s[44:51], s[6:7], 0x0 ; C0D60700 v_mad_f32 v34, -v38, v26, 1.0 ; D2820022 23CA3526 v_mul_f32_e32 v35, s39, v26 ; 10463427 v_mad_f32 v30, v34, v30, v35 ; D282001E 048E3D22 v_mad_f32 v32, v34, v32, v35 ; D2820020 048E4122 v_mac_f32_e32 v35, v34, v31 ; 3E463F22 v_mov_b32_e32 v44, v41 ; 7E580329 v_mov_b32_e32 v26, v41 ; 7E340329 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[46:48], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[88:95], s[84:87] ; F0900700 02B62E27 image_sample_l v[49:51], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[76:83], s[72:75] ; F0900700 02533127 image_sample_l v[52:54], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[64:71], s[60:63] ; F0900700 01F03427 image_sample_l v[55:57], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[52:59], s[40:43] ; F0900700 014D3727 image_sample_l v[58:60], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[44:51], s[24:27] ; F0900700 00CB3A27 v_mad_f32 v39, v28, v30, v22 ; D2820027 045A3D1C v_mad_f32 v40, v28, v32, v23 ; D2820028 045E411C v_rcp_f32_e32 v29, v29 ; 7E3A551D v_mac_f32_e32 v22, v28, v35 ; 3E2C471C v_mul_f32_e32 v31, s39, v33 ; 103E4227 v_mov_b32_e32 v34, v22 ; 7E440316 v_mov_b32_e32 v35, v23 ; 7E460317 v_mov_b32_e32 v36, v24 ; 7E480318 v_mov_b32_e32 v37, v25 ; 7E4A0319 v_mac_f32_e32 v23, v28, v30 ; 3E2E3D1C v_mul_f32_e32 v28, v29, v20 ; 1038291D v_floor_f32_e32 v28, v28 ; 7E38491C v_mad_f32 v20, v20, v29, -v28 ; D2820014 84723B14 v_mul_f32_e32 v28, v29, v27 ; 1038371D v_floor_f32_e32 v28, v28 ; 7E38491C v_mad_f32 v27, v27, v29, -v28 ; D282001B 84723B1B v_mul_f32_e32 v28, v29, v13 ; 10381B1D v_floor_f32_e32 v28, v28 ; 7E38491C v_mad_f32 v13, v13, v29, -v28 ; D282000D 84723B0D v_mad_f32 v28, -v38, v33, 1.0 ; D282001C 23CA4326 v_mad_f32 v20, v28, v20, v31 ; D2820014 047E291C v_mad_f32 v27, v28, v27, v31 ; D282001B 047E371C v_mac_f32_e32 v31, v28, v13 ; 3E3E1B1C image_sample_l v[28:30], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[88:95], s[84:87] ; F0900700 02B61C2A image_sample_l v[61:63], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[76:83], s[72:75] ; F0900700 02533D2A image_sample_l v[64:66], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[64:71], s[60:63] ; F0900700 01F0402A image_sample_l v[67:69], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[52:59], s[40:43] ; F0900700 014D432A image_sample_l v[42:44], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[44:51], s[24:27] ; F0900700 00CB2A2A v_mov_b32_e32 v35, v40 ; 7E460328 image_sample_l v[70:72], 7, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[88:95], s[84:87] ; F0900700 02B64618 image_sample_l v[73:75], 7, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[76:83], s[72:75] ; F0900700 02534918 image_sample_l v[76:78], 7, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[64:71], s[60:63] ; F0900700 01F04C18 image_sample_l v[79:81], 7, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[52:59], s[40:43] ; F0900700 014D4F18 image_sample_l v[82:84], 7, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[44:51], s[24:27] ; F0900700 00CB5218 v_mov_b32_e32 v36, v41 ; 7E480329 v_mov_b32_e32 v24, v41 ; 7E300329 s_waitcnt vmcnt(5) ; BF8C0775 image_sample_l v[85:87], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[88:95], s[84:87] ; F0900700 02B65527 image_sample_l v[88:90], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[76:83], s[72:75] ; F0900700 02535827 image_sample_l v[91:93], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[64:71], s[60:63] ; F0900700 01F05B27 image_sample_l v[94:96], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[52:59], s[40:43] ; F0900700 014D5E27 image_sample_l v[97:99], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[44:51], s[24:27] ; F0900700 00CB6127 v_mad_f32 v39, v12, v20, v18 ; D2820027 044A290C v_mad_f32 v40, v12, v27, v19 ; D2820028 044E370C v_mac_f32_e32 v18, v12, v31 ; 3E243F0C v_mov_b32_e32 v100, v18 ; 7EC80312 v_mov_b32_e32 v101, v19 ; 7ECA0313 v_mov_b32_e32 v102, v20 ; 7ECC0314 v_mov_b32_e32 v103, v21 ; 7ECE0315 v_mac_f32_e32 v19, v12, v20 ; 3E26290C image_sample_l v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[88:95], s[84:87] ; F0900700 02B61922 image_sample_l v[31:33], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[76:83], s[72:75] ; F0900700 02531F22 image_sample_l v[104:106], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[64:71], s[60:63] ; F0900700 01F06822 image_sample_l v[107:109], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[52:59], s[40:43] ; F0900700 014D6B22 image_sample_l v[34:36], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[44:51], s[24:27] ; F0900700 00CB2222 v_mov_b32_e32 v101, v40 ; 7ECA0328 s_waitcnt vmcnt(4) ; BF8C0774 image_sample_l v[110:112], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[88:95], s[84:87] ; F0900700 02B66E16 image_sample_l v[113:115], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[76:83], s[72:75] ; F0900700 02537116 image_sample_l v[116:118], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[64:71], s[60:63] ; F0900700 01F07416 image_sample_l v[119:121], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[52:59], s[40:43] ; F0900700 014D7716 image_sample_l v[22:24], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[44:51], s[24:27] ; F0900700 00CB1616 v_mov_b32_e32 v102, v41 ; 7ECC0329 v_mov_b32_e32 v20, v41 ; 7E280329 image_sample_l v[122:124], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[88:95], s[84:87] ; F0900700 02B67A27 image_sample_l v[125:127], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[76:83], s[72:75] ; F0900700 02537D27 image_sample_l v[128:130], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[64:71], s[60:63] ; F0900700 01F08027 image_sample_l v[131:133], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[52:59], s[40:43] ; F0900700 014D8327 image_sample_l v[37:39], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[44:51], s[24:27] ; F0900700 00CB2527 image_sample_l v[134:136], 7, 0, 0, 0, 0, 0, 0, 0, v[100:103], s[88:95], s[84:87] ; F0900700 02B68664 image_sample_l v[137:139], 7, 0, 0, 0, 0, 0, 0, 0, v[100:103], s[76:83], s[72:75] ; F0900700 02538964 image_sample_l v[140:142], 7, 0, 0, 0, 0, 0, 0, 0, v[100:103], s[64:71], s[60:63] ; F0900700 01F08C64 image_sample_l v[143:145], 7, 0, 0, 0, 0, 0, 0, 0, v[100:103], s[52:59], s[40:43] ; F0900700 014D8F64 image_sample_l v[100:102], 7, 0, 0, 0, 0, 0, 0, 0, v[100:103], s[44:51], s[24:27] ; F0900700 00CB6464 image_sample_l v[146:148], 7, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[88:95], s[84:87] ; F0900700 02B69212 image_sample_l v[149:151], 7, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[76:83], s[72:75] ; F0900700 02539512 image_sample_l v[152:154], 7, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[64:71], s[60:63] ; F0900700 01F09812 v_cndmask_b32_e64 v12, 0, 1.0, s[22:23] ; D200000C 0059E480 v_mov_b32_e32 v13, 0x40400000 ; 7E1A02FF 40400000 v_cmp_eq_f32_e64 s[22:23], v11, v13 ; D0040016 00021B0B v_cndmask_b32_e64 v11, 0, 1.0, s[22:23] ; D200000B 0059E480 v_cndmask_b32_e64 v40, 0, 1.0, s[18:19] ; D2000028 0049E480 v_cndmask_b32_e64 v41, 0, 1.0, s[14:15] ; D2000029 0039E480 v_cndmask_b32_e64 v45, 0, 1.0, s[16:17] ; D200002D 0041E480 v_cndmask_b32_e64 v103, 0, 1.0, s[20:21] ; D2000067 0051E480 v_cmp_eq_f32_e64 s[14:15], v21, v13 ; D004000E 00021B15 v_cndmask_b32_e64 v21, 0, 1.0, s[14:15] ; D2000015 0039E480 v_cndmask_b32_e64 v155, 0, 1.0, s[12:13] ; D200009B 0031E480 v_cndmask_b32_e64 v156, 0, 1.0, s[8:9] ; D200009C 0021E480 v_cndmask_b32_e64 v157, 0, 1.0, s[10:11] ; D200009D 0029E480 v_cmp_eq_f32_e64 s[8:9], 0, v14 ; D0040008 00021C80 v_cndmask_b32_e64 v158, 0, 1.0, s[8:9] ; D200009E 0021E480 v_mul_f32_e32 v58, v45, v58 ; 1074752D v_mul_f32_e32 v59, v45, v59 ; 1076772D v_mul_f32_e32 v60, v45, v60 ; 1078792D v_mac_f32_e32 v58, v41, v55 ; 3E746F29 v_mac_f32_e32 v59, v41, v56 ; 3E767129 v_mac_f32_e32 v60, v41, v57 ; 3E787329 v_mul_f32_e32 v42, v45, v42 ; 1054552D v_mul_f32_e32 v43, v45, v43 ; 1056572D v_mul_f32_e32 v44, v45, v44 ; 1058592D v_mac_f32_e32 v42, v41, v67 ; 3E548729 v_mac_f32_e32 v43, v41, v68 ; 3E568929 v_mac_f32_e32 v44, v41, v69 ; 3E588B29 v_mul_f32_e32 v55, v45, v82 ; 106EA52D v_mul_f32_e32 v56, v45, v83 ; 1070A72D v_mul_f32_e32 v45, v45, v84 ; 105AA92D v_mac_f32_e32 v55, v41, v79 ; 3E6E9F29 v_mac_f32_e32 v56, v41, v80 ; 3E70A129 v_mac_f32_e32 v45, v41, v81 ; 3E5AA329 v_mac_f32_e32 v58, v40, v52 ; 3E746928 v_mac_f32_e32 v59, v40, v53 ; 3E766B28 v_mac_f32_e32 v60, v40, v54 ; 3E786D28 v_mac_f32_e32 v42, v40, v64 ; 3E548128 v_mac_f32_e32 v43, v40, v65 ; 3E568328 v_mac_f32_e32 v44, v40, v66 ; 3E588528 v_mac_f32_e32 v55, v40, v76 ; 3E6E9928 v_mac_f32_e32 v56, v40, v77 ; 3E709B28 v_mac_f32_e32 v45, v40, v78 ; 3E5A9D28 v_mac_f32_e32 v58, v11, v49 ; 3E74630B v_mac_f32_e32 v59, v11, v50 ; 3E76650B v_mac_f32_e32 v60, v11, v51 ; 3E78670B v_mac_f32_e32 v42, v11, v61 ; 3E547B0B v_mac_f32_e32 v43, v11, v62 ; 3E567D0B v_mac_f32_e32 v44, v11, v63 ; 3E587F0B v_mac_f32_e32 v55, v11, v73 ; 3E6E930B v_mac_f32_e32 v56, v11, v74 ; 3E70950B v_mac_f32_e32 v45, v11, v75 ; 3E5A970B v_mac_f32_e32 v58, v12, v46 ; 3E745D0C v_mac_f32_e32 v59, v12, v47 ; 3E765F0C v_mac_f32_e32 v60, v12, v48 ; 3E78610C v_mac_f32_e32 v42, v12, v28 ; 3E54390C v_mac_f32_e32 v43, v12, v29 ; 3E563B0C v_mac_f32_e32 v44, v12, v30 ; 3E583D0C v_mac_f32_e32 v55, v12, v70 ; 3E6E8D0C v_mac_f32_e32 v56, v12, v71 ; 3E708F0C v_mac_f32_e32 v45, v12, v72 ; 3E5A910C v_mul_f32_e32 v11, v157, v97 ; 1016C39D v_mul_f32_e32 v12, v157, v98 ; 1018C59D v_mul_f32_e32 v28, v157, v99 ; 1038C79D v_mac_f32_e32 v11, v156, v94 ; 3E16BD9C v_mac_f32_e32 v12, v156, v95 ; 3E18BF9C v_mac_f32_e32 v28, v156, v96 ; 3E38C19C s_waitcnt ; BF8C077F v_mul_f32_e32 v29, v157, v34 ; 103A459D v_mul_f32_e32 v30, v157, v35 ; 103C479D v_mul_f32_e32 v34, v157, v36 ; 1044499D v_mac_f32_e32 v29, v156, v107 ; 3E3AD79C v_mac_f32_e32 v30, v156, v108 ; 3E3CD99C v_mac_f32_e32 v34, v156, v109 ; 3E44DB9C s_waitcnt vmcnt(13) ; BF8C077D v_mul_f32_e32 v22, v157, v22 ; 102C2D9D v_mul_f32_e32 v23, v157, v23 ; 102E2F9D v_mul_f32_e32 v24, v157, v24 ; 1030319D v_mac_f32_e32 v22, v156, v119 ; 3E2CEF9C v_mac_f32_e32 v23, v156, v120 ; 3E2EF19C v_mac_f32_e32 v24, v156, v121 ; 3E30F39C v_mac_f32_e32 v11, v155, v91 ; 3E16B79B v_mac_f32_e32 v12, v155, v92 ; 3E18B99B v_mac_f32_e32 v28, v155, v93 ; 3E38BB9B v_mac_f32_e32 v29, v155, v104 ; 3E3AD19B v_mac_f32_e32 v30, v155, v105 ; 3E3CD39B v_mac_f32_e32 v34, v155, v106 ; 3E44D59B v_mac_f32_e32 v22, v155, v116 ; 3E2CE99B v_mac_f32_e32 v23, v155, v117 ; 3E2EEB9B v_mac_f32_e32 v24, v155, v118 ; 3E30ED9B v_mac_f32_e32 v11, v21, v88 ; 3E16B115 v_mac_f32_e32 v12, v21, v89 ; 3E18B315 v_mac_f32_e32 v28, v21, v90 ; 3E38B515 v_mac_f32_e32 v29, v21, v31 ; 3E3A3F15 v_mac_f32_e32 v30, v21, v32 ; 3E3C4115 v_mac_f32_e32 v34, v21, v33 ; 3E444315 v_mac_f32_e32 v22, v21, v113 ; 3E2CE315 v_mac_f32_e32 v23, v21, v114 ; 3E2EE515 v_mac_f32_e32 v24, v21, v115 ; 3E30E715 v_mac_f32_e32 v11, v103, v85 ; 3E16AB67 v_mac_f32_e32 v12, v103, v86 ; 3E18AD67 v_mac_f32_e32 v28, v103, v87 ; 3E38AF67 v_mac_f32_e32 v29, v103, v25 ; 3E3A3367 v_mac_f32_e32 v30, v103, v26 ; 3E3C3567 v_mac_f32_e32 v34, v103, v27 ; 3E443767 v_mac_f32_e32 v22, v103, v110 ; 3E2CDD67 v_mac_f32_e32 v23, v103, v111 ; 3E2EDF67 v_mac_f32_e32 v24, v103, v112 ; 3E30E167 s_waitcnt vmcnt(8) ; BF8C0778 v_mul_f32_e32 v21, v158, v37 ; 102A4B9E v_mul_f32_e32 v25, v158, v38 ; 10324D9E v_mul_f32_e32 v26, v158, v39 ; 10344F9E v_cndmask_b32_e64 v27, 0, 1.0, s[2:3] ; D200001B 0009E480 v_mac_f32_e32 v21, v27, v131 ; 3E2B071B v_mac_f32_e32 v25, v27, v132 ; 3E33091B v_mac_f32_e32 v26, v27, v133 ; 3E350B1B s_waitcnt vmcnt(3) ; BF8C0773 v_mul_f32_e32 v31, v158, v100 ; 103EC99E v_mul_f32_e32 v32, v158, v101 ; 1040CB9E v_mul_f32_e32 v33, v158, v102 ; 1042CD9E v_mac_f32_e32 v31, v27, v143 ; 3E3F1F1B v_mac_f32_e32 v32, v27, v144 ; 3E41211B v_mac_f32_e32 v33, v27, v145 ; 3E43231B image_sample_l v[35:37], 7, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[52:59], s[40:43] ; F0900700 014D2312 image_sample_l v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[44:51], s[24:27] ; F0900700 00CB1212 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v18, v158, v18 ; 1024259E v_mul_f32_e32 v19, v158, v19 ; 1026279E v_mul_f32_e32 v20, v158, v20 ; 1028299E v_mac_f32_e32 v18, v27, v35 ; 3E24471B v_mac_f32_e32 v19, v27, v36 ; 3E26491B v_mac_f32_e32 v20, v27, v37 ; 3E284B1B v_cndmask_b32_e64 v27, 0, 1.0, s[0:1] ; D200001B 0001E480 v_mac_f32_e32 v21, v27, v128 ; 3E2B011B v_mac_f32_e32 v25, v27, v129 ; 3E33031B v_mac_f32_e32 v26, v27, v130 ; 3E35051B v_mac_f32_e32 v31, v27, v140 ; 3E3F191B v_mac_f32_e32 v32, v27, v141 ; 3E411B1B v_mac_f32_e32 v33, v27, v142 ; 3E431D1B v_mac_f32_e32 v18, v27, v152 ; 3E25311B v_mac_f32_e32 v19, v27, v153 ; 3E27331B v_mac_f32_e32 v20, v27, v154 ; 3E29351B v_cmp_eq_f32_e64 s[0:1], v14, v13 ; D0040000 00021B0E v_cndmask_b32_e64 v13, 0, 1.0, s[0:1] ; D200000D 0001E480 v_mac_f32_e32 v21, v13, v125 ; 3E2AFB0D v_mac_f32_e32 v25, v13, v126 ; 3E32FD0D v_mac_f32_e32 v26, v13, v127 ; 3E34FF0D v_mac_f32_e32 v31, v13, v137 ; 3E3F130D v_mac_f32_e32 v32, v13, v138 ; 3E41150D v_mac_f32_e32 v33, v13, v139 ; 3E43170D v_mac_f32_e32 v18, v13, v149 ; 3E252B0D v_mac_f32_e32 v19, v13, v150 ; 3E272D0D v_mac_f32_e32 v20, v13, v151 ; 3E292F0D v_cndmask_b32_e64 v13, 0, 1.0, vcc ; D200000D 01A9E480 v_mac_f32_e32 v21, v13, v122 ; 3E2AF50D v_mac_f32_e32 v25, v13, v123 ; 3E32F70D v_mac_f32_e32 v26, v13, v124 ; 3E34F90D v_mac_f32_e32 v31, v13, v134 ; 3E3F0D0D v_mac_f32_e32 v32, v13, v135 ; 3E410F0D v_mac_f32_e32 v33, v13, v136 ; 3E43110D v_mov_b32_e32 v14, 0x6f800000 ; 7E1C02FF 6F800000 v_cmp_gt_f32_e64 vcc, |v10|, v14 ; D008016A 00021D0A v_mov_b32_e32 v14, 0x2f800000 ; 7E1C02FF 2F800000 v_cndmask_b32_e32 v14, 1.0, v14 ; 001C1CF2 v_mul_f32_e32 v10, v14, v10 ; 1014150E v_rcp_f32_e32 v10, v10 ; 7E14550A v_mac_f32_e32 v18, v13, v146 ; 3E25250D v_mac_f32_e32 v19, v13, v147 ; 3E27270D v_mac_f32_e32 v20, v13, v148 ; 3E29290D v_mul_f32_e32 v6, v10, v6 ; 100C0D0A v_mul_f32_e32 v7, v10, v7 ; 100E0F0A v_mul_f32_e32 v35, v6, v14 ; 10461D06 v_mul_f32_e32 v36, v7, v14 ; 10481D07 s_load_dwordx4 s[0:3], s[4:5], 0x14 ; C0800514 s_load_dwordx8 s[4:11], s[6:7], 0x28 ; C0C20728 v_mov_b32_e32 v6, 0x40e00000 ; 7E0C02FF 40E00000 v_mul_f32_e32 v7, v6, v17 ; 100E2306 v_mul_f32_e32 v10, v6, v16 ; 10142106 v_mul_f32_e32 v6, v6, v15 ; 100C1F06 v_max_f32_e32 v7, 0, v7 ; 200E0E80 v_max_f32_e32 v10, 0, v10 ; 20141480 v_max_f32_e32 v6, 0, v6 ; 200C0C80 v_add_f32_e32 v13, v10, v7 ; 061A0F0A v_add_f32_e32 v13, v6, v13 ; 061A1B06 v_rcp_f32_e32 v13, v13 ; 7E1A550D s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[35:36], s[4:11], s[0:3] ; F0800F00 00010E23 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v14, v14 ; 7E1C4F0E v_log_f32_e32 v15, v15 ; 7E1E4F0F v_log_f32_e32 v16, v16 ; 7E204F10 v_log_f32_e32 v17, v17 ; 7E224F11 v_mul_f32_e32 v7, v13, v7 ; 100E0F0D v_mul_f32_e32 v6, v13, v6 ; 100C0D0D v_mul_f32_e32 v21, v6, v21 ; 102A2B06 v_mac_f32_e32 v21, v7, v31 ; 3E2A3F07 v_mul_f32_e32 v25, v6, v25 ; 10323306 v_mac_f32_e32 v25, v7, v32 ; 3E324107 v_mul_f32_e32 v26, v6, v26 ; 10343506 v_mac_f32_e32 v26, v7, v33 ; 3E344307 v_mul_f32_e32 v11, v6, v11 ; 10161706 v_mac_f32_e32 v11, v7, v29 ; 3E163B07 v_mul_f32_e32 v12, v6, v12 ; 10181906 v_mac_f32_e32 v12, v7, v30 ; 3E183D07 v_mul_f32_e32 v27, v6, v28 ; 10363906 v_mac_f32_e32 v27, v7, v34 ; 3E364507 v_mul_f32_e32 v28, v6, v58 ; 10387506 v_mul_f32_e32 v29, v6, v59 ; 103A7706 v_mul_f32_e32 v6, v6, v60 ; 100C7906 v_mac_f32_e32 v28, v7, v42 ; 3E385507 v_mac_f32_e32 v29, v7, v43 ; 3E3A5707 v_mac_f32_e32 v6, v7, v44 ; 3E0C5907 v_mul_f32_e32 v7, v13, v10 ; 100E150D v_mac_f32_e32 v21, v7, v18 ; 3E2A2507 v_mac_f32_e32 v25, v7, v19 ; 3E322707 v_mac_f32_e32 v26, v7, v20 ; 3E342907 v_mac_f32_e32 v11, v7, v22 ; 3E162D07 v_mac_f32_e32 v12, v7, v23 ; 3E182F07 v_mac_f32_e32 v27, v7, v24 ; 3E363107 v_mac_f32_e32 v28, v7, v55 ; 3E386F07 v_mac_f32_e32 v29, v7, v56 ; 3E3A7107 v_mac_f32_e32 v6, v7, v45 ; 3E0C5B07 v_mul_f32_e32 v7, v28, v3 ; 100E071C v_mul_f32_e32 v10, v29, v3 ; 1014071D v_mul_f32_e32 v3, v6, v3 ; 10060706 v_mac_f32_e32 v7, v11, v4 ; 3E0E090B v_mac_f32_e32 v10, v12, v4 ; 3E14090C v_mac_f32_e32 v3, v27, v4 ; 3E06091B v_mac_f32_e32 v7, v21, v5 ; 3E0E0B15 v_mac_f32_e32 v10, v25, v5 ; 3E140B19 v_mac_f32_e32 v3, v26, v5 ; 3E060B1A v_subrev_f32_e32 v4, v14, v8 ; 0A08110E v_subrev_f32_e32 v5, v15, v9 ; 0A0A130F v_subrev_f32_e32 v0, v16, v0 ; 0A000110 v_add_f32_e64 v6, s38, s38 ; D2060006 00004C26 v_mov_b32_e32 v8, 0x3f28f5c3 ; 7E1002FF 3F28F5C3 v_mul_f32_e32 v9, v8, v4 ; 10120908 v_mul_f32_e32 v9, v9, v9 ; 10121309 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_add_f32_e64 v9, s37, s37 ; D2060009 00004A25 v_mul_f32_e32 v11, v8, v5 ; 10160B08 v_mul_f32_e32 v11, v11, v11 ; 1016170B v_mul_f32_e32 v9, v11, v9 ; 1012130B v_mul_f32_e32 v8, v8, v0 ; 10100108 v_add_f32_e64 v11, s36, s36 ; D206000B 00004824 v_mul_f32_e32 v8, v8, v8 ; 10101108 v_mul_f32_e32 v8, v8, v11 ; 10101708 v_max_f32_e32 v6, s33, v6 ; 200C0C21 v_max_f32_e32 v9, s34, v9 ; 20121222 v_max_f32_e32 v8, s35, v8 ; 20101023 v_min_f32_e32 v6, 1.0, v6 ; 1E0C0CF2 v_min_f32_e32 v9, 1.0, v9 ; 1E1212F2 v_min_f32_e32 v8, 1.0, v8 ; 1E1010F2 v_add_f32_e64 v11, 0, -v17 clamp ; D206080B 40022280 v_mul_f32_e32 v2, v2, v11 ; 10041702 v_mul_f32_e32 v11, 0.5, v4 ; 101608F0 v_mul_f32_e32 v12, 0.5, v5 ; 10180AF0 v_mul_f32_e32 v13, 0.5, v0 ; 101A00F0 v_mul_f32_e32 v11, v11, v7 ; 10160F0B v_mul_f32_e32 v12, v12, v10 ; 1018150C v_mul_f32_e32 v13, v13, v3 ; 101A070D v_mac_f32_e32 v11, v2, v4 ; 3E160902 v_mac_f32_e32 v12, v2, v5 ; 3E180B02 v_mac_f32_e32 v13, v2, v0 ; 3E1A0102 v_mac_f32_e32 v11, v7, v6 ; 3E160D07 v_mac_f32_e32 v12, v10, v9 ; 3E18130A v_mac_f32_e32 v13, v3, v8 ; 3E1A1103 v_sub_f32_e32 v0, 1.0, v1 ; 080002F2 v_mul_f32_e32 v2, s32, v0 ; 10040020 v_mac_f32_e32 v2, v11, v1 ; 3E04030B v_mul_f32_e32 v3, s31, v0 ; 1006001F v_mac_f32_e32 v3, v12, v1 ; 3E06030C v_mul_f32_e32 v0, s30, v0 ; 1000001E v_mac_f32_e32 v0, v13, v1 ; 3E00030D v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 104 VGPRS: 160 Code Size: 3032 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..4] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[1], IN[0].xxxx 1: MAD TEMP[0], CONST[2], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[3], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[4], IN[0].wwww, TEMP[0] 4: MAD TEMP[1].xy, IN[1].xyyy, CONST[0].xyyy, CONST[0].zwww 5: MOV OUT[1], TEMP[1] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = add i32 %5, %7 %36 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %34, i32 0, i32 %35) %37 = extractelement <4 x float> %36, i32 0 %38 = extractelement <4 x float> %36, i32 1 %39 = extractelement <4 x float> %36, i32 2 %40 = extractelement <4 x float> %36, i32 3 %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = fmul float %17, %37 %48 = fmul float %18, %37 %49 = fmul float %19, %37 %50 = fmul float %20, %37 %51 = fmul float %21, %38 %52 = fadd float %51, %47 %53 = fmul float %22, %38 %54 = fadd float %53, %48 %55 = fmul float %23, %38 %56 = fadd float %55, %49 %57 = fmul float %24, %38 %58 = fadd float %57, %50 %59 = fmul float %25, %39 %60 = fadd float %59, %52 %61 = fmul float %26, %39 %62 = fadd float %61, %54 %63 = fmul float %27, %39 %64 = fadd float %63, %56 %65 = fmul float %28, %39 %66 = fadd float %65, %58 %67 = fmul float %29, %40 %68 = fadd float %67, %60 %69 = fmul float %30, %40 %70 = fadd float %69, %62 %71 = fmul float %31, %40 %72 = fadd float %71, %64 %73 = fmul float %32, %40 %74 = fadd float %73, %66 %75 = fmul float %45, %13 %76 = fadd float %75, %15 %77 = fmul float %46, %14 %78 = fadd float %77, %16 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %76, float %78, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %68, float %70, float %72, float %74) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102 s_buffer_load_dword s6, s[0:3], 0x3 ; C2030103 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s10, s[0:3], 0x7 ; C2050107 s_buffer_load_dword s11, s[0:3], 0x8 ; C2058108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s5 ; 7E000205 v_mov_b32_e32 v7, s6 ; 7E0E0206 v_mac_f32_e32 v0, s12, v5 ; 3E000A0C v_mac_f32_e32 v7, s4, v6 ; 3E0E0C04 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_buffer_load_dword s5, s[0:3], 0xc ; C202810C s_buffer_load_dword s6, s[0:3], 0xd ; C203010D s_buffer_load_dword s12, s[0:3], 0xe ; C206010E s_buffer_load_dword s15, s[0:3], 0xf ; C207810F s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110 s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111 s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112 s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113 v_mul_f32_e32 v5, s7, v1 ; 100A0207 v_mac_f32_e32 v5, s11, v2 ; 3E0A040B v_mul_f32_e32 v6, s8, v1 ; 100C0208 v_mac_f32_e32 v6, s13, v2 ; 3E0C040D v_mul_f32_e32 v8, s9, v1 ; 10100209 v_mac_f32_e32 v8, s14, v2 ; 3E10040E v_mul_f32_e32 v1, s10, v1 ; 1002020A s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v1, s4, v2 ; 3E020404 v_mac_f32_e32 v5, s5, v3 ; 3E0A0605 v_mac_f32_e32 v6, s6, v3 ; 3E0C0606 v_mac_f32_e32 v8, s12, v3 ; 3E10060C v_mac_f32_e32 v1, s15, v3 ; 3E02060F v_mac_f32_e32 v5, s16, v4 ; 3E0A0810 v_mac_f32_e32 v6, s17, v4 ; 3E0C0811 v_mac_f32_e32 v8, s18, v4 ; 3E100812 v_mac_f32_e32 v1, s0, v4 ; 3E020800 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 32, 0, 0, 0, v0, v7, v2, v2 ; F800020F 02020700 exp 15, 12, 0, 1, 0, v5, v6, v8, v1 ; F80008CF 01080605 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 228 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0] DCL CONST[3..6] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.2813} IMM[1] FLT32 { 1.0000, 0.5000, 255.0000, 0.0039} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].x, TEMP[0], SAMP[0], 2D 2: MAD TEMP[0].x, CONST[0].xxxx, TEMP[0].xxxx, CONST[0].yyyy 3: RCP TEMP[0].x, TEMP[0].xxxx 4: MOV TEMP[1].xy, IN[0].xyyy 5: TEX TEMP[1], TEMP[1], SAMP[1], 2D 6: MAD TEMP[1].xyz, TEMP[1], IMM[0].xxxx, IMM[0].yyyy 7: MUL TEMP[2].xyz, CONST[3].xyzz, TEMP[1].xxxx 8: MAD TEMP[2].xyz, CONST[4].xyzz, TEMP[1].yyyy, TEMP[2].xyzz 9: MAD TEMP[1].xyz, CONST[5].xyzz, TEMP[1].zzzz, TEMP[2].xyzz 10: MOV TEMP[2].z, -TEMP[1].zzzz 11: FSLT TEMP[3].x, TEMP[0].xxxx, IMM[0].zzzz 12: UIF TEMP[3].xxxx :0 13: ADD TEMP[2].x, TEMP[2].zzzz, IMM[1].xxxx 14: RCP TEMP[2].x, TEMP[2].xxxx 15: MUL TEMP[1].xy, TEMP[1].xyyy, TEMP[2].xxxx 16: MAD TEMP[1].xy, IMM[0].wwww, TEMP[1].xyyy, IMM[1].yyyy 17: MUL TEMP[0].xy, IMM[1].xzzz, TEMP[0].xxxx 18: FRC TEMP[0].xy, TEMP[0].xyyy 19: MOV TEMP[2].y, TEMP[0].yyyy 20: MUL TEMP[3].x, TEMP[0].yyyy, IMM[1].wwww 21: ADD TEMP[2].x, TEMP[0].xxxx, -TEMP[3].xxxx 22: MOV TEMP[1].zw, TEMP[2].yyxy 23: MOV TEMP[0], TEMP[1] 24: ELSE :0 25: MOV TEMP[0], IMM[1].yyxx 26: ENDIF 27: MOV OUT[0], TEMP[0] 28: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %27 = load <32 x i8>, <32 x i8> addrspace(2)* %26, align 32, !tbaa !0 %28 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0 %30 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %31 = bitcast <8 x i32> addrspace(2)* %30 to <32 x i8> addrspace(2)* %32 = load <32 x i8>, <32 x i8> addrspace(2)* %31, align 32, !tbaa !0 %33 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %34 = bitcast <4 x i32> addrspace(2)* %33 to <16 x i8> addrspace(2)* %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 %36 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %38 = bitcast float %36 to i32 %39 = bitcast float %37 to i32 %40 = insertelement <2 x i32> undef, i32 %38, i32 0 %41 = insertelement <2 x i32> %40, i32 %39, i32 1 %42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %41, <32 x i8> %27, <16 x i8> %29, i32 2) %43 = extractelement <4 x float> %42, i32 0 %44 = fmul float %24, %43 %45 = fadd float %44, %25 %46 = fdiv float 1.000000e+00, %45 %47 = bitcast float %36 to i32 %48 = bitcast float %37 to i32 %49 = insertelement <2 x i32> undef, i32 %47, i32 0 %50 = insertelement <2 x i32> %49, i32 %48, i32 1 %51 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %50, <32 x i8> %32, <16 x i8> %35, i32 2) %52 = extractelement <4 x float> %51, i32 0 %53 = extractelement <4 x float> %51, i32 1 %54 = extractelement <4 x float> %51, i32 2 %55 = fmul float %52, 2.000000e+00 %56 = fadd float %55, -1.000000e+00 %57 = fmul float %53, 2.000000e+00 %58 = fadd float %57, -1.000000e+00 %59 = fmul float %54, 2.000000e+00 %60 = fadd float %59, -1.000000e+00 %61 = fcmp olt float %46, 0x3FEFFFDFC0000000 br i1 %61, label %IF, label %ENDIF IF: ; preds = %main_body %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %63 = fmul float %62, %60 %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %65 = fmul float %64, %58 %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %67 = fmul float %66, %56 %68 = fadd float %65, %67 %69 = fadd float %63, %68 %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %71 = fmul float %70, %60 %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %73 = fmul float %72, %58 %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %75 = fmul float %74, %56 %76 = fadd float %73, %75 %77 = fadd float %71, %76 %78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %79 = fmul float %78, %60 %80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %81 = fmul float %80, %58 %82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %83 = fmul float %82, %56 %84 = fadd float %81, %83 %85 = fadd float %79, %84 %86 = fsub float 1.000000e+00, %69 %87 = fdiv float 1.000000e+00, %86 %88 = fmul float %85, %87 %89 = fmul float %77, %87 %90 = fmul float %88, 0x3FD20033A0000000 %91 = fadd float %90, 5.000000e-01 %92 = fmul float %89, 0x3FD20033A0000000 %93 = fadd float %92, 5.000000e-01 %94 = fmul float %46, 2.550000e+02 %95 = call float @llvm.floor.f32(float %46) %96 = fsub float %46, %95 %97 = call float @llvm.floor.f32(float %94) %98 = fsub float %94, %97 %99 = fmul float %98, 0x3F70101020000000 %100 = fsub float %96, %99 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp.0 = phi float [ %91, %IF ], [ 5.000000e-01, %main_body ] %temp1.0 = phi float [ %93, %IF ], [ 5.000000e-01, %main_body ] %temp2.0 = phi float [ %100, %IF ], [ 1.000000e+00, %main_body ] %temp3.0 = phi float [ %98, %IF ], [ 1.000000e+00, %main_body ] %101 = call i32 @llvm.SI.packf16(float %temp.0, float %temp1.0) %102 = bitcast i32 %101 to float %103 = call i32 @llvm.SI.packf16(float %temp2.0, float %temp3.0) %104 = bitcast i32 %103 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %102, float %104, float %102, float %104) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v6, 0x3f7ffefe ; 7E0C02FF 3F7FFEFE s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_mov_b32_e32 v3, 1.0 ; 7E0602F2 v_mov_b32_e32 v2, 0.5 ; 7E0402F0 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s20, s[0:3], 0x1 ; C20A0101 s_buffer_load_dword s21, s[0:3], 0x0 ; C20A8100 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100 v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101 image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[8:11] ; F0800100 00430004 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s20 ; 7E020214 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v1, s21, v0 ; 3E020015 v_rcp_f32_e32 v0, v1 ; 7E005501 v_mov_b32_e32 v1, 0.5 ; 7E0202F0 v_cmp_gt_f32_e32 vcc, v6, v0 ; 7C080106 v_mov_b32_e32 v6, 1.0 ; 7E0C02F2 s_and_saveexec_b64 s[8:9], vcc ; BE88246A s_xor_b64 s[8:9], exec, s[8:9] ; 8988087E s_cbranch_execz BB0_2 ; BF880000 s_buffer_load_dword s10, s[0:3], 0x12 ; C2050112 s_buffer_load_dword s11, s[0:3], 0x14 ; C2058114 s_buffer_load_dword s12, s[0:3], 0x15 ; C2060115 s_buffer_load_dword s13, s[0:3], 0x16 ; C2068116 s_buffer_load_dword s14, s[0:3], 0xc ; C207010C s_buffer_load_dword s15, s[0:3], 0xd ; C207810D s_buffer_load_dword s16, s[0:3], 0xe ; C208010E s_buffer_load_dword s17, s[0:3], 0x10 ; C2088110 s_buffer_load_dword s18, s[0:3], 0x11 ; C2090111 v_mov_b32_e32 v1, 0x3e90019d ; 7E0202FF 3E90019D v_mov_b32_e32 v3, 0x437f0000 ; 7E0602FF 437F0000 v_mul_f32_e32 v6, v3, v0 ; 100C0103 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[24:31], s[20:23] ; F0800700 00A60704 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, 2.0, v7, -1.0 ; D2820002 03CE0EF4 v_mad_f32 v4, 2.0, v8, -1.0 ; D2820004 03CE10F4 v_mad_f32 v5, 2.0, v9, -1.0 ; D2820005 03CE12F4 v_mul_f32_e32 v7, s16, v2 ; 100E0410 v_mac_f32_e32 v7, s10, v4 ; 3E0E080A v_mac_f32_e32 v7, s13, v5 ; 3E0E0A0D v_mul_f32_e32 v8, s15, v2 ; 1010040F v_mac_f32_e32 v8, s18, v4 ; 3E100812 v_mul_f32_e32 v2, s14, v2 ; 1004040E v_sub_f32_e32 v7, 1.0, v7 ; 080E0EF2 v_rcp_f32_e32 v7, v7 ; 7E0E5507 v_mac_f32_e32 v2, s17, v4 ; 3E040811 v_mac_f32_e32 v8, s12, v5 ; 3E100A0C v_mac_f32_e32 v2, s11, v5 ; 3E040A0B v_mul_f32_e32 v2, v7, v2 ; 10040507 v_mul_f32_e32 v4, v7, v8 ; 10081107 v_mad_f32 v2, v2, v1, 0.5 ; D2820002 03C20302 v_mad_f32 v1, v4, v1, 0.5 ; D2820001 03C20304 v_floor_f32_e32 v4, v0 ; 7E084900 v_subrev_f32_e32 v4, v4, v0 ; 0A080104 v_floor_f32_e32 v5, v6 ; 7E0A4906 v_mad_f32 v6, v0, v3, -v5 ; D2820006 84160700 v_madmk_f32_e32 v3, v6, v4, 0xbb808081 ; 40060906 BB808081 s_or_b64 exec, exec, s[8:9] ; 88FE087E v_cvt_pkrtz_f16_f32_e32 v0, v2, v1 ; 5E000302 v_cvt_pkrtz_f16_f32_e32 v1, v3, v6 ; 5E020D03 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 12 Code Size: 340 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xyz, IN[0].xyzx 5: MOV OUT[1], TEMP[1] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float %13, %33 %38 = fmul float %14, %33 %39 = fmul float %15, %33 %40 = fmul float %16, %33 %41 = fmul float %17, %34 %42 = fadd float %41, %37 %43 = fmul float %18, %34 %44 = fadd float %43, %38 %45 = fmul float %19, %34 %46 = fadd float %45, %39 %47 = fmul float %20, %34 %48 = fadd float %47, %40 %49 = fmul float %21, %35 %50 = fadd float %49, %42 %51 = fmul float %22, %35 %52 = fadd float %51, %44 %53 = fmul float %23, %35 %54 = fadd float %53, %46 %55 = fmul float %24, %35 %56 = fadd float %55, %48 %57 = fmul float %25, %36 %58 = fadd float %57, %50 %59 = fmul float %26, %36 %60 = fadd float %59, %52 %61 = fmul float %27, %36 %62 = fadd float %61, %54 %63 = fmul float %28, %36 %64 = fadd float %63, %56 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %33, float %34, float %35, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %60, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v2, v3, v4, v1 ; F800020F 01040302 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v2 ; 10000404 v_mac_f32_e32 v0, s8, v3 ; 3E000608 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, s5, v2 ; 10020405 v_mac_f32_e32 v1, s9, v3 ; 3E020609 v_mul_f32_e32 v6, s6, v2 ; 100C0406 v_mul_f32_e32 v2, s7, v2 ; 10040407 v_mac_f32_e32 v6, s10, v3 ; 3E0C060A v_mac_f32_e32 v2, s11, v3 ; 3E04060B v_mac_f32_e32 v0, s12, v4 ; 3E00080C v_mac_f32_e32 v1, s13, v4 ; 3E02080D v_mac_f32_e32 v6, s14, v4 ; 3E0C080E v_mac_f32_e32 v2, s15, v4 ; 3E04080F v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mac_f32_e32 v1, s17, v5 ; 3E020A11 v_mac_f32_e32 v6, s18, v5 ; 3E0C0A12 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 exp 15, 12, 0, 1, 0, v0, v1, v6, v2 ; F80008CF 02060100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 188 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0..10] DCL CONST[13..17] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 1.0000, 0.0400, 0.0000, 0.7000} IMM[1] FLT32 { 0.3000, 0.4000, 1.5000, -0.5000} IMM[2] FLT32 { 16.0000, 4.0000, -2.0000, 3.0000} IMM[3] FLT32 { 0.5000, -0.7000, 1.1000, 0.1000} IMM[4] FLT32 { 0.6000, 0.1500, -0.1500, 6.6666} IMM[5] FLT32 { 6.0000, 0.2500, 1000.0000, -0.3000} 0: DP3 TEMP[0].x, CONST[8].xyzz, CONST[8].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, CONST[8].xyzz, TEMP[0].xxxx 3: DP3 TEMP[1].x, IN[0].xyzz, IN[0].xyzz 4: RSQ TEMP[1].x, TEMP[1].xxxx 5: MUL TEMP[1].xyz, IN[0].xyzz, TEMP[1].xxxx 6: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[0].xyzz 7: ADD TEMP[3].x, IMM[0].xxxx, CONST[7].xxxx 8: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[2].xyzz 9: SQRT TEMP[2].x, TEMP[2].xxxx 10: MUL TEMP[4].x, CONST[6].xxxx, IMM[0].yyyy 11: RCP TEMP[4].x, TEMP[4].xxxx 12: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx 13: ADD TEMP[2].x, TEMP[3].xxxx, -TEMP[2].xxxx 14: RCP TEMP[3].x, CONST[7].xxxx 15: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx 16: MOV_SAT TEMP[2].x, TEMP[2].xxxx 17: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 18: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[1].xyzz 19: FSLT TEMP[1].x, IMM[0].zzzz, TEMP[0].xxxx 20: UIF TEMP[1].xxxx :0 21: POW TEMP[0].x, TEMP[0].xxxx, CONST[9].xxxx 22: ELSE :0 23: MOV TEMP[0].x, IMM[0].zzzz 24: ENDIF 25: DP3 TEMP[1].x, IN[0].xyzz, IN[0].xyzz 26: RSQ TEMP[1].x, TEMP[1].xxxx 27: MUL TEMP[1].xyz, IN[0].xyzz, TEMP[1].xxxx 28: DP3 TEMP[3].x, CONST[8].xyzz, CONST[8].xyzz 29: RSQ TEMP[3].x, TEMP[3].xxxx 30: MUL TEMP[3].xyz, CONST[8].xyzz, TEMP[3].xxxx 31: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[1].xyzz 32: ADD TEMP[3].x, TEMP[3].xxxx, IMM[0].wwww 33: MUL TEMP[4].x, TEMP[3].xxxx, IMM[0].wwww 34: MOV_SAT TEMP[4].x, TEMP[4].xxxx 35: LRP TEMP[4], TEMP[4].xxxx, CONST[1], CONST[2] 36: MUL TEMP[2].xyz, CONST[5].xyzz, TEMP[2].xxxx 37: MUL TEMP[3].x, TEMP[3].xxxx, IMM[1].xxxx 38: MUL TEMP[0].x, TEMP[0].xxxx, IMM[1].yyyy 39: MOV_SAT TEMP[1].x, TEMP[1].yyyy 40: RCP TEMP[5].x, CONST[3].xxxx 41: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx 42: MOV_SAT TEMP[1].x, TEMP[1].xxxx 43: ADD TEMP[1].x, IMM[0].xxxx, -TEMP[1].xxxx 44: POW TEMP[1].x, TEMP[1].xxxx, IMM[1].zzzz 45: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[4].wwww 46: MOV_SAT TEMP[1].x, TEMP[1].xxxx 47: LRP TEMP[1].xyz, TEMP[1].xxxx, TEMP[4].xyzz, CONST[0].xyzz 48: MAD TEMP[0].xyz, CONST[4].xyzz, TEMP[0].xxxx, TEMP[1].xyzz 49: MAD TEMP[0].xyz, CONST[4].xyzz, TEMP[3].xxxx, TEMP[0].xyzz 50: MAD TEMP[0].xyz, TEMP[2].xyzz, CONST[10].xxxx, TEMP[0].xyzz 51: MOV TEMP[0].w, CONST[0].wwww 52: DP3 TEMP[1].x, IN[0].xyzz, CONST[15].xyzz 53: DP3 TEMP[2].x, IN[0].xyzz, CONST[16].xyzz 54: MOV TEMP[1].y, TEMP[2].xxxx 55: DP3 TEMP[2].x, IN[0].xyzz, CONST[17].xyzz 56: MOV TEMP[1].z, TEMP[2].xxxx 57: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 58: RSQ TEMP[2].x, TEMP[2].xxxx 59: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 60: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[13].xxxx 61: FSLT TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz 62: UIF TEMP[2].xxxx :0 63: FLR TEMP[2].x, -TEMP[1].xxxx 64: MOV TEMP[2].x, -TEMP[2].xxxx 65: ELSE :0 66: FLR TEMP[2].x, TEMP[1].xxxx 67: ENDIF 68: FSLT TEMP[3].x, TEMP[1].yyyy, IMM[0].zzzz 69: UIF TEMP[3].xxxx :0 70: FLR TEMP[3].x, -TEMP[1].yyyy 71: MOV TEMP[3].x, -TEMP[3].xxxx 72: ELSE :0 73: FLR TEMP[3].x, TEMP[1].yyyy 74: ENDIF 75: FSLT TEMP[4].x, TEMP[1].zzzz, IMM[0].zzzz 76: UIF TEMP[4].xxxx :0 77: FLR TEMP[4].x, -TEMP[1].zzzz 78: MOV TEMP[4].x, -TEMP[4].xxxx 79: ELSE :0 80: FLR TEMP[4].x, TEMP[1].zzzz 81: ENDIF 82: MOV TEMP[2].x, TEMP[2].xxxx 83: MOV TEMP[2].y, TEMP[3].xxxx 84: MOV TEMP[2].z, TEMP[4].xxxx 85: RCP TEMP[3].x, CONST[13].xxxx 86: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 87: MOV TEMP[3].xy, TEMP[2].xyyy 88: TEX TEMP[3].xyz, TEMP[3], SAMP[1], 2D 89: ADD TEMP[3].xyz, TEMP[3].xyzz, IMM[1].wwww 90: MOV TEMP[4].xy, TEMP[2].yzzz 91: TEX TEMP[4].xyz, TEMP[4], SAMP[1], 2D 92: ADD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].wwww 93: ADD TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xyzz 94: MOV TEMP[2].xy, TEMP[2].xzzz 95: TEX TEMP[2].xyz, TEMP[2], SAMP[1], 2D 96: ADD TEMP[2].xyz, TEMP[2].xyzz, IMM[1].wwww 97: ADD TEMP[2].xyz, TEMP[3].xyzz, TEMP[2].xyzz 98: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 99: RSQ TEMP[3].x, TEMP[3].xxxx 100: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 101: ADD TEMP[3].x, TEMP[2].xxxx, TEMP[2].yyyy 102: ADD TEMP[3].x, TEMP[3].xxxx, TEMP[2].zzzz 103: MUL TEMP[3].x, TEMP[3].xxxx, IMM[2].xxxx 104: FRC TEMP[3].x, TEMP[3].xxxx 105: MAD TEMP[3].x, TEMP[3].xxxx, IMM[2].yyyy, IMM[2].zzzz 106: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].zzzz 107: MIN TEMP[3].x, TEMP[3].xxxx, IMM[2].wwww 108: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx 109: ADD TEMP[4].xyz, TEMP[2].xyzz, IMM[1].wwww 110: FRC TEMP[1].xyz, TEMP[1].xyzz 111: ADD TEMP[1].xyz, TEMP[1].xyzz, IMM[3].yyyy 112: MAD TEMP[1].xyz, TEMP[4].xyzz, IMM[3].xxxx, TEMP[1].xyzz 113: MUL TEMP[4].x, TEMP[0].xxxx, IMM[1].xxxx 114: MAD TEMP[4].x, TEMP[0].yyyy, IMM[4].xxxx, TEMP[4].xxxx 115: MAD TEMP[4].x, TEMP[0].zzzz, IMM[3].wwww, TEMP[4].xxxx 116: ADD TEMP[4].x, IMM[3].zzzz, -TEMP[4].xxxx 117: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[4].xxxx 118: LRP TEMP[2].xyz, CONST[14].xxxx, TEMP[2].xyzz, IMM[0].xxxx 119: MUL TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz 120: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[1].xyzz 121: SQRT TEMP[1].x, TEMP[1].xxxx 122: ADD TEMP[1].x, TEMP[1].xxxx, IMM[4].zzzz 123: MUL TEMP[1].x, TEMP[1].xxxx, IMM[4].wwww 124: ADD TEMP[1].x, IMM[4].yyyy, -TEMP[1].xxxx 125: MOV_SAT TEMP[1].x, TEMP[1].xxxx 126: MUL TEMP[3].x, TEMP[3].xxxx, IMM[5].yyyy 127: POW TEMP[3].x, TEMP[3].xxxx, IMM[5].zzzz 128: MOV_SAT TEMP[3].x, TEMP[3].xxxx 129: MUL TEMP[3].x, TEMP[3].xxxx, IMM[2].wwww 130: ADD TEMP[3].x, IMM[5].xxxx, -TEMP[3].xxxx 131: POW TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx 132: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[1].xxxx 133: MOV_SAT TEMP[1].xyz, TEMP[1].xyzz 134: MAD TEMP[2].x, TEMP[5].xxxx, TEMP[5].xxxx, IMM[5].wwww 135: MOV_SAT TEMP[2].x, TEMP[2].xxxx 136: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 137: MAD TEMP[0].xyz, TEMP[1].xyzz, CONST[10].xxxx, TEMP[0].xyzz 138: DP3 TEMP[1].x, IN[0].xyzz, IN[0].xyzz 139: RSQ TEMP[1].x, TEMP[1].xxxx 140: MUL TEMP[1].xyz, IN[0].xyzz, TEMP[1].xxxx 141: MUL TEMP[2].xy, TEMP[1].xzzz, IMM[3].xxxx 142: MOV TEMP[2].xy, TEMP[2].xyyy 143: TEX TEMP[2].xyz, TEMP[2], SAMP[0], 2D 144: ABS TEMP[3].x, TEMP[1].yyyy 145: ADD TEMP[3].x, TEMP[3].xxxx, IMM[5].wwww 146: MUL TEMP[5].xy, TEMP[1].yzzz, IMM[3].xxxx 147: MOV TEMP[5].xy, TEMP[5].xyyy 148: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 149: ABS TEMP[6].x, TEMP[1].xxxx 150: ADD TEMP[6].x, TEMP[6].xxxx, IMM[5].wwww 151: MUL TEMP[7].xy, TEMP[1].xyyy, IMM[3].xxxx 152: MOV TEMP[7].xy, TEMP[7].xyyy 153: TEX TEMP[7].xyz, TEMP[7], SAMP[0], 2D 154: ABS TEMP[1].x, TEMP[1].zzzz 155: ADD TEMP[1].x, TEMP[1].xxxx, IMM[5].wwww 156: MUL TEMP[1].xyz, TEMP[7].xyzz, TEMP[1].xxxx 157: MAD TEMP[1].xyz, TEMP[5].xyzz, TEMP[6].xxxx, TEMP[1].xyzz 158: MAD TEMP[1].xyz, TEMP[2].xyzz, TEMP[3].xxxx, TEMP[1].xyzz 159: ADD TEMP[2].x, TEMP[4].xxxx, IMM[1].wwww 160: MAX TEMP[2].x, TEMP[2].xxxx, IMM[0].zzzz 161: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 162: MAD TEMP[0].xyz, TEMP[1].xyzz, IMM[0].yyyy, TEMP[0].xyzz 163: MOV OUT[0], TEMP[0] 164: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 248) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %60 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %61 = load <8 x i32>, <8 x i32> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %63 = load <4 x i32>, <4 x i32> addrspace(2)* %62, align 16, !tbaa !0 %64 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %65 = load <8 x i32>, <8 x i32> addrspace(2)* %64, align 32, !tbaa !0 %66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %67 = load <4 x i32>, <4 x i32> addrspace(2)* %66, align 16, !tbaa !0 %68 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %69 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %70 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %71 = fmul float %45, %45 %72 = fmul float %46, %46 %73 = fadd float %72, %71 %74 = fmul float %47, %47 %75 = fadd float %73, %74 %76 = call float @llvm.AMDGPU.rsq.clamped.f32(float %75) %77 = fmul float %45, %76 %78 = fmul float %46, %76 %79 = fmul float %47, %76 %80 = fmul float %68, %68 %81 = fmul float %69, %69 %82 = fadd float %81, %80 %83 = fmul float %70, %70 %84 = fadd float %82, %83 %85 = call float @llvm.AMDGPU.rsq.clamped.f32(float %84) %86 = fmul float %68, %85 %87 = fmul float %69, %85 %88 = fmul float %70, %85 %89 = fsub float %86, %77 %90 = fsub float %87, %78 %91 = fsub float %88, %79 %92 = fadd float %44, 1.000000e+00 %93 = fmul float %89, %89 %94 = fmul float %90, %90 %95 = fadd float %94, %93 %96 = fmul float %91, %91 %97 = fadd float %95, %96 %98 = call float @llvm.sqrt.f32(float %97) %99 = fmul float %43, 0x3FA47AE140000000 %100 = fdiv float 1.000000e+00, %99 %101 = fmul float %98, %100 %102 = fsub float %92, %101 %103 = fdiv float 1.000000e+00, %44 %104 = fmul float %102, %103 %105 = call float @llvm.AMDIL.clamp.(float %104, float 0.000000e+00, float 1.000000e+00) %106 = fmul float %105, %105 %107 = fmul float %77, %86 %108 = fmul float %78, %87 %109 = fadd float %108, %107 %110 = fmul float %79, %88 %111 = fadd float %109, %110 %112 = fcmp ogt float %111, 0.000000e+00 br i1 %112, label %IF, label %ENDIF IF: ; preds = %main_body %113 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %114 = call float @llvm.pow.f32(float %111, float %113) %phitmp = fmul float %114, 0x3FD99999A0000000 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp.0 = phi float [ %phitmp, %IF ], [ 0.000000e+00, %main_body ] %115 = fmul float %68, %68 %116 = fmul float %69, %69 %117 = fadd float %116, %115 %118 = fmul float %70, %70 %119 = fadd float %117, %118 %120 = call float @llvm.AMDGPU.rsq.clamped.f32(float %119) %121 = fmul float %68, %120 %122 = fmul float %69, %120 %123 = fmul float %70, %120 %124 = fmul float %45, %45 %125 = fmul float %46, %46 %126 = fadd float %125, %124 %127 = fmul float %47, %47 %128 = fadd float %126, %127 %129 = call float @llvm.AMDGPU.rsq.clamped.f32(float %128) %130 = fmul float %45, %129 %131 = fmul float %46, %129 %132 = fmul float %47, %129 %133 = fmul float %130, %121 %134 = fmul float %131, %122 %135 = fadd float %134, %133 %136 = fmul float %132, %123 %137 = fadd float %135, %136 %138 = fadd float %137, 0x3FE6666660000000 %139 = fmul float %138, 0x3FE6666660000000 %140 = call float @llvm.AMDIL.clamp.(float %139, float 0.000000e+00, float 1.000000e+00) %141 = call float @llvm.AMDGPU.lrp(float %140, float %28, float %32) %142 = call float @llvm.AMDGPU.lrp(float %140, float %29, float %33) %143 = call float @llvm.AMDGPU.lrp(float %140, float %30, float %34) %144 = call float @llvm.AMDGPU.lrp(float %140, float %31, float %35) %145 = fmul float %40, %106 %146 = fmul float %41, %106 %147 = fmul float %42, %106 %148 = fmul float %138, 0x3FD3333340000000 %149 = call float @llvm.AMDIL.clamp.(float %122, float 0.000000e+00, float 1.000000e+00) %150 = fdiv float 1.000000e+00, %36 %151 = fmul float %149, %150 %152 = call float @llvm.AMDIL.clamp.(float %151, float 0.000000e+00, float 1.000000e+00) %153 = fsub float 1.000000e+00, %152 %154 = call float @llvm.pow.f32(float %153, float 1.500000e+00) %155 = fmul float %154, %144 %156 = call float @llvm.AMDIL.clamp.(float %155, float 0.000000e+00, float 1.000000e+00) %157 = call float @llvm.AMDGPU.lrp(float %156, float %141, float %24) %158 = call float @llvm.AMDGPU.lrp(float %156, float %142, float %25) %159 = call float @llvm.AMDGPU.lrp(float %156, float %143, float %26) %160 = fmul float %37, %temp.0 %161 = fadd float %160, %157 %162 = fmul float %38, %temp.0 %163 = fadd float %162, %158 %164 = fmul float %39, %temp.0 %165 = fadd float %164, %159 %166 = fmul float %37, %148 %167 = fadd float %166, %161 %168 = fmul float %38, %148 %169 = fadd float %168, %163 %170 = fmul float %39, %148 %171 = fadd float %170, %165 %172 = fmul float %145, %48 %173 = fadd float %172, %167 %174 = fmul float %146, %48 %175 = fadd float %174, %169 %176 = fmul float %147, %48 %177 = fadd float %176, %171 %178 = fmul float %68, %51 %179 = fmul float %69, %52 %180 = fadd float %179, %178 %181 = fmul float %70, %53 %182 = fadd float %180, %181 %183 = fmul float %68, %54 %184 = fmul float %69, %55 %185 = fadd float %184, %183 %186 = fmul float %70, %56 %187 = fadd float %185, %186 %188 = fmul float %68, %57 %189 = fmul float %69, %58 %190 = fadd float %189, %188 %191 = fmul float %70, %59 %192 = fadd float %190, %191 %193 = fmul float %182, %182 %194 = fmul float %187, %187 %195 = fadd float %194, %193 %196 = fmul float %192, %192 %197 = fadd float %195, %196 %198 = call float @llvm.AMDGPU.rsq.clamped.f32(float %197) %199 = fmul float %182, %198 %200 = fmul float %187, %198 %201 = fmul float %192, %198 %202 = fmul float %199, %49 %203 = fmul float %200, %49 %204 = fmul float %201, %49 %205 = fcmp olt float %202, 0.000000e+00 br i1 %205, label %IF33, label %ELSE34 IF33: ; preds = %ENDIF %206 = fsub float -0.000000e+00, %202 %207 = call float @llvm.floor.f32(float %206) %208 = fsub float -0.000000e+00, %207 br label %ENDIF32 ELSE34: ; preds = %ENDIF %209 = call float @llvm.floor.f32(float %202) br label %ENDIF32 ENDIF32: ; preds = %ELSE34, %IF33 %temp8.0 = phi float [ %208, %IF33 ], [ %209, %ELSE34 ] %210 = fcmp olt float %203, 0.000000e+00 br i1 %210, label %IF36, label %ELSE37 IF36: ; preds = %ENDIF32 %211 = fsub float -0.000000e+00, %203 %212 = call float @llvm.floor.f32(float %211) %213 = fsub float -0.000000e+00, %212 br label %ENDIF35 ELSE37: ; preds = %ENDIF32 %214 = call float @llvm.floor.f32(float %203) br label %ENDIF35 ENDIF35: ; preds = %ELSE37, %IF36 %temp12.0 = phi float [ %213, %IF36 ], [ %214, %ELSE37 ] %215 = fcmp olt float %204, 0.000000e+00 br i1 %215, label %IF39, label %ELSE40 IF39: ; preds = %ENDIF35 %216 = fsub float -0.000000e+00, %204 %217 = call float @llvm.floor.f32(float %216) %218 = fsub float -0.000000e+00, %217 br label %ENDIF38 ELSE40: ; preds = %ENDIF35 %219 = call float @llvm.floor.f32(float %204) br label %ENDIF38 ENDIF38: ; preds = %ELSE40, %IF39 %temp16.0 = phi float [ %218, %IF39 ], [ %219, %ELSE40 ] %220 = fdiv float 1.000000e+00, %49 %221 = fmul float %temp8.0, %220 %222 = fmul float %temp12.0, %220 %223 = fmul float %temp16.0, %220 %224 = bitcast float %221 to i32 %225 = bitcast float %222 to i32 %226 = insertelement <2 x i32> undef, i32 %224, i32 0 %227 = insertelement <2 x i32> %226, i32 %225, i32 1 %228 = bitcast <8 x i32> %65 to <32 x i8> %229 = bitcast <4 x i32> %67 to <16 x i8> %230 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %227, <32 x i8> %228, <16 x i8> %229, i32 2) %231 = extractelement <4 x float> %230, i32 0 %232 = extractelement <4 x float> %230, i32 1 %233 = extractelement <4 x float> %230, i32 2 %234 = fadd float %231, -5.000000e-01 %235 = fadd float %232, -5.000000e-01 %236 = fadd float %233, -5.000000e-01 %237 = bitcast float %222 to i32 %238 = bitcast float %223 to i32 %239 = insertelement <2 x i32> undef, i32 %237, i32 0 %240 = insertelement <2 x i32> %239, i32 %238, i32 1 %241 = bitcast <8 x i32> %65 to <32 x i8> %242 = bitcast <4 x i32> %67 to <16 x i8> %243 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %240, <32 x i8> %241, <16 x i8> %242, i32 2) %244 = extractelement <4 x float> %243, i32 0 %245 = extractelement <4 x float> %243, i32 1 %246 = extractelement <4 x float> %243, i32 2 %247 = fadd float %244, -5.000000e-01 %248 = fadd float %245, -5.000000e-01 %249 = fadd float %246, -5.000000e-01 %250 = fadd float %234, %247 %251 = fadd float %235, %248 %252 = fadd float %236, %249 %253 = bitcast float %221 to i32 %254 = bitcast float %223 to i32 %255 = insertelement <2 x i32> undef, i32 %253, i32 0 %256 = insertelement <2 x i32> %255, i32 %254, i32 1 %257 = bitcast <8 x i32> %65 to <32 x i8> %258 = bitcast <4 x i32> %67 to <16 x i8> %259 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %256, <32 x i8> %257, <16 x i8> %258, i32 2) %260 = extractelement <4 x float> %259, i32 0 %261 = extractelement <4 x float> %259, i32 1 %262 = extractelement <4 x float> %259, i32 2 %263 = fadd float %260, -5.000000e-01 %264 = fadd float %261, -5.000000e-01 %265 = fadd float %262, -5.000000e-01 %266 = fadd float %250, %263 %267 = fadd float %251, %264 %268 = fadd float %252, %265 %269 = fmul float %266, %266 %270 = fmul float %267, %267 %271 = fadd float %270, %269 %272 = fmul float %268, %268 %273 = fadd float %271, %272 %274 = call float @llvm.AMDGPU.rsq.clamped.f32(float %273) %275 = fmul float %266, %274 %276 = fmul float %267, %274 %277 = fmul float %268, %274 %278 = fadd float %275, %276 %279 = fadd float %278, %277 %280 = fmul float %279, 1.600000e+01 %281 = call float @llvm.floor.f32(float %280) %282 = fsub float %280, %281 %283 = fmul float %282, 4.000000e+00 %284 = fadd float %283, -2.000000e+00 %285 = call float @llvm.maxnum.f32(float %284, float 0.000000e+00) %286 = call float @llvm.minnum.f32(float %285, float 3.000000e+00) %287 = fmul float %286, %286 %288 = fadd float %275, -5.000000e-01 %289 = fadd float %276, -5.000000e-01 %290 = fadd float %277, -5.000000e-01 %291 = call float @llvm.floor.f32(float %202) %292 = fsub float %202, %291 %293 = call float @llvm.floor.f32(float %203) %294 = fsub float %203, %293 %295 = call float @llvm.floor.f32(float %204) %296 = fsub float %204, %295 %297 = fadd float %292, 0xBFE6666660000000 %298 = fadd float %294, 0xBFE6666660000000 %299 = fadd float %296, 0xBFE6666660000000 %300 = fmul float %288, 5.000000e-01 %301 = fadd float %300, %297 %302 = fmul float %289, 5.000000e-01 %303 = fadd float %302, %298 %304 = fmul float %290, 5.000000e-01 %305 = fadd float %304, %299 %306 = fmul float %173, 0x3FD3333340000000 %307 = fmul float %175, 0x3FE3333340000000 %308 = fadd float %307, %306 %309 = fmul float %177, 0x3FB99999A0000000 %310 = fadd float %309, %308 %311 = fsub float 0x3FF19999A0000000, %310 %312 = fmul float %311, %311 %313 = call float @llvm.AMDGPU.lrp(float %50, float %275, float 1.000000e+00) %314 = call float @llvm.AMDGPU.lrp(float %50, float %276, float 1.000000e+00) %315 = call float @llvm.AMDGPU.lrp(float %50, float %277, float 1.000000e+00) %316 = fmul float %287, %313 %317 = fmul float %287, %314 %318 = fmul float %287, %315 %319 = fmul float %301, %301 %320 = fmul float %303, %303 %321 = fadd float %320, %319 %322 = fmul float %305, %305 %323 = fadd float %321, %322 %324 = call float @llvm.sqrt.f32(float %323) %325 = fadd float %324, 0xBFC3333340000000 %326 = fmul float %325, 0x401AAA9940000000 %327 = fsub float 0x3FC3333340000000, %326 %328 = call float @llvm.AMDIL.clamp.(float %327, float 0.000000e+00, float 1.000000e+00) %329 = fmul float %287, 2.500000e-01 %330 = call float @llvm.pow.f32(float %329, float 1.000000e+03) %331 = call float @llvm.AMDIL.clamp.(float %330, float 0.000000e+00, float 1.000000e+00) %332 = fmul float %331, 3.000000e+00 %333 = fsub float 6.000000e+00, %332 %334 = call float @llvm.pow.f32(float %328, float %333) %335 = fmul float %316, %334 %336 = fmul float %317, %334 %337 = fmul float %318, %334 %338 = call float @llvm.AMDIL.clamp.(float %335, float 0.000000e+00, float 1.000000e+00) %339 = call float @llvm.AMDIL.clamp.(float %336, float 0.000000e+00, float 1.000000e+00) %340 = call float @llvm.AMDIL.clamp.(float %337, float 0.000000e+00, float 1.000000e+00) %341 = fmul float %312, %312 %342 = fadd float %341, 0xBFD3333340000000 %343 = call float @llvm.AMDIL.clamp.(float %342, float 0.000000e+00, float 1.000000e+00) %344 = fmul float %338, %343 %345 = fmul float %339, %343 %346 = fmul float %340, %343 %347 = fmul float %344, %48 %348 = fadd float %347, %173 %349 = fmul float %345, %48 %350 = fadd float %349, %175 %351 = fmul float %346, %48 %352 = fadd float %351, %177 %353 = fmul float %68, %68 %354 = fmul float %69, %69 %355 = fadd float %354, %353 %356 = fmul float %70, %70 %357 = fadd float %355, %356 %358 = call float @llvm.AMDGPU.rsq.clamped.f32(float %357) %359 = fmul float %68, %358 %360 = fmul float %69, %358 %361 = fmul float %70, %358 %362 = fmul float %359, 5.000000e-01 %363 = fmul float %361, 5.000000e-01 %364 = bitcast float %362 to i32 %365 = bitcast float %363 to i32 %366 = insertelement <2 x i32> undef, i32 %364, i32 0 %367 = insertelement <2 x i32> %366, i32 %365, i32 1 %368 = bitcast <8 x i32> %61 to <32 x i8> %369 = bitcast <4 x i32> %63 to <16 x i8> %370 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %367, <32 x i8> %368, <16 x i8> %369, i32 2) %371 = extractelement <4 x float> %370, i32 0 %372 = extractelement <4 x float> %370, i32 1 %373 = extractelement <4 x float> %370, i32 2 %374 = call float @llvm.fabs.f32(float %360) %375 = fadd float %374, 0xBFD3333340000000 %376 = fmul float %360, 5.000000e-01 %377 = fmul float %361, 5.000000e-01 %378 = bitcast float %376 to i32 %379 = bitcast float %377 to i32 %380 = insertelement <2 x i32> undef, i32 %378, i32 0 %381 = insertelement <2 x i32> %380, i32 %379, i32 1 %382 = bitcast <8 x i32> %61 to <32 x i8> %383 = bitcast <4 x i32> %63 to <16 x i8> %384 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %381, <32 x i8> %382, <16 x i8> %383, i32 2) %385 = extractelement <4 x float> %384, i32 0 %386 = extractelement <4 x float> %384, i32 1 %387 = extractelement <4 x float> %384, i32 2 %388 = call float @llvm.fabs.f32(float %359) %389 = fadd float %388, 0xBFD3333340000000 %390 = fmul float %359, 5.000000e-01 %391 = fmul float %360, 5.000000e-01 %392 = bitcast float %390 to i32 %393 = bitcast float %391 to i32 %394 = insertelement <2 x i32> undef, i32 %392, i32 0 %395 = insertelement <2 x i32> %394, i32 %393, i32 1 %396 = bitcast <8 x i32> %61 to <32 x i8> %397 = bitcast <4 x i32> %63 to <16 x i8> %398 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %395, <32 x i8> %396, <16 x i8> %397, i32 2) %399 = extractelement <4 x float> %398, i32 0 %400 = extractelement <4 x float> %398, i32 1 %401 = extractelement <4 x float> %398, i32 2 %402 = call float @llvm.fabs.f32(float %361) %403 = fadd float %402, 0xBFD3333340000000 %404 = fmul float %399, %403 %405 = fmul float %400, %403 %406 = fmul float %401, %403 %407 = fmul float %385, %389 %408 = fadd float %407, %404 %409 = fmul float %386, %389 %410 = fadd float %409, %405 %411 = fmul float %387, %389 %412 = fadd float %411, %406 %413 = fmul float %371, %375 %414 = fadd float %413, %408 %415 = fmul float %372, %375 %416 = fadd float %415, %410 %417 = fmul float %373, %375 %418 = fadd float %417, %412 %419 = fadd float %311, -5.000000e-01 %420 = call float @llvm.maxnum.f32(float %419, float 0.000000e+00) %421 = fmul float %414, %420 %422 = fmul float %416, %420 %423 = fmul float %418, %420 %424 = fmul float %421, 0x3FA47AE140000000 %425 = fadd float %424, %348 %426 = fmul float %422, 0x3FA47AE140000000 %427 = fadd float %426, %350 %428 = fmul float %423, 0x3FA47AE140000000 %429 = fadd float %428, %352 %430 = call i32 @llvm.SI.packf16(float %425, float %427) %431 = bitcast i32 %430 to float %432 = call i32 @llvm.SI.packf16(float %429, float %27) %433 = bitcast i32 %432 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %431, float %433, float %431, float %433) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v0, v0, 2, 0, [m0] ; C8000200 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s1, s[12:15], 0x20 ; C2008D20 s_buffer_load_dword s2, s[12:15], 0x21 ; C2010D21 s_buffer_load_dword s8, s[12:15], 0x22 ; C2040D22 v_mul_f32_e32 v4, v2, v2 ; 10080502 v_interp_p2_f32 v0, [v0], v1, 2, 0, [m0] ; C8010201 v_mac_f32_e32 v4, v3, v3 ; 3E080703 v_mac_f32_e32 v4, v0, v0 ; 3E080100 s_buffer_load_dword s3, s[12:15], 0x34 ; C2018D34 s_buffer_load_dword s22, s[12:15], 0x3c ; C20B0D3C s_buffer_load_dword s23, s[12:15], 0x3d ; C20B8D3D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e64 v1, s1, s1 ; D2100001 00000201 v_mac_f32_e64 v1, s2, s2 ; D23E0001 00000402 v_mac_f32_e64 v1, s8, s8 ; D23E0001 00001008 v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 s_buffer_load_dword s0, s[12:15], 0x18 ; C2000D18 v_mul_f32_e32 v5, s1, v1 ; 100A0201 v_mad_f32 v6, v2, v4, -v5 ; D2820006 84160902 v_mul_f32_e32 v6, v6, v6 ; 100C0D06 s_buffer_load_dword s33, s[12:15], 0x1c ; C2108D1C v_mul_f32_e32 v7, s2, v1 ; 100E0202 v_mad_f32 v8, v3, v4, -v7 ; D2820008 841E0903 v_mac_f32_e32 v6, v8, v8 ; 3E0C1108 v_mov_b32_e32 v8, 0xbd23d70a ; 7E1002FF BD23D70A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s0, v8 ; 10101000 v_mul_f32_e32 v9, s8, v1 ; 10120208 v_rcp_f32_e32 v1, v8 ; 7E025508 v_mad_f32 v8, v0, v4, -v9 ; D2820008 84260900 v_mac_f32_e32 v6, v8, v8 ; 3E0C1108 v_sqrt_f32_e32 v6, v6 ; 7E0C6706 v_add_f32_e64 v8, 1.0, s33 ; D2060008 000042F2 v_mac_f32_e32 v8, v1, v6 ; 3E100D01 v_mul_f32_e32 v1, v4, v2 ; 10020504 v_mul_f32_e32 v1, v1, v5 ; 10020B01 s_buffer_load_dword s11, s[12:15], 0xb ; C2058D0B s_buffer_load_dword s25, s[12:15], 0xc ; C20C8D0C s_buffer_load_dword s10, s[12:15], 0x0 ; C2050D00 s_buffer_load_dword s9, s[12:15], 0x1 ; C2048D01 s_buffer_load_dword s0, s[12:15], 0x2 ; C2000D02 s_buffer_load_dword s20, s[12:15], 0x4 ; C20A0D04 s_buffer_load_dword s18, s[12:15], 0x5 ; C2090D05 s_buffer_load_dword s17, s[12:15], 0x6 ; C2088D06 s_buffer_load_dword s16, s[12:15], 0x7 ; C2080D07 s_buffer_load_dword s24, s[12:15], 0x8 ; C20C0D08 s_buffer_load_dword s21, s[12:15], 0x9 ; C20A8D09 s_buffer_load_dword s19, s[12:15], 0xa ; C2098D0A s_buffer_load_dword s26, s[12:15], 0x3e ; C20D0D3E s_buffer_load_dword s27, s[12:15], 0x40 ; C20D8D40 s_buffer_load_dword s28, s[12:15], 0x41 ; C20E0D41 s_buffer_load_dword s29, s[12:15], 0x42 ; C20E8D42 s_buffer_load_dword s30, s[12:15], 0x44 ; C20F0D44 s_buffer_load_dword s31, s[12:15], 0x45 ; C20F8D45 s_buffer_load_dword s32, s[12:15], 0x46 ; C2100D46 v_rcp_f32_e32 v5, s33 ; 7E0A5421 v_mul_f32_e32 v6, v4, v3 ; 100C0704 v_mac_f32_e32 v1, v6, v7 ; 3E020F06 v_mul_f32_e32 v4, v4, v0 ; 10080104 v_mac_f32_e32 v1, v4, v9 ; 3E021304 v_mul_f32_e32 v4, v5, v8 ; 10081105 v_add_f32_e64 v7, 0, v4 clamp ; D2060807 00020880 v_mov_b32_e32 v8, 0 ; 7E100280 v_cmp_lt_f32_e32 vcc, 0, v1 ; 7C020280 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[34:35], vcc ; BEA2246A s_xor_b64 s[34:35], exec, s[34:35] ; 89A2227E s_buffer_load_dword s33, s[12:15], 0x24 ; C2108D24 v_log_f32_e32 v1, v1 ; 7E024F01 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_legacy_f32_e32 v1, s33, v1 ; 0E020221 v_exp_f32_e32 v1, v1 ; 7E024B01 v_mul_f32_e32 v8, 0x3ecccccd, v1 ; 101002FF 3ECCCCCD s_or_b64 exec, exec, s[34:35] ; 88FE227E v_rcp_f32_e32 v1, s25 ; 7E025419 v_mul_f32_e32 v11, s22, v2 ; 10160416 v_mac_f32_e32 v11, s23, v3 ; 3E160617 v_mac_f32_e32 v11, s26, v0 ; 3E16001A v_mul_f32_e32 v10, s27, v2 ; 1014041B v_mac_f32_e32 v10, s28, v3 ; 3E14061C v_mac_f32_e32 v10, s29, v0 ; 3E14001D v_mul_f32_e32 v6, s30, v2 ; 100C041E v_mac_f32_e32 v6, s31, v3 ; 3E0C061F v_mac_f32_e32 v6, s32, v0 ; 3E0C0020 v_mul_f32_e64 v4, s1, s1 ; D2100004 00000201 v_mac_f32_e64 v4, s2, s2 ; D23E0004 00000402 v_mac_f32_e64 v4, s8, s8 ; D23E0004 00001008 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 v_mul_f32_e32 v5, v2, v2 ; 100A0502 v_mac_f32_e32 v5, v3, v3 ; 3E0A0703 v_mac_f32_e32 v5, v0, v0 ; 3E0A0100 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mul_f32_e32 v9, s1, v4 ; 10120801 v_mul_f32_e32 v12, s2, v4 ; 10180802 v_mul_f32_e32 v4, s8, v4 ; 10080808 v_mul_f32_e32 v13, v5, v2 ; 101A0505 v_mul_f32_e32 v9, v13, v9 ; 1012130D v_mul_f32_e32 v13, v5, v3 ; 101A0705 v_mac_f32_e32 v9, v13, v12 ; 3E12190D v_mul_f32_e32 v5, v5, v0 ; 100A0105 v_mac_f32_e32 v9, v5, v4 ; 3E120905 v_mov_b32_e32 v4, 0x3f333333 ; 7E0802FF 3F333333 v_add_f32_e32 v9, v4, v9 ; 06121304 v_mul_f32_e32 v4, v4, v9 ; 10081304 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_sub_f32_e32 v5, 1.0, v4 ; 080A08F2 v_mul_f32_e32 v12, s24, v5 ; 10180A18 v_mac_f32_e32 v12, s20, v4 ; 3E180814 v_mul_f32_e32 v15, s21, v5 ; 101E0A15 v_mac_f32_e32 v15, s18, v4 ; 3E1E0812 v_mul_f32_e32 v16, s19, v5 ; 10200A13 v_add_f32_e64 v13, 0, v13 clamp ; D206080D 00021A80 v_mul_f32_e32 v1, v1, v13 ; 10021B01 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_sub_f32_e32 v1, 1.0, v1 ; 080202F2 v_log_f32_e32 v1, v1 ; 7E024F01 v_mac_f32_e32 v16, s17, v4 ; 3E200811 v_mul_f32_e32 v5, s11, v5 ; 100A0A0B v_mac_f32_e32 v5, s16, v4 ; 3E0A0810 v_mul_legacy_f32_e32 v1, 0x3fc00000, v1 ; 0E0202FF 3FC00000 v_exp_f32_e32 v1, v1 ; 7E024B01 v_mul_f32_e32 v1, v5, v1 ; 10020305 v_add_f32_e64 v13, 0, v1 clamp ; D206080D 00020280 v_sub_f32_e32 v5, 1.0, v13 ; 080A1AF2 v_mul_f32_e32 v1, s10, v5 ; 10020A0A v_mac_f32_e32 v1, v12, v13 ; 3E021B0C v_mul_f32_e32 v4, s9, v5 ; 10080A09 v_mul_f32_e32 v12, v11, v11 ; 1018170B v_mac_f32_e32 v12, v10, v10 ; 3E18150A v_mac_f32_e32 v12, v6, v6 ; 3E180D06 v_rsq_clamp_f32_e32 v14, v12 ; 7E1C590C v_mac_f32_e32 v4, v15, v13 ; 3E081B0F v_mul_f32_e32 v5, s0, v5 ; 100A0A00 v_mac_f32_e32 v5, v16, v13 ; 3E0A1B10 v_mul_f32_e32 v11, v14, v11 ; 1016170E v_mul_f32_e32 v11, s3, v11 ; 10161603 v_cmp_ngt_f32_e32 vcc, 0, v11 ; 7C161680 s_and_saveexec_b64 s[0:1], vcc ; BE80246A s_xor_b64 s[0:1], exec, s[0:1] ; 8980007E v_floor_f32_e32 v12, v11 ; 7E18490B s_or_saveexec_b64 s[0:1], s[0:1] ; BE802500 s_xor_b64 exec, exec, s[0:1] ; 89FE007E v_floor_f32_e64 v12, -v11 ; D348000C 2000010B v_xor_b32_e32 v12, 0x80000000, v12 ; 3A1818FF 80000000 s_or_b64 exec, exec, s[0:1] ; 88FE007E v_mul_f32_e32 v10, v14, v10 ; 1014150E v_mul_f32_e32 v10, s3, v10 ; 10141403 v_cmp_ngt_f32_e32 vcc, 0, v10 ; 7C161480 s_and_saveexec_b64 s[0:1], vcc ; BE80246A s_xor_b64 s[0:1], exec, s[0:1] ; 8980007E v_floor_f32_e32 v13, v10 ; 7E1A490A s_or_saveexec_b64 s[0:1], s[0:1] ; BE802500 s_xor_b64 exec, exec, s[0:1] ; 89FE007E v_floor_f32_e64 v13, -v10 ; D348000D 2000010A v_xor_b32_e32 v13, 0x80000000, v13 ; 3A1A1AFF 80000000 s_or_b64 exec, exec, s[0:1] ; 88FE007E v_mul_f32_e32 v6, v14, v6 ; 100C0D0E v_mul_f32_e32 v6, s3, v6 ; 100C0C03 v_cmp_ngt_f32_e32 vcc, 0, v6 ; 7C160C80 s_and_saveexec_b64 s[8:9], vcc ; BE88246A s_xor_b64 s[8:9], exec, s[8:9] ; 8988087E v_floor_f32_e32 v14, v6 ; 7E1C4906 s_or_saveexec_b64 s[8:9], s[8:9] ; BE882508 s_buffer_load_dword s0, s[12:15], 0x3 ; C2000D03 s_buffer_load_dword s10, s[12:15], 0x10 ; C2050D10 s_buffer_load_dword s11, s[12:15], 0x11 ; C2058D11 s_buffer_load_dword s16, s[12:15], 0x12 ; C2080D12 s_buffer_load_dword s17, s[12:15], 0x14 ; C2088D14 s_buffer_load_dword s18, s[12:15], 0x15 ; C2090D15 s_buffer_load_dword s19, s[12:15], 0x16 ; C2098D16 s_buffer_load_dword s1, s[12:15], 0x28 ; C2008D28 s_buffer_load_dword s2, s[12:15], 0x38 ; C2010D38 s_waitcnt lgkmcnt(0) ; BF8C007F s_xor_b64 exec, exec, s[8:9] ; 89FE087E v_floor_f32_e64 v14, -v6 ; D348000E 20000106 v_xor_b32_e32 v14, 0x80000000, v14 ; 3A1C1CFF 80000000 s_or_b64 exec, exec, s[8:9] ; 88FE087E v_mul_f32_e32 v7, v7, v7 ; 100E0F07 v_mul_f32_e32 v15, s17, v7 ; 101E0E11 v_mul_f32_e32 v16, s18, v7 ; 10200E12 v_mul_f32_e32 v7, s19, v7 ; 100E0E13 v_mac_f32_e32 v1, s10, v8 ; 3E02100A v_mac_f32_e32 v4, s11, v8 ; 3E08100B v_mac_f32_e32 v5, s16, v8 ; 3E0A1010 v_rcp_f32_e32 v8, s3 ; 7E105403 v_mov_b32_e32 v17, 0x3e99999a ; 7E2202FF 3E99999A v_mul_f32_e32 v9, v17, v9 ; 10121311 v_mac_f32_e32 v1, s10, v9 ; 3E02120A v_mac_f32_e32 v4, s11, v9 ; 3E08120B v_mac_f32_e32 v5, s16, v9 ; 3E0A1210 v_mac_f32_e32 v1, s1, v15 ; 3E021E01 v_mac_f32_e32 v4, s1, v16 ; 3E082001 v_mac_f32_e32 v5, s1, v7 ; 3E0A0E01 v_mov_b32_e32 v7, 0x41800000 ; 7E0E02FF 41800000 v_mov_b32_e32 v9, 0x40400000 ; 7E1202FF 40400000 v_floor_f32_e32 v15, v11 ; 7E1E490B v_subrev_f32_e32 v11, v15, v11 ; 0A16170F v_floor_f32_e32 v15, v10 ; 7E1E490A v_subrev_f32_e32 v10, v15, v10 ; 0A14150F v_mul_f32_e32 v15, v2, v2 ; 101E0502 v_mac_f32_e32 v15, v3, v3 ; 3E1E0703 v_mac_f32_e32 v15, v0, v0 ; 3E1E0100 v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F v_mul_f32_e32 v18, v8, v12 ; 10241908 v_mul_f32_e32 v19, v8, v13 ; 10261B08 v_mul_f32_e32 v20, v8, v14 ; 10281D08 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx8 s[12:19], s[6:7], 0x8 ; C0C60708 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[12:14], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[12:19], s[8:11] ; F0800700 00430C12 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v8, -0.5, v12 ; 061018F1 v_add_f32_e32 v12, -0.5, v13 ; 06181AF1 v_add_f32_e32 v13, -0.5, v14 ; 061A1CF1 image_sample v[21:23], 7, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[12:19], s[8:11] ; F0800700 00431513 v_mov_b32_e32 v19, v20 ; 7E260314 image_sample v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[12:19], s[8:11] ; F0800700 00431212 s_waitcnt vmcnt(1) ; BF8C0771 v_add_f32_e32 v14, -0.5, v21 ; 061C2AF1 v_add_f32_e32 v16, -0.5, v22 ; 06202CF1 v_add_f32_e32 v21, -0.5, v23 ; 062A2EF1 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v18, -0.5, v18 ; 062424F1 v_add_f32_e32 v19, -0.5, v19 ; 062626F1 v_add_f32_e32 v20, -0.5, v20 ; 062828F1 v_add_f32_e32 v8, v14, v8 ; 0610110E v_add_f32_e32 v12, v16, v12 ; 06181910 v_add_f32_e32 v13, v21, v13 ; 061A1B15 v_add_f32_e32 v8, v18, v8 ; 06101112 v_add_f32_e32 v12, v19, v12 ; 06181913 v_add_f32_e32 v13, v20, v13 ; 061A1B14 v_mul_f32_e32 v14, v8, v8 ; 101C1108 v_mac_f32_e32 v14, v12, v12 ; 3E1C190C v_mac_f32_e32 v14, v13, v13 ; 3E1C1B0D v_rsq_clamp_f32_e32 v14, v14 ; 7E1C590E v_mul_f32_e32 v16, v14, v12 ; 1020190E v_mad_f32 v18, v14, v8, v16 ; D2820012 0442110E v_mac_f32_e32 v18, v14, v13 ; 3E241B0E v_mul_f32_e32 v19, v7, v18 ; 10262507 v_floor_f32_e32 v19, v19 ; 7E264913 v_mad_f32 v7, v18, v7, -v19 ; D2820007 844E0F12 v_mul_f32_e32 v2, v15, v2 ; 1004050F v_mul_f32_e32 v3, v15, v3 ; 1006070F v_mul_f32_e32 v0, v15, v0 ; 1000010F s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 v_mad_f32 v7, 4.0, v7, -2.0 ; D2820007 03D60EF6 v_max_f32_e32 v7, 0, v7 ; 200E0E80 v_min_f32_e32 v7, v9, v7 ; 1E0E0F09 v_mul_f32_e32 v7, v7, v7 ; 100E0F07 v_mul_f32_e32 v18, 0.5, v2 ; 102404F0 v_mul_f32_e32 v19, 0.5, v0 ; 102600F0 v_mul_f32_e32 v15, 0x3e800000, v7 ; 101E0EFF 3E800000 v_mul_f32_e32 v20, 0.5, v3 ; 102806F0 v_log_f32_e32 v15, v15 ; 7E1E4F0F v_mov_b32_e32 v21, v19 ; 7E2A0313 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[22:24], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[12:19], s[8:11] ; F0800700 00431612 v_mov_b32_e32 v19, v20 ; 7E260314 v_mov_b32_e32 v25, 0xbe99999a ; 7E3202FF BE99999A v_add_f32_e64 v0, |v0|, v25 ; D2060100 00023300 image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[12:19], s[8:11] ; F0800700 00431A14 image_sample v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[12:19], s[8:11] ; F0800700 00431212 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v18, v0, v18 ; 10242500 v_mul_f32_e32 v19, v0, v19 ; 10262700 v_mul_f32_e32 v0, v0, v20 ; 10002900 v_mul_legacy_f32_e32 v15, 0x447a0000, v15 ; 0E1E1EFF 447A0000 v_exp_f32_e32 v15, v15 ; 7E1E4B0F v_add_f32_e64 v15, 0, v15 clamp ; D206080F 00021E80 v_mov_b32_e32 v20, 0x40c00000 ; 7E2802FF 40C00000 v_mad_f32 v9, -v15, v9, v20 ; D2820009 2452130F v_mul_f32_e32 v15, v14, v8 ; 101E110E v_mad_f32 v8, v8, v14, -0.5 ; D2820008 03C61D08 v_mad_f32 v12, v12, v14, -0.5 ; D282000C 03C61D0C v_mul_f32_e32 v20, v14, v13 ; 10281B0E v_mad_f32 v13, v13, v14, -0.5 ; D282000D 03C61D0D v_floor_f32_e32 v14, v6 ; 7E1C4906 v_subrev_f32_e32 v6, v14, v6 ; 0A0C0D0E v_mov_b32_e32 v14, 0xbf333333 ; 7E1C02FF BF333333 v_add_f32_e32 v11, v14, v11 ; 0616170E v_add_f32_e32 v10, v14, v10 ; 0614150E v_add_f32_e32 v6, v14, v6 ; 060C0D0E v_mac_f32_e32 v11, 0.5, v8 ; 3E1610F0 v_mac_f32_e32 v10, 0.5, v12 ; 3E1418F0 v_mac_f32_e32 v6, 0.5, v13 ; 3E0C1AF0 v_sub_f32_e64 v8, 1.0, s2 ; D2080008 000004F2 v_mad_f32 v12, s2, v15, v8 ; D282000C 04221E02 v_mad_f32 v13, s2, v16, v8 ; D282000D 04222002 v_mac_f32_e32 v8, s2, v20 ; 3E102802 v_mul_f32_e32 v11, v11, v11 ; 1016170B v_mac_f32_e32 v11, v10, v10 ; 3E16150A v_mac_f32_e32 v11, v6, v6 ; 3E160D06 v_sqrt_f32_e32 v6, v11 ; 7E0C670B v_mov_b32_e32 v10, 0xbe19999a ; 7E1402FF BE19999A v_add_f32_e32 v6, v6, v10 ; 060C1506 v_mov_b32_e32 v10, 0x3e19999a ; 7E1402FF 3E19999A v_madmk_f32_e32 v6, v6, v10, 0xc0d554ca ; 400C1506 C0D554CA v_add_f32_e64 v6, 0, v6 clamp ; D2060806 00020C80 v_log_f32_e32 v6, v6 ; 7E0C4F06 v_mul_f32_e32 v10, v12, v7 ; 10140F0C v_mul_f32_e32 v11, v13, v7 ; 10160F0D v_mul_f32_e32 v7, v8, v7 ; 100E0F08 v_mul_legacy_f32_e32 v6, v9, v6 ; 0E0C0D09 v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_mul_f32_e32 v8, v6, v10 ; 10101506 v_mul_f32_e32 v9, v6, v11 ; 10121706 v_mul_f32_e32 v6, v6, v7 ; 100C0F06 v_mul_f32_e32 v7, v17, v1 ; 100E0311 v_madmk_f32_e32 v7, v4, v7, 0x3f19999a ; 400E0F04 3F19999A v_madmk_f32_e32 v7, v5, v7, 0x3dcccccd ; 400E0F05 3DCCCCCD v_sub_f32_e32 v7, 0x3f8ccccd, v7 ; 080E0EFF 3F8CCCCD v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080 v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_add_f32_e64 v6, 0, v6 clamp ; D2060806 00020C80 v_mul_f32_e32 v10, v7, v7 ; 10140F07 v_mad_f32 v10, v10, v10, v25 ; D282000A 0466150A v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480 v_mul_f32_e32 v8, v10, v8 ; 1010110A v_mul_f32_e32 v9, v10, v9 ; 1012130A v_mul_f32_e32 v6, v10, v6 ; 100C0D0A v_mac_f32_e32 v1, s1, v8 ; 3E021001 v_mac_f32_e32 v4, s1, v9 ; 3E081201 v_mac_f32_e32 v5, s1, v6 ; 3E0A0C01 v_add_f32_e64 v3, |v3|, v25 ; D2060103 00023303 v_add_f32_e64 v2, |v2|, v25 ; D2060102 00023302 v_mac_f32_e32 v18, v2, v26 ; 3E243502 v_mac_f32_e32 v19, v2, v27 ; 3E263702 v_mac_f32_e32 v0, v2, v28 ; 3E003902 v_mac_f32_e32 v18, v3, v22 ; 3E242D03 v_mac_f32_e32 v19, v3, v23 ; 3E262F03 v_mac_f32_e32 v0, v3, v24 ; 3E003103 v_add_f32_e32 v2, -0.5, v7 ; 06040EF1 v_max_f32_e32 v2, 0, v2 ; 20040480 v_mul_f32_e32 v3, v2, v18 ; 10062502 v_mul_f32_e32 v6, v2, v19 ; 100C2702 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_mov_b32_e32 v2, 0x3d23d70a ; 7E0402FF 3D23D70A v_mac_f32_e32 v1, v2, v3 ; 3E020702 v_mac_f32_e32 v4, v2, v6 ; 3E080D02 v_mac_f32_e32 v5, v2, v0 ; 3E0A0102 v_cvt_pkrtz_f16_f32_e32 v0, v1, v4 ; 5E000901 v_cvt_pkrtz_f16_f32_e64 v1, v5, s0 ; D25E0001 00000105 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 32 Code Size: 1688 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xyz, IN[0].xyzx 5: MOV OUT[1], TEMP[1] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float %13, %33 %38 = fmul float %14, %33 %39 = fmul float %15, %33 %40 = fmul float %16, %33 %41 = fmul float %17, %34 %42 = fadd float %41, %37 %43 = fmul float %18, %34 %44 = fadd float %43, %38 %45 = fmul float %19, %34 %46 = fadd float %45, %39 %47 = fmul float %20, %34 %48 = fadd float %47, %40 %49 = fmul float %21, %35 %50 = fadd float %49, %42 %51 = fmul float %22, %35 %52 = fadd float %51, %44 %53 = fmul float %23, %35 %54 = fadd float %53, %46 %55 = fmul float %24, %35 %56 = fadd float %55, %48 %57 = fmul float %25, %36 %58 = fadd float %57, %50 %59 = fmul float %26, %36 %60 = fadd float %59, %52 %61 = fmul float %27, %36 %62 = fadd float %61, %54 %63 = fmul float %28, %36 %64 = fadd float %63, %56 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %33, float %34, float %35, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %60, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v2, v3, v4, v1 ; F800020F 01040302 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v2 ; 10000404 v_mac_f32_e32 v0, s8, v3 ; 3E000608 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, s5, v2 ; 10020405 v_mac_f32_e32 v1, s9, v3 ; 3E020609 v_mul_f32_e32 v6, s6, v2 ; 100C0406 v_mul_f32_e32 v2, s7, v2 ; 10040407 v_mac_f32_e32 v6, s10, v3 ; 3E0C060A v_mac_f32_e32 v2, s11, v3 ; 3E04060B v_mac_f32_e32 v0, s12, v4 ; 3E00080C v_mac_f32_e32 v1, s13, v4 ; 3E02080D v_mac_f32_e32 v6, s14, v4 ; 3E0C080E v_mac_f32_e32 v2, s15, v4 ; 3E04080F v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mac_f32_e32 v1, s17, v5 ; 3E020A11 v_mac_f32_e32 v6, s18, v5 ; 3E0C0A12 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 exp 15, 12, 0, 1, 0, v0, v1, v6, v2 ; F80008CF 02060100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 188 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 1.5000, 0.0000, 0.0000} 0: MOV TEMP[0].xyz, CONST[0].xyzx 1: RCP TEMP[1].x, CONST[1].xxxx 2: MUL TEMP[1].x, IN[0].yyyy, TEMP[1].xxxx 3: MOV_SAT TEMP[1].x, TEMP[1].xxxx 4: ADD TEMP[1].x, IMM[0].xxxx, -TEMP[1].xxxx 5: POW TEMP[1].x, TEMP[1].xxxx, IMM[0].yyyy 6: MOV TEMP[0].w, TEMP[1].xxxx 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %29 = fdiv float 1.000000e+00, %27 %30 = fmul float %28, %29 %31 = call float @llvm.AMDIL.clamp.(float %30, float 0.000000e+00, float 1.000000e+00) %32 = fsub float 1.000000e+00, %31 %33 = call float @llvm.pow.f32(float %32, float 1.500000e+00) %34 = call i32 @llvm.SI.packf16(float %24, float %25) %35 = bitcast i32 %34 to float %36 = call i32 @llvm.SI.packf16(float %26, float %33) %37 = bitcast i32 %36 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %35, float %37, float %35, float %37) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_mov_b32 m0, s9 ; BEFC0309 s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v2, s4 ; 7E045404 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102 v_interp_p1_f32 v0, v0, 1, 0, [m0] ; C8000100 v_interp_p2_f32 v0, [v0], v1, 1, 0, [m0] ; C8010101 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v0, 1.0, v0 ; 080000F2 v_log_f32_e32 v0, v0 ; 7E004F00 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s5 ; 7E020205 v_mul_legacy_f32_e32 v0, 0x3fc00000, v0 ; 0E0000FF 3FC00000 v_exp_f32_e32 v0, v0 ; 7E004B00 v_cvt_pkrtz_f16_f32_e32 v1, s4, v1 ; 5E020204 v_cvt_pkrtz_f16_f32_e32 v0, s0, v0 ; 5E000000 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 4 Code Size: 104 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL CONST[0..19] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[5], IN[0].xxxx 1: MAD TEMP[0], CONST[6], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[7], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0].xyz, CONST[8], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[16], IN[0].xxxx 5: MAD TEMP[1], CONST[17], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[18], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1], CONST[19], IN[0].wwww, TEMP[1] 8: MAD TEMP[2].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww 9: FSEQ TEMP[3].x, CONST[15].xxxx, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].xy, IN[2].xyxx 12: ELSE :0 13: MOV TEMP[3].xy, IN[3].xyxx 14: ENDIF 15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[14].xyyy, CONST[14].zwww 16: MOV TEMP[2].zw, TEMP[3].yyxy 17: MOV TEMP[3].x, CONST[9].xxxx 18: MOV TEMP[3].y, CONST[10].xxxx 19: MOV TEMP[3].z, CONST[11].xxxx 20: MOV TEMP[4].x, CONST[9].yyyy 21: MOV TEMP[4].y, CONST[10].yyyy 22: MOV TEMP[4].z, CONST[11].yyyy 23: MOV TEMP[5].x, CONST[9].zzzz 24: MOV TEMP[5].y, CONST[10].zzzz 25: MOV TEMP[5].z, CONST[11].zzzz 26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz 29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 30: RSQ TEMP[4].x, TEMP[4].xxxx 31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 32: MOV TEMP[4].xyz, TEMP[3].xyzx 33: MUL TEMP[5], TEMP[3].xyzz, TEMP[3].yzzx 34: DP4 TEMP[6].x, CONST[1], TEMP[5] 35: DP4 TEMP[7].x, CONST[2], TEMP[5] 36: MOV TEMP[6].y, TEMP[7].xxxx 37: DP4 TEMP[5].x, CONST[3], TEMP[5] 38: MOV TEMP[6].z, TEMP[5].xxxx 39: MUL TEMP[5].x, TEMP[3].yyyy, TEMP[3].yyyy 40: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[5].xxxx 41: MAD TEMP[3].xyz, CONST[4].xyzz, TEMP[3].xxxx, TEMP[6].xyzz 42: ADD TEMP[5].xyz, TEMP[0].xyzz, -CONST[0].xyzz 43: MOV TEMP[5].yzw, TEMP[5].yxyz 44: MOV TEMP[5].x, TEMP[1].zzzz 45: MOV TEMP[0].xyz, TEMP[0].xyzx 46: MOV OUT[5], TEMP[0] 47: MOV OUT[1], TEMP[2] 48: MOV OUT[2], TEMP[4] 49: MOV OUT[3], TEMP[3] 50: MOV OUT[0], TEMP[1] 51: MOV OUT[4], TEMP[5] 52: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %5, %7 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = extractelement <4 x float> %83, i32 3 %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 %90 = add i32 %5, %7 %91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %90) %92 = extractelement <4 x float> %91, i32 0 %93 = extractelement <4 x float> %91, i32 1 %94 = extractelement <4 x float> %91, i32 2 %95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0 %97 = add i32 %5, %7 %98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97) %99 = extractelement <4 x float> %98, i32 0 %100 = extractelement <4 x float> %98, i32 1 %101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0 %103 = add i32 %5, %7 %104 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %103) %105 = extractelement <4 x float> %104, i32 0 %106 = extractelement <4 x float> %104, i32 1 %107 = fmul float %31, %84 %108 = fmul float %32, %84 %109 = fmul float %33, %84 %110 = fmul float %34, %84 %111 = fmul float %35, %85 %112 = fadd float %111, %107 %113 = fmul float %36, %85 %114 = fadd float %113, %108 %115 = fmul float %37, %85 %116 = fadd float %115, %109 %117 = fmul float %38, %85 %118 = fadd float %117, %110 %119 = fmul float %39, %86 %120 = fadd float %119, %112 %121 = fmul float %40, %86 %122 = fadd float %121, %114 %123 = fmul float %41, %86 %124 = fadd float %123, %116 %125 = fmul float %42, %86 %126 = fadd float %125, %118 %127 = fmul float %43, %87 %128 = fadd float %127, %120 %129 = fmul float %44, %87 %130 = fadd float %129, %122 %131 = fmul float %45, %87 %132 = fadd float %131, %124 %133 = fmul float %64, %84 %134 = fmul float %65, %84 %135 = fmul float %66, %84 %136 = fmul float %67, %84 %137 = fmul float %68, %85 %138 = fadd float %137, %133 %139 = fmul float %69, %85 %140 = fadd float %139, %134 %141 = fmul float %70, %85 %142 = fadd float %141, %135 %143 = fmul float %71, %85 %144 = fadd float %143, %136 %145 = fmul float %72, %86 %146 = fadd float %145, %138 %147 = fmul float %73, %86 %148 = fadd float %147, %140 %149 = fmul float %74, %86 %150 = fadd float %149, %142 %151 = fmul float %75, %86 %152 = fadd float %151, %144 %153 = fmul float %76, %87 %154 = fadd float %153, %146 %155 = fmul float %77, %87 %156 = fadd float %155, %148 %157 = fmul float %78, %87 %158 = fadd float %157, %150 %159 = fmul float %79, %87 %160 = fadd float %159, %152 %161 = fmul float %99, %55 %162 = fadd float %161, %57 %163 = fmul float %100, %56 %164 = fadd float %163, %58 %165 = fcmp oeq float %63, 0.000000e+00 %. = select i1 %165, float %99, float %105 %.32 = select i1 %165, float %100, float %106 %166 = fmul float %., %59 %167 = fadd float %166, %61 %168 = fmul float %.32, %60 %169 = fadd float %168, %62 %170 = fmul float %46, %92 %171 = fmul float %49, %92 %172 = fmul float %52, %92 %173 = fmul float %47, %93 %174 = fadd float %173, %170 %175 = fmul float %50, %93 %176 = fadd float %175, %171 %177 = fmul float %53, %93 %178 = fadd float %177, %172 %179 = fmul float %48, %94 %180 = fadd float %179, %174 %181 = fmul float %51, %94 %182 = fadd float %181, %176 %183 = fmul float %54, %94 %184 = fadd float %183, %178 %185 = fmul float %180, %180 %186 = fmul float %182, %182 %187 = fadd float %186, %185 %188 = fmul float %184, %184 %189 = fadd float %187, %188 %190 = call float @llvm.AMDGPU.rsq.clamped.f32(float %189) %191 = fmul float %180, %190 %192 = fmul float %182, %190 %193 = fmul float %184, %190 %194 = fmul float %191, %192 %195 = fmul float %192, %193 %196 = fmul float %193, %193 %197 = fmul float %193, %191 %198 = fmul float %16, %194 %199 = fmul float %17, %195 %200 = fadd float %198, %199 %201 = fmul float %18, %196 %202 = fadd float %200, %201 %203 = fmul float %19, %197 %204 = fadd float %202, %203 %205 = fmul float %20, %194 %206 = fmul float %21, %195 %207 = fadd float %205, %206 %208 = fmul float %22, %196 %209 = fadd float %207, %208 %210 = fmul float %23, %197 %211 = fadd float %209, %210 %212 = fmul float %24, %194 %213 = fmul float %25, %195 %214 = fadd float %212, %213 %215 = fmul float %26, %196 %216 = fadd float %214, %215 %217 = fmul float %27, %197 %218 = fadd float %216, %217 %219 = fmul float %192, %192 %220 = fmul float %191, %191 %221 = fsub float %220, %219 %222 = fmul float %28, %221 %223 = fadd float %222, %204 %224 = fmul float %29, %221 %225 = fadd float %224, %211 %226 = fmul float %30, %221 %227 = fadd float %226, %218 %228 = fsub float %128, %13 %229 = fsub float %130, %14 %230 = fsub float %132, %15 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %162, float %164, float %167, float %169) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %191, float %192, float %193, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %223, float %225, float %227, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %158, float %228, float %229, float %230) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %128, float %130, float %132, float %126) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %154, float %156, float %158, float %160) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s18, s[20:23], 0x20 ; C2091520 buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00 s_buffer_load_dword s19, s[20:23], 0x21 ; C2099521 s_buffer_load_dword s24, s[20:23], 0x22 ; C20C1522 s_buffer_load_dword s25, s[20:23], 0x24 ; C20C9524 s_buffer_load_dword s26, s[20:23], 0x25 ; C20D1525 s_buffer_load_dword s27, s[20:23], 0x26 ; C20D9526 s_buffer_load_dword s28, s[20:23], 0x28 ; C20E1528 s_buffer_load_dword s29, s[20:23], 0x29 ; C20E9529 s_buffer_load_dword s30, s[20:23], 0x2a ; C20F152A s_buffer_load_dword s31, s[20:23], 0x2c ; C20F952C s_buffer_load_dword s32, s[20:23], 0x2d ; C210152D s_buffer_load_dword s33, s[20:23], 0x2e ; C210952E s_buffer_load_dword s34, s[20:23], 0x34 ; C2111534 s_buffer_load_dword s35, s[20:23], 0x35 ; C2119535 s_buffer_load_dword s5, s[20:23], 0x36 ; C2029536 s_buffer_load_dword s0, s[20:23], 0x0 ; C2001500 s_buffer_load_dword s1, s[20:23], 0x1 ; C2009501 s_buffer_load_dword s2, s[20:23], 0x2 ; C2011502 s_buffer_load_dword s6, s[20:23], 0x4 ; C2031504 s_buffer_load_dword s14, s[20:23], 0x5 ; C2071505 s_buffer_load_dword s4, s[20:23], 0x6 ; C2021506 s_buffer_load_dword s3, s[20:23], 0x7 ; C2019507 s_buffer_load_dword s12, s[20:23], 0x8 ; C2061508 s_buffer_load_dword s16, s[20:23], 0x9 ; C2081509 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s5 ; 7E000205 s_buffer_load_dword s7, s[20:23], 0xa ; C203950A s_buffer_load_dword s5, s[20:23], 0xb ; C202950B s_buffer_load_dword s15, s[20:23], 0xc ; C207950C s_buffer_load_dword s17, s[20:23], 0xd ; C208950D s_buffer_load_dword s13, s[20:23], 0xe ; C206950E s_buffer_load_dword s8, s[20:23], 0xf ; C204150F s_buffer_load_dword s36, s[20:23], 0x3c ; C212153C s_buffer_load_dword s37, s[20:23], 0x40 ; C2129540 s_buffer_load_dword s38, s[20:23], 0x41 ; C2131541 s_buffer_load_dword s39, s[20:23], 0x42 ; C2139542 s_buffer_load_dword s40, s[20:23], 0x43 ; C2141543 s_buffer_load_dword s9, s[20:23], 0x10 ; C2049510 s_buffer_load_dword s10, s[20:23], 0x11 ; C2051511 s_buffer_load_dword s11, s[20:23], 0x12 ; C2059512 s_buffer_load_dword s41, s[20:23], 0x14 ; C2149514 s_buffer_load_dword s42, s[20:23], 0x15 ; C2151515 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_eq_f32_e64 vcc, 0, s36 ; D004006A 00004880 s_buffer_load_dword s36, s[20:23], 0x16 ; C2121516 s_buffer_load_dword s43, s[20:23], 0x17 ; C2159517 s_buffer_load_dword s44, s[20:23], 0x18 ; C2161518 s_buffer_load_dword s45, s[20:23], 0x19 ; C2169519 s_buffer_load_dword s46, s[20:23], 0x1a ; C217151A s_buffer_load_dword s47, s[20:23], 0x37 ; C2179537 s_buffer_load_dword s48, s[20:23], 0x38 ; C2181538 s_buffer_load_dword s49, s[20:23], 0x39 ; C2189539 s_buffer_load_dword s50, s[20:23], 0x3a ; C219153A s_buffer_load_dword s51, s[20:23], 0x3b ; C219953B s_buffer_load_dword s52, s[20:23], 0x1b ; C21A151B s_buffer_load_dword s53, s[20:23], 0x1c ; C21A951C s_buffer_load_dword s54, s[20:23], 0x1d ; C21B151D s_buffer_load_dword s55, s[20:23], 0x1e ; C21B951E s_buffer_load_dword s56, s[20:23], 0x1f ; C21C151F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v13, s47 ; 7E1A022F s_buffer_load_dword s47, s[20:23], 0x44 ; C2179544 s_buffer_load_dword s57, s[20:23], 0x45 ; C21C9545 s_buffer_load_dword s58, s[20:23], 0x46 ; C21D1546 v_mov_b32_e32 v14, s50 ; 7E1C0232 s_buffer_load_dword s50, s[20:23], 0x47 ; C2191547 v_mov_b32_e32 v15, s51 ; 7E1E0233 s_buffer_load_dword s51, s[20:23], 0x48 ; C2199548 s_buffer_load_dword s59, s[20:23], 0x49 ; C21D9549 s_buffer_load_dword s60, s[20:23], 0x4a ; C21E154A s_buffer_load_dword s61, s[20:23], 0x4b ; C21E954B s_buffer_load_dword s62, s[20:23], 0x4c ; C21F154C s_buffer_load_dword s63, s[20:23], 0x4d ; C21F954D s_buffer_load_dword s64, s[20:23], 0x4e ; C220154E s_buffer_load_dword s20, s[20:23], 0x4f ; C20A154F v_mul_f32_e32 v16, s41, v2 ; 10200429 v_mac_f32_e32 v0, s34, v9 ; 3E001222 v_mul_f32_e32 v17, s42, v2 ; 1022042A v_mul_f32_e32 v18, s36, v2 ; 10240424 v_mul_f32_e32 v19, s43, v2 ; 1026042B v_mac_f32_e32 v13, s35, v10 ; 3E1A1423 v_mul_f32_e32 v20, s25, v6 ; 10280C19 v_mul_f32_e32 v21, s28, v6 ; 102A0C1C v_mul_f32_e32 v6, s31, v6 ; 100C0C1F v_mac_f32_e32 v16, s44, v3 ; 3E20062C v_mac_f32_e32 v17, s45, v3 ; 3E22062D v_mac_f32_e32 v18, s46, v3 ; 3E24062E v_mac_f32_e32 v20, s26, v7 ; 3E280E1A v_mac_f32_e32 v21, s29, v7 ; 3E2A0E1D v_cndmask_b32_e32 v9, v11, v9 ; 0012130B v_cndmask_b32_e32 v10, v12, v10 ; 0014150C v_mac_f32_e32 v6, s32, v7 ; 3E0C0E20 v_mac_f32_e32 v20, s27, v8 ; 3E28101B v_mac_f32_e32 v21, s30, v8 ; 3E2A101E v_mac_f32_e32 v6, s33, v8 ; 3E0C1021 v_mac_f32_e32 v19, s52, v3 ; 3E260634 v_mul_f32_e32 v7, s37, v2 ; 100E0425 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v7, s47, v3 ; 3E0E062F v_mul_f32_e32 v8, s38, v2 ; 10100426 v_mac_f32_e32 v8, s57, v3 ; 3E100639 v_mul_f32_e32 v11, s39, v2 ; 10160427 v_mac_f32_e32 v11, s58, v3 ; 3E16063A v_mul_f32_e32 v2, s40, v2 ; 10040428 v_mac_f32_e32 v2, s50, v3 ; 3E040632 v_mac_f32_e32 v16, s53, v4 ; 3E200835 v_mac_f32_e32 v17, s54, v4 ; 3E220836 v_mac_f32_e32 v18, s55, v4 ; 3E240837 v_mac_f32_e32 v19, s56, v4 ; 3E260838 v_mac_f32_e32 v7, s51, v4 ; 3E0E0833 v_mac_f32_e32 v8, s59, v4 ; 3E10083B v_mac_f32_e32 v11, s60, v4 ; 3E16083C v_mac_f32_e32 v2, s61, v4 ; 3E04083D v_mac_f32_e32 v16, s18, v5 ; 3E200A12 v_mac_f32_e32 v17, s19, v5 ; 3E220A13 v_mac_f32_e32 v18, s24, v5 ; 3E240A18 v_mac_f32_e32 v7, s62, v5 ; 3E0E0A3E v_mac_f32_e32 v8, s63, v5 ; 3E100A3F v_mac_f32_e32 v11, s64, v5 ; 3E160A40 v_mac_f32_e32 v2, s20, v5 ; 3E040A14 v_mul_f32_e32 v3, v20, v20 ; 10062914 v_mac_f32_e32 v3, v21, v21 ; 3E062B15 v_mac_f32_e32 v3, v6, v6 ; 3E060D06 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_mac_f32_e32 v14, s48, v9 ; 3E1C1230 v_mac_f32_e32 v15, s49, v10 ; 3E1E1431 exp 15, 32, 0, 0, 0, v0, v13, v14, v15 ; F800020F 0F0E0D00 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v3, v20 ; 10002903 v_mul_f32_e32 v4, v3, v21 ; 10082B03 v_mul_f32_e32 v3, v3, v6 ; 10060D03 v_mul_f32_e32 v5, v3, v4 ; 100A0903 v_mul_f32_e32 v6, s14, v5 ; 100C0A0E v_mul_f32_e32 v9, s16, v5 ; 10120A10 v_mul_f32_e32 v5, s17, v5 ; 100A0A11 v_mul_f32_e32 v10, v4, v0 ; 10140104 v_mac_f32_e32 v6, s6, v10 ; 3E0C1406 v_mac_f32_e32 v9, s12, v10 ; 3E12140C v_mac_f32_e32 v5, s15, v10 ; 3E0A140F v_mul_f32_e32 v10, v3, v3 ; 10140703 v_mac_f32_e32 v6, s4, v10 ; 3E0C1404 v_mac_f32_e32 v9, s7, v10 ; 3E121407 v_mac_f32_e32 v5, s13, v10 ; 3E0A140D v_mul_f32_e32 v10, v0, v3 ; 10140700 v_mac_f32_e32 v6, s3, v10 ; 3E0C1403 v_mac_f32_e32 v9, s5, v10 ; 3E121405 v_mac_f32_e32 v5, s8, v10 ; 3E0A1408 v_mul_f32_e32 v10, v4, v4 ; 10140904 v_mad_f32 v10, v0, v0, -v10 ; D282000A 842A0100 v_mac_f32_e32 v6, s9, v10 ; 3E0C1409 v_mac_f32_e32 v9, s10, v10 ; 3E12140A v_mac_f32_e32 v5, s11, v10 ; 3E0A140B v_subrev_f32_e32 v10, s0, v16 ; 0A142000 v_subrev_f32_e32 v12, s1, v17 ; 0A182201 v_subrev_f32_e32 v13, s2, v18 ; 0A1A2402 exp 15, 33, 0, 0, 0, v0, v4, v3, v1 ; F800021F 01030400 exp 15, 34, 0, 0, 0, v6, v9, v5, v1 ; F800022F 01050906 exp 15, 35, 0, 0, 0, v11, v10, v12, v13 ; F800023F 0D0C0A0B exp 15, 36, 0, 0, 0, v16, v17, v18, v19 ; F800024F 13121110 exp 15, 12, 0, 1, 0, v7, v8, v11, v2 ; F80008CF 020B0807 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 24 Code Size: 748 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SVIEW[0], CUBE, FLOAT DCL SVIEW[1], CUBE, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL CONST[0..5] DCL CONST[8..19] DCL CONST[21..22] DCL CONST[24..25] DCL TEMP[0..18], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, 0.5000} IMM[1] FLT32 { 0.7500, 7.0000, 1.0000, 10.0000} IMM[2] FLT32 { 0.9680, 0.0300, 0.0001, -1.0000} 0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx 3: DP3 TEMP[1].x, IN[3].yzww, IN[3].yzww 4: RSQ TEMP[1].x, TEMP[1].xxxx 5: MUL TEMP[1].xyz, IN[3].yzww, TEMP[1].xxxx 6: MOV TEMP[2].xy, IN[0].xyyy 7: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D 8: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[2].xyzz 9: LRP TEMP[3].xyz, CONST[21].xxxx, TEMP[2].xyzz, CONST[16].xyzz 10: MUL TEMP[4].x, CONST[21].xxxx, CONST[16].wwww 11: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx 12: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx 13: MOV TEMP[5].xy, IN[0].xyyy 14: TEX TEMP[5].y, TEMP[5], SAMP[3], 2D 15: ADD TEMP[6].x, IMM[0].xxxx, -CONST[24].xxxx 16: MAD TEMP[5].x, TEMP[5].yyyy, CONST[24].xxxx, TEMP[6].xxxx 17: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz 18: MAX TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx 19: MOV TEMP[7].xyz, IMM[0].yyyy 20: MOV TEMP[8].w, IMM[0].xxxx 21: MOV TEMP[8].xyz, TEMP[0].xyzx 22: DP4 TEMP[9].x, CONST[1], TEMP[8] 23: DP4 TEMP[10].x, CONST[2], TEMP[8] 24: MOV TEMP[9].y, TEMP[10].xxxx 25: DP4 TEMP[8].x, CONST[3], TEMP[8] 26: MOV TEMP[9].z, TEMP[8].xxxx 27: ADD TEMP[8].xyz, IN[2].xyzz, TEMP[9].xyzz 28: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx 29: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[1].xyzz 30: MUL TEMP[9].xyz, TEMP[9].xxxx, TEMP[0].xyzz 31: MUL TEMP[9].xyz, IMM[0].zzzz, TEMP[9].xyzz 32: ADD TEMP[9].xyz, TEMP[1].xyzz, -TEMP[9].xyzz 33: MOV TEMP[10].xyz, TEMP[9].xyzx 34: FSLT TEMP[11].x, IMM[0].yyyy, CONST[10].wwww 35: UIF TEMP[11].xxxx :0 36: DP3 TEMP[11].x, TEMP[9].xyzz, TEMP[9].xyzz 37: RSQ TEMP[11].x, TEMP[11].xxxx 38: MUL TEMP[11].xyz, TEMP[9].xyzz, TEMP[11].xxxx 39: MOV TEMP[12].xyz, -IN[4].xyzx 40: ADD TEMP[13].xyz, CONST[8].xyzz, TEMP[12].xyzz 41: RCP TEMP[14].x, TEMP[11].xxxx 42: RCP TEMP[14].y, TEMP[11].yyyy 43: RCP TEMP[14].z, TEMP[11].zzzz 44: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz 45: ADD TEMP[12].xyz, CONST[9].xyzz, TEMP[12].xyzz 46: RCP TEMP[14].x, TEMP[11].xxxx 47: RCP TEMP[14].y, TEMP[11].yyyy 48: RCP TEMP[14].z, TEMP[11].zzzz 49: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz 50: FSLT TEMP[14].xyz, IMM[0].yyyy, TEMP[11].xyzz 51: UIF TEMP[14].xxxx :0 52: MOV TEMP[15].x, TEMP[13].xxxx 53: ELSE :0 54: MOV TEMP[15].x, TEMP[12].xxxx 55: ENDIF 56: UIF TEMP[14].yyyy :0 57: MOV TEMP[16].x, TEMP[13].yyyy 58: ELSE :0 59: MOV TEMP[16].x, TEMP[12].yyyy 60: ENDIF 61: UIF TEMP[14].zzzz :0 62: MOV TEMP[13].x, TEMP[13].zzzz 63: ELSE :0 64: MOV TEMP[13].x, TEMP[12].zzzz 65: ENDIF 66: ADD TEMP[12].xyz, CONST[8].xyzz, CONST[9].xyzz 67: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[0].wwww 68: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx 69: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx 70: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[10].xyzz 71: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[4].xyzz 72: MAD TEMP[11].xyz, TEMP[11].xyzz, TEMP[13].xxxx, TEMP[14].xyzz 73: ADD TEMP[10].xyz, TEMP[11].xyzz, -TEMP[12].xyzz 74: ENDIF 75: ADD TEMP[11].x, IMM[0].xxxx, -CONST[22].xxxx 76: POW TEMP[11].x, TEMP[11].xxxx, IMM[1].xxxx 77: MUL TEMP[11].x, TEMP[11].xxxx, IMM[1].yyyy 78: MOV TEMP[10].xyz, TEMP[10].xyzz 79: MOV TEMP[10].w, TEMP[11].xxxx 80: TXL TEMP[10], TEMP[10], SAMP[0], CUBE 81: POW TEMP[11].x, TEMP[10].wwww, CONST[11].yyyy 82: MUL TEMP[11].x, CONST[11].xxxx, TEMP[11].xxxx 83: MUL TEMP[10].xyz, TEMP[11].xxxx, TEMP[10].xyzz 84: FSLT TEMP[11].x, CONST[9].wwww, IMM[1].zzzz 85: UIF TEMP[11].xxxx :0 86: MOV TEMP[11].xyz, TEMP[9].xyzx 87: FSLT TEMP[12].x, IMM[0].yyyy, CONST[14].wwww 88: UIF TEMP[12].xxxx :0 89: DP3 TEMP[12].x, TEMP[9].xyzz, TEMP[9].xyzz 90: RSQ TEMP[12].x, TEMP[12].xxxx 91: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[12].xxxx 92: MOV TEMP[12].xyz, -IN[4].xyzx 93: ADD TEMP[13].xyz, CONST[12].xyzz, TEMP[12].xyzz 94: RCP TEMP[14].x, TEMP[9].xxxx 95: RCP TEMP[14].y, TEMP[9].yyyy 96: RCP TEMP[14].z, TEMP[9].zzzz 97: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz 98: ADD TEMP[12].xyz, CONST[13].xyzz, TEMP[12].xyzz 99: RCP TEMP[14].x, TEMP[9].xxxx 100: RCP TEMP[14].y, TEMP[9].yyyy 101: RCP TEMP[14].z, TEMP[9].zzzz 102: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz 103: FSLT TEMP[14].xyz, IMM[0].yyyy, TEMP[9].xyzz 104: UIF TEMP[14].xxxx :0 105: MOV TEMP[15].x, TEMP[13].xxxx 106: ELSE :0 107: MOV TEMP[15].x, TEMP[12].xxxx 108: ENDIF 109: UIF TEMP[14].yyyy :0 110: MOV TEMP[16].x, TEMP[13].yyyy 111: ELSE :0 112: MOV TEMP[16].x, TEMP[12].yyyy 113: ENDIF 114: UIF TEMP[14].zzzz :0 115: MOV TEMP[13].x, TEMP[13].zzzz 116: ELSE :0 117: MOV TEMP[13].x, TEMP[12].zzzz 118: ENDIF 119: ADD TEMP[12].xyz, CONST[12].xyzz, CONST[13].xyzz 120: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[0].wwww 121: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx 122: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx 123: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[14].xyzz 124: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[4].xyzz 125: MAD TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xxxx, TEMP[14].xyzz 126: ADD TEMP[11].xyz, TEMP[9].xyzz, -TEMP[12].xyzz 127: ENDIF 128: ADD TEMP[9].x, IMM[0].xxxx, -CONST[22].xxxx 129: POW TEMP[9].x, TEMP[9].xxxx, IMM[1].xxxx 130: MUL TEMP[9].x, TEMP[9].xxxx, IMM[1].yyyy 131: MOV TEMP[11].xyz, TEMP[11].xyzz 132: MOV TEMP[11].w, TEMP[9].xxxx 133: TXL TEMP[9], TEMP[11], SAMP[1], CUBE 134: POW TEMP[11].x, TEMP[9].wwww, CONST[15].yyyy 135: MUL TEMP[11].x, CONST[15].xxxx, TEMP[11].xxxx 136: MUL TEMP[9].xyz, TEMP[11].xxxx, TEMP[9].xyzz 137: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[10].xyzz, TEMP[9].xyzz 138: ELSE :0 139: MOV TEMP[7].xyz, TEMP[10].xyzx 140: ENDIF 141: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx 142: MOV TEMP[1].xyz, -TEMP[1].xyzx 143: ADD TEMP[5].x, IMM[0].xxxx, -CONST[22].xxxx 144: ADD TEMP[9].xyz, CONST[0].xyzz, TEMP[1].xyzz 145: DP3 TEMP[10].x, TEMP[9].xyzz, TEMP[9].xyzz 146: RSQ TEMP[10].x, TEMP[10].xxxx 147: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xxxx 148: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz 149: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx 150: DP3 TEMP[10].x, CONST[0].xyzz, TEMP[9].xyzz 151: MAX TEMP[10].x, IMM[0].yyyy, TEMP[10].xxxx 152: MUL TEMP[11].x, TEMP[5].xxxx, TEMP[5].xxxx 153: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].wwww 154: ADD TEMP[12].x, IMM[0].xxxx, -TEMP[5].xxxx 155: MAD TEMP[12].x, TEMP[12].xxxx, IMM[2].xxxx, IMM[2].yyyy 156: LG2 TEMP[12].x, TEMP[12].xxxx 157: RCP TEMP[12].x, TEMP[12].xxxx 158: MUL TEMP[12].x, IMM[1].wwww, TEMP[12].xxxx 159: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[12].xxxx 160: ADD TEMP[13].x, IMM[0].xxxx, -TEMP[6].xxxx 161: ADD TEMP[14].x, IMM[0].xxxx, -TEMP[1].xxxx 162: MUL TEMP[15].x, IMM[0].zzzz, TEMP[10].xxxx 163: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx 164: MAD TEMP[5].x, TEMP[15].xxxx, TEMP[5].xxxx, IMM[0].wwww 165: ADD TEMP[10].x, IMM[0].xxxx, -TEMP[10].xxxx 166: ADD TEMP[15].x, IMM[0].xxxx, -TEMP[1].xxxx 167: MOV TEMP[16].xy, IN[0].xyyy 168: TEX TEMP[16].xyz, TEMP[16], SAMP[4], 2D 169: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx 170: ADD TEMP[4].x, CONST[22].xxxx, TEMP[4].xxxx 171: MOV_SAT TEMP[4].x, TEMP[4].xxxx 172: MUL TEMP[17].x, TEMP[15].xxxx, TEMP[15].xxxx 173: MUL TEMP[18].x, TEMP[15].xxxx, TEMP[15].xxxx 174: MUL TEMP[15].x, TEMP[18].xxxx, TEMP[15].xxxx 175: MUL TEMP[15].x, TEMP[17].xxxx, TEMP[15].xxxx 176: LRP TEMP[4].xyz, TEMP[15].xxxx, TEMP[4].xxxx, TEMP[3].xyzz 177: LRP TEMP[15].x, TEMP[6].xxxx, IMM[0].xxxx, TEMP[11].xxxx 178: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx, TEMP[11].xxxx 179: MAD TEMP[1].x, TEMP[15].xxxx, TEMP[1].xxxx, IMM[2].zzzz 180: RCP TEMP[1].x, TEMP[1].xxxx 181: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[9].xyzz 182: MAX TEMP[9].x, IMM[0].yyyy, TEMP[9].xxxx 183: POW TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx 184: ADD TEMP[11].x, TEMP[12].xxxx, IMM[0].xxxx 185: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].yyyy 186: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx 187: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[9].xxxx 188: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx 189: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx 190: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx 191: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[17].xyzz 192: ADD TEMP[9].xyz, IMM[0].xxxx, -TEMP[3].xyzz 193: MUL TEMP[11].x, TEMP[10].xxxx, TEMP[10].xxxx 194: MUL TEMP[12].x, TEMP[10].xxxx, TEMP[10].xxxx 195: MUL TEMP[10].x, TEMP[12].xxxx, TEMP[10].xxxx 196: MUL TEMP[10].x, TEMP[11].xxxx, TEMP[10].xxxx 197: MAD TEMP[3].xyz, TEMP[9].xyzz, TEMP[10].xxxx, TEMP[3].xyzz 198: ADD TEMP[9].x, TEMP[5].xxxx, IMM[2].wwww 199: MUL TEMP[10].x, TEMP[13].xxxx, TEMP[13].xxxx 200: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[13].xxxx 201: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[13].xxxx 202: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 203: MAD TEMP[9].x, TEMP[9].xxxx, TEMP[10].xxxx, IMM[0].xxxx 204: ADD TEMP[5].x, TEMP[5].xxxx, IMM[2].wwww 205: MUL TEMP[10].x, TEMP[14].xxxx, TEMP[14].xxxx 206: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx 207: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx 208: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 209: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[10].xxxx, IMM[0].xxxx 210: MUL TEMP[5].x, TEMP[9].xxxx, TEMP[5].xxxx 211: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 212: MAD TEMP[5].xyz, CONST[17].xyzz, TEMP[5].xxxx, TEMP[8].xyzz 213: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz 214: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz 215: MAD TEMP[1].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz 216: MAD TEMP[0].xyz, TEMP[16].xyzz, CONST[25].xyzz, TEMP[1].xyzz 217: MAD TEMP[1].x, IN[3].xxxx, CONST[5].zzzz, CONST[5].wwww 218: MOV_SAT TEMP[1].x, TEMP[1].xxxx 219: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz 220: MOV TEMP[0].xyz, TEMP[0].xyzx 221: MOV TEMP[0].w, IMM[0].xxxx 222: MOV OUT[0], TEMP[0] 223: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300) %76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312) %79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336) %80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384) %82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 400) %83 = call float @llvm.SI.load.const(<16 x i8> %23, i32 404) %84 = call float @llvm.SI.load.const(<16 x i8> %23, i32 408) %85 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %86 = load <32 x i8>, <32 x i8> addrspace(2)* %85, align 32, !tbaa !0 %87 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 %89 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %90 = bitcast <8 x i32> addrspace(2)* %89 to <32 x i8> addrspace(2)* %91 = load <32 x i8>, <32 x i8> addrspace(2)* %90, align 32, !tbaa !0 %92 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %93 = bitcast <4 x i32> addrspace(2)* %92 to <16 x i8> addrspace(2)* %94 = load <16 x i8>, <16 x i8> addrspace(2)* %93, align 16, !tbaa !0 %95 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %96 = bitcast <8 x i32> addrspace(2)* %95 to <32 x i8> addrspace(2)* %97 = load <32 x i8>, <32 x i8> addrspace(2)* %96, align 32, !tbaa !0 %98 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %99 = bitcast <4 x i32> addrspace(2)* %98 to <16 x i8> addrspace(2)* %100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !tbaa !0 %101 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %102 = bitcast <8 x i32> addrspace(2)* %101 to <32 x i8> addrspace(2)* %103 = load <32 x i8>, <32 x i8> addrspace(2)* %102, align 32, !tbaa !0 %104 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %105 = bitcast <4 x i32> addrspace(2)* %104 to <16 x i8> addrspace(2)* %106 = load <16 x i8>, <16 x i8> addrspace(2)* %105, align 16, !tbaa !0 %107 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %108 = bitcast <8 x i32> addrspace(2)* %107 to <32 x i8> addrspace(2)* %109 = load <32 x i8>, <32 x i8> addrspace(2)* %108, align 32, !tbaa !0 %110 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %111 = bitcast <4 x i32> addrspace(2)* %110 to <16 x i8> addrspace(2)* %112 = load <16 x i8>, <16 x i8> addrspace(2)* %111, align 16, !tbaa !0 %113 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %114 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %115 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %116 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %117 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %119 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %123 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %124 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %125 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %126 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %127 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %128 = fmul float %115, %115 %129 = fmul float %116, %116 %130 = fadd float %129, %128 %131 = fmul float %117, %117 %132 = fadd float %130, %131 %133 = call float @llvm.AMDGPU.rsq.clamped.f32(float %132) %134 = fmul float %115, %133 %135 = fmul float %116, %133 %136 = fmul float %117, %133 %137 = fmul float %122, %122 %138 = fmul float %123, %123 %139 = fadd float %138, %137 %140 = fmul float %124, %124 %141 = fadd float %139, %140 %142 = call float @llvm.AMDGPU.rsq.clamped.f32(float %141) %143 = fmul float %122, %142 %144 = fmul float %123, %142 %145 = fmul float %124, %142 %146 = bitcast float %113 to i32 %147 = bitcast float %114 to i32 %148 = insertelement <2 x i32> undef, i32 %146, i32 0 %149 = insertelement <2 x i32> %148, i32 %147, i32 1 %150 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %149, <32 x i8> %97, <16 x i8> %100, i32 2) %151 = extractelement <4 x float> %150, i32 0 %152 = extractelement <4 x float> %150, i32 1 %153 = extractelement <4 x float> %150, i32 2 %154 = fmul float %76, %151 %155 = fmul float %77, %152 %156 = fmul float %78, %153 %157 = call float @llvm.AMDGPU.lrp(float %79, float %154, float %66) %158 = call float @llvm.AMDGPU.lrp(float %79, float %155, float %67) %159 = call float @llvm.AMDGPU.lrp(float %79, float %156, float %68) %160 = fmul float %79, %69 %161 = fsub float %69, %160 %162 = fmul float %154, %161 %163 = fmul float %155, %161 %164 = fmul float %156, %161 %165 = bitcast float %113 to i32 %166 = bitcast float %114 to i32 %167 = insertelement <2 x i32> undef, i32 %165, i32 0 %168 = insertelement <2 x i32> %167, i32 %166, i32 1 %169 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %168, <32 x i8> %103, <16 x i8> %106, i32 2) %170 = extractelement <4 x float> %169, i32 1 %171 = fsub float 1.000000e+00, %81 %172 = fmul float %170, %81 %173 = fadd float %172, %171 %174 = fmul float %134, %24 %175 = fmul float %135, %25 %176 = fadd float %175, %174 %177 = fmul float %136, %26 %178 = fadd float %176, %177 %179 = call float @llvm.maxnum.f32(float %178, float 0.000000e+00) %180 = fmul float %27, %134 %181 = fmul float %28, %135 %182 = fadd float %180, %181 %183 = fmul float %29, %136 %184 = fadd float %182, %183 %185 = fadd float %184, %30 %186 = fmul float %31, %134 %187 = fmul float %32, %135 %188 = fadd float %186, %187 %189 = fmul float %33, %136 %190 = fadd float %188, %189 %191 = fadd float %190, %34 %192 = fmul float %35, %134 %193 = fmul float %36, %135 %194 = fadd float %192, %193 %195 = fmul float %37, %136 %196 = fadd float %194, %195 %197 = fadd float %196, %38 %198 = fadd float %118, %185 %199 = fadd float %119, %191 %200 = fadd float %120, %197 %201 = fmul float %198, %173 %202 = fmul float %199, %173 %203 = fmul float %200, %173 %204 = fmul float %134, %143 %205 = fmul float %135, %144 %206 = fadd float %205, %204 %207 = fmul float %136, %145 %208 = fadd float %206, %207 %209 = fmul float %208, %134 %210 = fmul float %208, %135 %211 = fmul float %208, %136 %212 = fmul float %209, 2.000000e+00 %213 = fmul float %210, 2.000000e+00 %214 = fmul float %211, 2.000000e+00 %215 = fsub float %143, %212 %216 = fsub float %144, %213 %217 = fsub float %145, %214 %218 = fcmp ogt float %51, 0.000000e+00 br i1 %218, label %IF, label %ENDIF IF: ; preds = %main_body %219 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %220 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %221 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %222 = fmul float %215, %215 %223 = fmul float %216, %216 %224 = fadd float %223, %222 %225 = fmul float %217, %217 %226 = fadd float %224, %225 %227 = call float @llvm.AMDGPU.rsq.clamped.f32(float %226) %228 = fmul float %215, %227 %229 = fmul float %216, %227 %230 = fmul float %217, %227 %231 = fsub float %44, %125 %232 = fsub float %45, %126 %233 = fsub float %46, %127 %234 = fdiv float 1.000000e+00, %228 %235 = fdiv float 1.000000e+00, %229 %236 = fdiv float 1.000000e+00, %230 %237 = fmul float %231, %234 %238 = fmul float %232, %235 %239 = fmul float %233, %236 %240 = fsub float %47, %125 %241 = fsub float %48, %126 %242 = fsub float %49, %127 %243 = fdiv float 1.000000e+00, %228 %244 = fdiv float 1.000000e+00, %229 %245 = fdiv float 1.000000e+00, %230 %246 = fmul float %240, %243 %247 = fmul float %241, %244 %248 = fmul float %242, %245 %249 = fcmp ogt float %228, 0.000000e+00 %250 = fcmp ogt float %229, 0.000000e+00 %251 = fcmp ogt float %230, 0.000000e+00 %. = select i1 %249, float %237, float %246 %temp64.0 = select i1 %250, float %238, float %247 %.100 = select i1 %251, float %239, float %248 %252 = fadd float %44, %47 %253 = fadd float %45, %48 %254 = fadd float %46, %49 %255 = fmul float %252, 5.000000e-01 %256 = fmul float %253, 5.000000e-01 %257 = fmul float %254, 5.000000e-01 %258 = call float @llvm.minnum.f32(float %., float %temp64.0) %259 = call float @llvm.minnum.f32(float %258, float %.100) %260 = fsub float %255, %221 %261 = fsub float %256, %220 %262 = fsub float %257, %219 %263 = fadd float %260, %125 %264 = fadd float %261, %126 %265 = fadd float %262, %127 %266 = fmul float %228, %259 %267 = fadd float %266, %263 %268 = fmul float %229, %259 %269 = fadd float %268, %264 %270 = fmul float %230, %259 %271 = fadd float %270, %265 %272 = fsub float %267, %255 %273 = fsub float %269, %256 %274 = fsub float %271, %257 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp40.0 = phi float [ %272, %IF ], [ %215, %main_body ] %temp41.0 = phi float [ %273, %IF ], [ %216, %main_body ] %temp42.0 = phi float [ %274, %IF ], [ %217, %main_body ] %275 = fsub float 1.000000e+00, %80 %276 = call float @llvm.pow.f32(float %275, float 7.500000e-01) %277 = fmul float %276, 7.000000e+00 %278 = insertelement <4 x float> undef, float %temp40.0, i32 0 %279 = insertelement <4 x float> %278, float %temp41.0, i32 1 %280 = insertelement <4 x float> %279, float %temp42.0, i32 2 %281 = insertelement <4 x float> %280, float %277, i32 3 %282 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %281) %283 = extractelement <4 x float> %282, i32 0 %284 = extractelement <4 x float> %282, i32 1 %285 = extractelement <4 x float> %282, i32 2 %286 = extractelement <4 x float> %282, i32 3 %287 = call float @llvm.fabs.f32(float %285) %288 = fdiv float 1.000000e+00, %287 %289 = fmul float %283, %288 %290 = fadd float %289, 1.500000e+00 %291 = fmul float %284, %288 %292 = fadd float %291, 1.500000e+00 %293 = bitcast float %292 to i32 %294 = bitcast float %290 to i32 %295 = bitcast float %286 to i32 %296 = bitcast float %277 to i32 %297 = insertelement <4 x i32> undef, i32 %293, i32 0 %298 = insertelement <4 x i32> %297, i32 %294, i32 1 %299 = insertelement <4 x i32> %298, i32 %295, i32 2 %300 = insertelement <4 x i32> %299, i32 %296, i32 3 %301 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %300, <32 x i8> %86, <16 x i8> %88, i32 4) %302 = extractelement <4 x float> %301, i32 0 %303 = extractelement <4 x float> %301, i32 1 %304 = extractelement <4 x float> %301, i32 2 %305 = extractelement <4 x float> %301, i32 3 %306 = call float @llvm.pow.f32(float %305, float %53) %307 = fmul float %52, %306 %308 = fmul float %307, %302 %309 = fmul float %307, %303 %310 = fmul float %307, %304 %311 = fcmp olt float %50, 0x3FEFFFEB00000000 br i1 %311, label %IF86, label %ENDIF85 IF86: ; preds = %ENDIF %312 = fcmp ogt float %63, 0.000000e+00 br i1 %312, label %IF89, label %ENDIF88 ENDIF85: ; preds = %ENDIF, %ENDIF88 %temp28.0 = phi float [ %554, %ENDIF88 ], [ %308, %ENDIF ] %temp29.0 = phi float [ %555, %ENDIF88 ], [ %309, %ENDIF ] %temp30.0 = phi float [ %556, %ENDIF88 ], [ %310, %ENDIF ] %313 = fmul float %temp28.0, %173 %314 = fmul float %temp29.0, %173 %315 = fmul float %temp30.0, %173 %316 = fsub float 1.000000e+00, %80 %317 = fsub float %24, %143 %318 = fsub float %25, %144 %319 = fsub float %26, %145 %320 = fmul float %317, %317 %321 = fmul float %318, %318 %322 = fadd float %321, %320 %323 = fmul float %319, %319 %324 = fadd float %322, %323 %325 = call float @llvm.AMDGPU.rsq.clamped.f32(float %324) %326 = fmul float %317, %325 %327 = fmul float %318, %325 %328 = fmul float %319, %325 %329 = fmul float %143, %134 %330 = fsub float -0.000000e+00, %329 %331 = fmul float %144, %135 %332 = fsub float %330, %331 %333 = fmul float %145, %136 %334 = fsub float %332, %333 %335 = call float @llvm.maxnum.f32(float %334, float 0.000000e+00) %336 = fmul float %24, %326 %337 = fmul float %25, %327 %338 = fadd float %337, %336 %339 = fmul float %26, %328 %340 = fadd float %338, %339 %341 = call float @llvm.maxnum.f32(float %340, float 0.000000e+00) %342 = fmul float %316, %316 %343 = fmul float %342, %75 %344 = fsub float 1.000000e+00, %316 %345 = fmul float %344, 0x3FEEF9DB20000000 %346 = fadd float %345, 0x3F9EB851E0000000 %347 = call float @llvm.log2.f32(float %346) %348 = fdiv float 1.000000e+00, %347 %349 = fmul float %348, 1.000000e+01 %350 = fmul float %349, %349 %351 = fsub float 1.000000e+00, %179 %352 = fsub float 1.000000e+00, %335 %353 = fmul float %341, 2.000000e+00 %354 = fmul float %341, %316 %355 = fmul float %353, %354 %356 = fadd float %355, 5.000000e-01 %357 = fsub float 1.000000e+00, %341 %358 = fsub float 1.000000e+00, %335 %359 = bitcast float %113 to i32 %360 = bitcast float %114 to i32 %361 = insertelement <2 x i32> undef, i32 %359, i32 0 %362 = insertelement <2 x i32> %361, i32 %360, i32 1 %363 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %362, <32 x i8> %109, <16 x i8> %112, i32 2) %364 = extractelement <4 x float> %363, i32 0 %365 = extractelement <4 x float> %363, i32 1 %366 = extractelement <4 x float> %363, i32 2 %367 = fsub float 1.000000e+00, %161 %368 = fadd float %80, %367 %369 = call float @llvm.AMDIL.clamp.(float %368, float 0.000000e+00, float 1.000000e+00) %370 = fmul float %358, %358 %371 = fmul float %358, %358 %372 = fmul float %371, %358 %373 = fmul float %370, %372 %374 = call float @llvm.AMDGPU.lrp(float %373, float %369, float %157) %375 = call float @llvm.AMDGPU.lrp(float %373, float %369, float %158) %376 = call float @llvm.AMDGPU.lrp(float %373, float %369, float %159) %377 = call float @llvm.AMDGPU.lrp(float %179, float 1.000000e+00, float %343) %378 = call float @llvm.AMDGPU.lrp(float %335, float 1.000000e+00, float %343) %379 = fmul float %377, %378 %380 = fadd float %379, 0x3F1A36E2E0000000 %381 = fdiv float 1.000000e+00, %380 %382 = fmul float %134, %326 %383 = fmul float %135, %327 %384 = fadd float %383, %382 %385 = fmul float %136, %328 %386 = fadd float %384, %385 %387 = call float @llvm.maxnum.f32(float %386, float 0.000000e+00) %388 = call float @llvm.pow.f32(float %387, float %350) %389 = fadd float %350, 1.000000e+00 %390 = fmul float %389, %74 %391 = fmul float %388, %390 %392 = fmul float %381, %391 %393 = fmul float %392, %179 %394 = fmul float %393, %73 %395 = call float @llvm.maxnum.f32(float %394, float 0.000000e+00) %396 = fmul float %395, %70 %397 = fmul float %395, %71 %398 = fmul float %395, %72 %399 = fsub float 1.000000e+00, %157 %400 = fsub float 1.000000e+00, %158 %401 = fsub float 1.000000e+00, %159 %402 = fmul float %357, %357 %403 = fmul float %357, %357 %404 = fmul float %403, %357 %405 = fmul float %402, %404 %406 = fmul float %399, %405 %407 = fadd float %406, %157 %408 = fmul float %400, %405 %409 = fadd float %408, %158 %410 = fmul float %401, %405 %411 = fadd float %410, %159 %412 = fadd float %356, -1.000000e+00 %413 = fmul float %351, %351 %414 = fmul float %351, %351 %415 = fmul float %414, %351 %416 = fmul float %413, %415 %417 = fmul float %412, %416 %418 = fadd float %417, 1.000000e+00 %419 = fadd float %356, -1.000000e+00 %420 = fmul float %352, %352 %421 = fmul float %352, %352 %422 = fmul float %421, %352 %423 = fmul float %420, %422 %424 = fmul float %419, %423 %425 = fadd float %424, 1.000000e+00 %426 = fmul float %418, %425 %427 = fmul float %426, %179 %428 = fmul float %70, %427 %429 = fadd float %428, %201 %430 = fmul float %71, %427 %431 = fadd float %430, %202 %432 = fmul float %72, %427 %433 = fadd float %432, %203 %434 = fmul float %162, %429 %435 = fmul float %163, %431 %436 = fmul float %164, %433 %437 = fmul float %396, %407 %438 = fadd float %437, %434 %439 = fmul float %397, %409 %440 = fadd float %439, %435 %441 = fmul float %398, %411 %442 = fadd float %441, %436 %443 = fmul float %313, %374 %444 = fadd float %443, %438 %445 = fmul float %314, %375 %446 = fadd float %445, %440 %447 = fmul float %315, %376 %448 = fadd float %447, %442 %449 = fmul float %364, %82 %450 = fadd float %449, %444 %451 = fmul float %365, %83 %452 = fadd float %451, %446 %453 = fmul float %366, %84 %454 = fadd float %453, %448 %455 = fmul float %121, %42 %456 = fadd float %455, %43 %457 = call float @llvm.AMDIL.clamp.(float %456, float 0.000000e+00, float 1.000000e+00) %458 = call float @llvm.AMDGPU.lrp(float %457, float %450, float %39) %459 = call float @llvm.AMDGPU.lrp(float %457, float %452, float %40) %460 = call float @llvm.AMDGPU.lrp(float %457, float %454, float %41) %461 = call i32 @llvm.SI.packf16(float %458, float %459) %462 = bitcast i32 %461 to float %463 = call i32 @llvm.SI.packf16(float %460, float 1.000000e+00) %464 = bitcast i32 %463 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %462, float %464, float %462, float %464) ret void IF89: ; preds = %IF86 %465 = fmul float %215, %215 %466 = fmul float %216, %216 %467 = fadd float %466, %465 %468 = fmul float %217, %217 %469 = fadd float %467, %468 %470 = call float @llvm.AMDGPU.rsq.clamped.f32(float %469) %471 = fmul float %215, %470 %472 = fmul float %216, %470 %473 = fmul float %217, %470 %474 = fsub float %54, %125 %475 = fsub float %55, %126 %476 = fsub float %56, %127 %477 = fdiv float 1.000000e+00, %471 %478 = fdiv float 1.000000e+00, %472 %479 = fdiv float 1.000000e+00, %473 %480 = fmul float %474, %477 %481 = fmul float %475, %478 %482 = fmul float %476, %479 %483 = fsub float %57, %125 %484 = fsub float %58, %126 %485 = fsub float %59, %127 %486 = fdiv float 1.000000e+00, %471 %487 = fdiv float 1.000000e+00, %472 %488 = fdiv float 1.000000e+00, %473 %489 = fmul float %483, %486 %490 = fmul float %484, %487 %491 = fmul float %485, %488 %492 = fcmp ogt float %471, 0.000000e+00 %493 = fcmp ogt float %472, 0.000000e+00 %494 = fcmp ogt float %473, 0.000000e+00 %.101 = select i1 %492, float %480, float %489 %temp64.1 = select i1 %493, float %481, float %490 %.102 = select i1 %494, float %482, float %491 %495 = fadd float %54, %57 %496 = fadd float %55, %58 %497 = fadd float %56, %59 %498 = fmul float %495, 5.000000e-01 %499 = fmul float %496, 5.000000e-01 %500 = fmul float %497, 5.000000e-01 %501 = call float @llvm.minnum.f32(float %.101, float %temp64.1) %502 = call float @llvm.minnum.f32(float %501, float %.102) %503 = fsub float %498, %60 %504 = fsub float %499, %61 %505 = fsub float %500, %62 %506 = fadd float %503, %125 %507 = fadd float %504, %126 %508 = fadd float %505, %127 %509 = fmul float %471, %502 %510 = fadd float %509, %506 %511 = fmul float %472, %502 %512 = fadd float %511, %507 %513 = fmul float %473, %502 %514 = fadd float %513, %508 %515 = fsub float %510, %498 %516 = fsub float %512, %499 %517 = fsub float %514, %500 br label %ENDIF88 ENDIF88: ; preds = %IF86, %IF89 %temp44.0 = phi float [ %515, %IF89 ], [ %215, %IF86 ] %temp45.0 = phi float [ %516, %IF89 ], [ %216, %IF86 ] %temp46.0 = phi float [ %517, %IF89 ], [ %217, %IF86 ] %518 = fsub float 1.000000e+00, %80 %519 = call float @llvm.pow.f32(float %518, float 7.500000e-01) %520 = fmul float %519, 7.000000e+00 %521 = insertelement <4 x float> undef, float %temp44.0, i32 0 %522 = insertelement <4 x float> %521, float %temp45.0, i32 1 %523 = insertelement <4 x float> %522, float %temp46.0, i32 2 %524 = insertelement <4 x float> %523, float %520, i32 3 %525 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %524) %526 = extractelement <4 x float> %525, i32 0 %527 = extractelement <4 x float> %525, i32 1 %528 = extractelement <4 x float> %525, i32 2 %529 = extractelement <4 x float> %525, i32 3 %530 = call float @llvm.fabs.f32(float %528) %531 = fdiv float 1.000000e+00, %530 %532 = fmul float %526, %531 %533 = fadd float %532, 1.500000e+00 %534 = fmul float %527, %531 %535 = fadd float %534, 1.500000e+00 %536 = bitcast float %535 to i32 %537 = bitcast float %533 to i32 %538 = bitcast float %529 to i32 %539 = bitcast float %520 to i32 %540 = insertelement <4 x i32> undef, i32 %536, i32 0 %541 = insertelement <4 x i32> %540, i32 %537, i32 1 %542 = insertelement <4 x i32> %541, i32 %538, i32 2 %543 = insertelement <4 x i32> %542, i32 %539, i32 3 %544 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %543, <32 x i8> %91, <16 x i8> %94, i32 4) %545 = extractelement <4 x float> %544, i32 0 %546 = extractelement <4 x float> %544, i32 1 %547 = extractelement <4 x float> %544, i32 2 %548 = extractelement <4 x float> %544, i32 3 %549 = call float @llvm.pow.f32(float %548, float %65) %550 = fmul float %64, %549 %551 = fmul float %550, %545 %552 = fmul float %550, %546 %553 = fmul float %550, %547 %554 = call float @llvm.AMDGPU.lrp(float %50, float %308, float %551) %555 = call float @llvm.AMDGPU.lrp(float %50, float %309, float %552) %556 = call float @llvm.AMDGPU.lrp(float %50, float %310, float %553) br label %ENDIF85 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v9, v0, 2, 1, [m0] ; C8240600 v_interp_p2_f32 v9, [v9], v1, 2, 1, [m0] ; C8250601 v_interp_p1_f32 v11, v0, 0, 2, [m0] ; C82C0800 v_interp_p2_f32 v11, [v11], v1, 0, 2, [m0] ; C82D0801 v_interp_p1_f32 v12, v0, 1, 2, [m0] ; C8300900 v_interp_p2_f32 v12, [v12], v1, 1, 2, [m0] ; C8310901 v_interp_p1_f32 v13, v0, 2, 2, [m0] ; C8340A00 v_interp_p2_f32 v13, [v13], v1, 2, 2, [m0] ; C8350A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 v_interp_p1_f32 v15, v0, 1, 3, [m0] ; C83C0D00 v_interp_p2_f32 v15, [v15], v1, 1, 3, [m0] ; C83D0D01 v_interp_p1_f32 v16, v0, 2, 3, [m0] ; C8400E00 v_interp_p2_f32 v16, [v16], v1, 2, 3, [m0] ; C8410E01 v_interp_p1_f32 v17, v0, 3, 3, [m0] ; C8440F00 v_interp_p2_f32 v17, [v17], v1, 3, 3, [m0] ; C8450F01 v_mul_f32_e32 v6, v4, v4 ; 100C0904 v_mac_f32_e32 v6, v5, v5 ; 3E0C0B05 v_mac_f32_e32 v6, v9, v9 ; 3E0C1309 v_rsq_clamp_f32_e32 v10, v6 ; 7E145906 v_mul_f32_e32 v6, v15, v15 ; 100C1F0F v_mac_f32_e32 v6, v16, v16 ; 3E0C2110 v_mac_f32_e32 v6, v17, v17 ; 3E0C2311 v_rsq_clamp_f32_e32 v18, v6 ; 7E245906 v_mul_f32_e32 v7, v10, v4 ; 100E090A v_mul_f32_e32 v6, v10, v5 ; 100C0B0A v_mul_f32_e32 v5, v10, v9 ; 100A130A v_mul_f32_e32 v10, v18, v15 ; 10141F12 v_mul_f32_e32 v9, v18, v16 ; 10122112 v_mul_f32_e32 v4, v10, v7 ; 10080F0A v_mac_f32_e32 v4, v9, v6 ; 3E080D09 v_mul_f32_e32 v14, v18, v17 ; 101C2312 v_mac_f32_e32 v4, v14, v5 ; 3E080B0E v_mul_f32_e32 v19, v7, v4 ; 10260907 v_mac_f32_e32 v19, v7, v4 ; 3E260907 v_mul_f32_e32 v20, v6, v4 ; 10280906 v_mac_f32_e32 v20, v6, v4 ; 3E280906 v_mad_f32 v22, v15, v18, -v19 ; D2820016 844E250F v_mad_f32 v23, v16, v18, -v20 ; D2820017 84522510 v_mul_f32_e32 v15, v5, v4 ; 101E0905 v_mac_f32_e32 v15, v5, v4 ; 3E1E0905 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_mad_f32 v24, v17, v18, -v15 ; D2820018 843E2511 v_interp_p1_f32 v26, v0, 0, 4, [m0] ; C8681000 v_interp_p2_f32 v26, [v26], v1, 0, 4, [m0] ; C8691001 s_load_dwordx4 s[0:3], s[4:5], 0x8 ; C0800508 s_load_dwordx8 s[12:19], s[6:7], 0x10 ; C0C60710 v_interp_p1_f32 v25, v0, 1, 4, [m0] ; C8641100 v_interp_p2_f32 v25, [v25], v1, 1, 4, [m0] ; C8651101 v_interp_p1_f32 v27, v0, 2, 4, [m0] ; C86C1200 v_interp_p2_f32 v27, [v27], v1, 2, 4, [m0] ; C86D1201 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s20, s[8:11], 0x4c ; C20A094C s_buffer_load_dword s21, s[8:11], 0x4d ; C20A894D s_buffer_load_dword s22, s[8:11], 0x4e ; C20B094E s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C s_load_dwordx8 s[32:39], s[6:7], 0x18 ; C0D00718 image_sample v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800700 00030F02 s_buffer_load_dword s1, s[8:11], 0x41 ; C2008941 s_buffer_load_dword s2, s[8:11], 0x42 ; C2010942 s_buffer_load_dword s28, s[8:11], 0x54 ; C20E0954 s_buffer_load_dword s3, s[8:11], 0x40 ; C2018940 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v19, s20, v15 ; 10261E14 v_mul_f32_e32 v20, s21, v16 ; 10282015 v_mul_f32_e32 v21, s22, v17 ; 102A2216 s_buffer_load_dword s12, s[8:11], 0x27 ; C2060927 s_buffer_load_dword s13, s[8:11], 0x2b ; C206892B s_buffer_load_dword s29, s[8:11], 0x2c ; C20E892C s_buffer_load_dword s30, s[8:11], 0x2d ; C20F092D s_buffer_load_dword s0, s[8:11], 0x58 ; C2000958 v_sub_f32_e64 v0, 1.0, s28 ; D2080000 000038F2 v_mul_f32_e32 v4, s3, v0 ; 10080003 v_mul_f32_e32 v1, s1, v0 ; 10020001 v_mul_f32_e32 v0, s2, v0 ; 10000002 v_mac_f32_e32 v4, s28, v19 ; 3E08261C v_mov_b32_e32 v28, v22 ; 7E380316 v_mac_f32_e32 v1, s28, v20 ; 3E02281C v_mov_b32_e32 v29, v23 ; 7E3A0317 v_mac_f32_e32 v0, s28, v21 ; 3E002A1C v_mov_b32_e32 v30, v24 ; 7E3C0318 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[2:3], 0, s13 ; D0020002 00001A80 image_sample v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[24:27] ; F0800F00 00C80F02 s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[20:21], s[2:3] ; BE942402 s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E s_cbranch_execz BB0_2 ; BF880000 s_buffer_load_dword s1, s[8:11], 0x20 ; C2008920 s_buffer_load_dword s2, s[8:11], 0x21 ; C2010921 s_buffer_load_dword s3, s[8:11], 0x22 ; C2018922 s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924 s_buffer_load_dword s14, s[8:11], 0x25 ; C2070925 v_mul_f32_e32 v15, v22, v22 ; 101E2D16 v_mac_f32_e32 v15, v23, v23 ; 3E1E2F17 v_mac_f32_e32 v15, v24, v24 ; 3E1E3118 v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F s_buffer_load_dword s15, s[8:11], 0x26 ; C2078926 s_buffer_load_dword s16, s[8:11], 0x28 ; C2080928 s_buffer_load_dword s17, s[8:11], 0x29 ; C2088929 s_buffer_load_dword s18, s[8:11], 0x2a ; C209092A v_mul_f32_e32 v17, v15, v22 ; 10222D0F v_mul_f32_e32 v18, v15, v23 ; 10242F0F v_mul_f32_e32 v15, v15, v24 ; 101E310F v_rcp_f32_e32 v28, v17 ; 7E385511 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v29, s1, v26 ; 083A3401 v_sub_f32_e32 v30, s2, v25 ; 083C3202 v_rcp_f32_e32 v31, v18 ; 7E3E5512 v_mul_f32_e32 v29, v28, v29 ; 103A3B1C v_sub_f32_e32 v32, s13, v26 ; 0840340D v_mul_f32_e32 v28, v28, v32 ; 1038411C v_cmp_lt_f32_e32 vcc, 0, v17 ; 7C022280 v_cndmask_b32_e32 v28, v28, v29 ; 00383B1C v_rcp_f32_e32 v29, v15 ; 7E3A550F v_mul_f32_e32 v30, v31, v30 ; 103C3D1F v_sub_f32_e32 v32, s14, v25 ; 0840320E v_mul_f32_e32 v31, v31, v32 ; 103E411F v_cmp_lt_f32_e32 vcc, 0, v18 ; 7C022480 v_cndmask_b32_e32 v30, v31, v30 ; 003C3D1F v_sub_f32_e32 v31, s3, v27 ; 083E3603 v_mul_f32_e32 v31, v29, v31 ; 103E3F1D v_sub_f32_e32 v32, s15, v27 ; 0840360F v_mul_f32_e32 v29, v29, v32 ; 103A411D v_cmp_lt_f32_e32 vcc, 0, v15 ; 7C021E80 v_cndmask_b32_e32 v29, v29, v31 ; 003A3F1D v_min3_f32 v28, v28, v30, v29 ; D2A2001C 04763D1C v_mov_b32_e32 v29, s13 ; 7E3A020D v_add_f32_e32 v29, s1, v29 ; 063A3A01 v_mov_b32_e32 v30, s14 ; 7E3C020E v_add_f32_e32 v30, s2, v30 ; 063C3C02 v_mov_b32_e32 v31, s15 ; 7E3E020F v_add_f32_e32 v31, s3, v31 ; 063E3E03 v_mad_f32 v32, 0.5, v29, -s16 ; D2820020 80423AF0 v_add_f32_e32 v32, v26, v32 ; 0640411A v_mac_f32_e32 v32, v28, v17 ; 3E40231C v_mad_f32 v17, 0.5, v30, -s17 ; D2820011 80463CF0 v_add_f32_e32 v17, v25, v17 ; 06222319 v_mac_f32_e32 v17, v28, v18 ; 3E22251C v_mad_f32 v18, 0.5, v31, -s18 ; D2820012 804A3EF0 v_add_f32_e32 v18, v27, v18 ; 0624251B v_mac_f32_e32 v18, v28, v15 ; 3E241F1C v_mad_f32 v28, 0.5, -v29, v32 ; D282001C 44823AF0 v_mad_f32 v29, 0.5, -v30, v17 ; D282001D 44463CF0 v_mad_f32 v30, 0.5, -v31, v18 ; D282001E 444A3EF0 s_or_b64 exec, exec, s[20:21] ; 88FE147E s_buffer_load_dword s14, s[8:11], 0x17 ; C2070917 s_buffer_load_dword s15, s[8:11], 0x43 ; C2078943 s_buffer_load_dword s13, s[8:11], 0x60 ; C2068960 s_buffer_load_dword s1, s[8:11], 0x0 ; C2008900 s_buffer_load_dword s2, s[8:11], 0x1 ; C2010901 s_buffer_load_dword s3, s[8:11], 0x2 ; C2018902 s_buffer_load_dword s16, s[8:11], 0x4 ; C2080904 s_buffer_load_dword s17, s[8:11], 0x5 ; C2088905 s_buffer_load_dword s18, s[8:11], 0x6 ; C2090906 s_buffer_load_dword s20, s[8:11], 0x7 ; C20A0907 s_buffer_load_dword s19, s[8:11], 0x8 ; C2098908 s_buffer_load_dword s21, s[8:11], 0x9 ; C20A8909 s_buffer_load_dword s22, s[8:11], 0xa ; C20B090A s_buffer_load_dword s23, s[8:11], 0xb ; C20B890B s_buffer_load_dword s24, s[8:11], 0xc ; C20C090C s_buffer_load_dword s25, s[8:11], 0xd ; C20C890D s_buffer_load_dword s26, s[8:11], 0xe ; C20D090E s_buffer_load_dword s27, s[8:11], 0xf ; C20D890F v_sub_f32_e64 v15, 1.0, s0 ; D208000F 000000F2 v_log_f32_e32 v15, v15 ; 7E1E4F0F s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 v_mul_legacy_f32_e32 v15, 0x3f400000, v15 ; 0E1E1EFF 3F400000 v_exp_f32_e32 v15, v15 ; 7E1E4B0F v_mul_f32_e32 v31, 0x40e00000, v15 ; 103E1EFF 40E00000 v_cubeid_f32 v35, v28, v29, v30 ; D2880023 047A3B1C v_cubema_f32 v34, v28, v29, v30 ; D28E0022 047A3B1C v_cubesc_f32 v33, v28, v29, v30 ; D28A0021 047A3B1C v_cubetc_f32 v32, v28, v29, v30 ; D28C0020 047A3B1C v_mov_b32_e32 v28, 0x3fc00000 ; 7E3802FF 3FC00000 v_rcp_f32_e64 v15, |v34| ; D354010F 00000122 v_mad_f32 v29, v15, v32, v28 ; D282001D 0472410F v_mac_f32_e32 v28, v15, v33 ; 3E38430F v_mov_b32_e32 v30, v35 ; 7E3C0323 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[28:31], 15, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[36:43], s[32:35] ; F0900F00 01091C1C s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v15, v31 ; 7E1E4F1F v_mul_legacy_f32_e32 v15, s30, v15 ; 0E1E1E1E v_exp_f32_e32 v15, v15 ; 7E1E4B0F v_mul_f32_e32 v15, s29, v15 ; 101E1E1D v_mul_f32_e32 v18, v28, v15 ; 10241F1C v_mul_f32_e32 v17, v29, v15 ; 10221F1D v_mul_f32_e32 v15, v30, v15 ; 101E1F1E v_mov_b32_e32 v28, s28 ; 7E38021C v_mov_b32_e32 v29, 0x3f7fff58 ; 7E3A02FF 3F7FFF58 v_cmp_lt_f32_e32 vcc, s12, v29 ; 7C023A0C s_and_saveexec_b64 s[28:29], vcc ; BE9C246A s_xor_b64 s[28:29], exec, s[28:29] ; 899C1C7E s_cbranch_execz BB0_6 ; BF880000 s_buffer_load_dword s32, s[8:11], 0x3b ; C210093B s_buffer_load_dword s30, s[8:11], 0x3c ; C20F093C s_buffer_load_dword s31, s[8:11], 0x3d ; C20F893D s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[32:33], 0, s32 ; D0020020 00004080 s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420 s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E s_cbranch_execz BB0_7 ; BF880000 s_buffer_load_dword s34, s[8:11], 0x36 ; C2110936 s_buffer_load_dword s35, s[8:11], 0x38 ; C2118938 s_buffer_load_dword s36, s[8:11], 0x39 ; C2120939 s_buffer_load_dword s37, s[8:11], 0x3a ; C212893A s_buffer_load_dword s38, s[8:11], 0x30 ; C2130930 s_buffer_load_dword s39, s[8:11], 0x31 ; C2138931 s_buffer_load_dword s40, s[8:11], 0x32 ; C2140932 s_buffer_load_dword s41, s[8:11], 0x34 ; C2148934 s_buffer_load_dword s42, s[8:11], 0x35 ; C2150935 v_mul_f32_e32 v29, v22, v22 ; 103A2D16 v_mac_f32_e32 v29, v23, v23 ; 3E3A2F17 v_mac_f32_e32 v29, v24, v24 ; 3E3A3118 v_rsq_clamp_f32_e32 v29, v29 ; 7E3A591D s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v30, s34, v27 ; 083C3622 v_mov_b32_e32 v31, s34 ; 7E3E0222 v_sub_f32_e32 v32, s38, v26 ; 08403426 v_sub_f32_e32 v33, s39, v25 ; 08423227 v_add_f32_e32 v31, s40, v31 ; 063E3E28 v_sub_f32_e32 v34, s40, v27 ; 08443628 v_mad_f32 v35, 0.5, v31, -s37 ; D2820023 80963EF0 v_add_f32_e32 v27, v27, v35 ; 0636471B v_mul_f32_e32 v22, v29, v22 ; 102C2D1D v_mul_f32_e32 v23, v29, v23 ; 102E2F1D v_mul_f32_e32 v24, v29, v24 ; 1030311D v_rcp_f32_e32 v29, v22 ; 7E3A5516 v_rcp_f32_e32 v35, v23 ; 7E465517 v_rcp_f32_e32 v36, v24 ; 7E485518 v_sub_f32_e32 v37, s41, v26 ; 084A3429 v_mov_b32_e32 v38, s41 ; 7E4C0229 v_add_f32_e32 v38, s38, v38 ; 064C4C26 v_mul_f32_e32 v32, v29, v32 ; 1040411D v_mul_f32_e32 v29, v29, v37 ; 103A4B1D v_mul_f32_e32 v33, v35, v33 ; 10424323 v_mul_f32_e32 v34, v36, v34 ; 10444524 v_mul_f32_e32 v30, v36, v30 ; 103C3D24 v_mad_f32 v36, 0.5, v38, -s35 ; D2820024 808E4CF0 v_add_f32_e32 v26, v26, v36 ; 0634491A v_sub_f32_e32 v36, s42, v25 ; 0848322A v_mov_b32_e32 v37, s42 ; 7E4A022A v_mul_f32_e32 v35, v35, v36 ; 10464923 v_add_f32_e32 v36, s39, v37 ; 06484A27 v_cmp_lt_f32_e32 vcc, 0, v22 ; 7C022C80 v_cndmask_b32_e32 v29, v29, v32 ; 003A411D v_cmp_lt_f32_e32 vcc, 0, v23 ; 7C022E80 v_cndmask_b32_e32 v32, v35, v33 ; 00404323 v_cmp_lt_f32_e32 vcc, 0, v24 ; 7C023080 v_cndmask_b32_e32 v30, v30, v34 ; 003C451E v_min3_f32 v29, v29, v32, v30 ; D2A2001D 047A411D v_mad_f32 v30, 0.5, v36, -s36 ; D282001E 809248F0 v_add_f32_e32 v25, v25, v30 ; 06323D19 v_mac_f32_e32 v26, v29, v22 ; 3E342D1D v_mac_f32_e32 v25, v29, v23 ; 3E322F1D v_mac_f32_e32 v27, v29, v24 ; 3E36311D v_mad_f32 v22, 0.5, -v38, v26 ; D2820016 446A4CF0 v_mad_f32 v23, 0.5, -v36, v25 ; D2820017 446648F0 v_mad_f32 v24, 0.5, -v31, v27 ; D2820018 446E3EF0 s_or_b64 exec, exec, s[32:33] ; 88FE207E v_sub_f32_e64 v25, 1.0, s0 ; D2080019 000000F2 v_log_f32_e32 v25, v25 ; 7E324F19 s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504 v_mul_legacy_f32_e32 v25, 0x3f400000, v25 ; 0E3232FF 3F400000 v_exp_f32_e32 v25, v25 ; 7E324B19 v_mul_f32_e32 v25, 0x40e00000, v25 ; 103232FF 40E00000 v_cubeid_f32 v32, v22, v23, v24 ; D2880020 04622F16 v_cubema_f32 v31, v22, v23, v24 ; D28E001F 04622F16 s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708 v_cubesc_f32 v30, v22, v23, v24 ; D28A001E 04622F16 v_cubetc_f32 v29, v22, v23, v24 ; D28C001D 04622F16 v_rcp_f32_e64 v24, |v31| ; D3540118 0000011F v_mov_b32_e32 v22, 0x3fc00000 ; 7E2C02FF 3FC00000 v_mad_f32 v23, v24, v29, v22 ; D2820017 045A3B18 v_mac_f32_e32 v22, v24, v30 ; 3E2C3D18 v_mov_b32_e32 v24, v32 ; 7E300320 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[22:25], 15, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[36:43], s[32:35] ; F0900F00 01091616 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v25, v25 ; 7E324F19 v_sub_f32_e64 v26, 1.0, s12 ; D208001A 000018F2 v_mul_legacy_f32_e32 v25, s31, v25 ; 0E32321F v_exp_f32_e32 v25, v25 ; 7E324B19 v_mul_f32_e32 v25, s30, v25 ; 1032321E v_mul_f32_e32 v22, v22, v25 ; 102C3316 v_mul_f32_e32 v23, v23, v25 ; 102E3317 v_mul_f32_e32 v24, v24, v25 ; 10303318 v_mul_f32_e32 v22, v22, v26 ; 102C3516 v_mul_f32_e32 v23, v23, v26 ; 102E3517 v_mul_f32_e32 v24, v24, v26 ; 10303518 v_mac_f32_e32 v22, s12, v18 ; 3E2C240C v_mac_f32_e32 v23, s12, v17 ; 3E2E220C v_mac_f32_e32 v24, s12, v15 ; 3E301E0C v_mov_b32_e32 v15, v24 ; 7E1E0318 v_mov_b32_e32 v17, v23 ; 7E220317 v_mov_b32_e32 v18, v22 ; 7E240316 s_or_b64 exec, exec, s[28:29] ; 88FE1C7E v_mad_f32 v24, -v28, s15, s15 ; D2820018 203C1F1C v_mov_b32_e32 v22, s14 ; 7E2C020E v_mul_f32_e32 v23, v24, v19 ; 102E2718 v_mul_f32_e32 v20, v24, v20 ; 10282918 v_mul_f32_e32 v19, v24, v21 ; 10262B18 v_mul_f32_e32 v21, s17, v6 ; 102A0C11 v_mac_f32_e32 v21, s16, v7 ; 3E2A0E10 v_mac_f32_e32 v21, s18, v5 ; 3E2A0A12 v_add_f32_e32 v21, s20, v21 ; 062A2A14 v_add_f32_e32 v25, v21, v11 ; 06321715 v_mul_f32_e32 v11, s21, v6 ; 10160C15 v_mac_f32_e32 v11, s19, v7 ; 3E160E13 v_mac_f32_e32 v11, s22, v5 ; 3E160A16 v_add_f32_e32 v11, s23, v11 ; 06161617 v_add_f32_e32 v12, v11, v12 ; 0618190B v_mul_f32_e32 v11, s25, v6 ; 10160C19 v_mac_f32_e32 v11, s24, v7 ; 3E160E18 v_mac_f32_e32 v11, s26, v5 ; 3E160A1A v_add_f32_e32 v11, s27, v11 ; 0616161B v_add_f32_e32 v26, v11, v13 ; 06341B0B s_buffer_load_dword s15, s[8:11], 0x10 ; C2078910 s_buffer_load_dword s14, s[8:11], 0x11 ; C2070911 s_buffer_load_dword s12, s[8:11], 0x12 ; C2060912 s_buffer_load_dword s36, s[8:11], 0x16 ; C2120916 s_buffer_load_dword s20, s[8:11], 0x44 ; C20A0944 s_buffer_load_dword s19, s[8:11], 0x45 ; C2098945 s_buffer_load_dword s18, s[8:11], 0x46 ; C2090946 s_buffer_load_dword s21, s[8:11], 0x48 ; C20A8948 s_buffer_load_dword s22, s[8:11], 0x49 ; C20B0949 s_buffer_load_dword s23, s[8:11], 0x4b ; C20B894B s_buffer_load_dword s17, s[8:11], 0x64 ; C2088964 s_buffer_load_dword s16, s[8:11], 0x65 ; C2080965 s_buffer_load_dword s8, s[8:11], 0x66 ; C2040966 s_load_dwordx4 s[24:27], s[4:5], 0x10 ; C08C0510 s_load_dwordx8 s[28:35], s[6:7], 0x20 ; C0CE0720 v_sub_f32_e64 v21, 1.0, s13 ; D2080015 00001AF2 v_mac_f32_e32 v21, s13, v16 ; 3E2A200D v_mul_f32_e32 v11, s1, v7 ; 10160E01 v_mac_f32_e32 v11, s2, v6 ; 3E160C02 v_mac_f32_e32 v11, s3, v5 ; 3E160A03 v_max_f32_e32 v11, 0, v11 ; 20161680 v_mul_f32_e32 v16, v21, v25 ; 10203315 v_mul_f32_e32 v13, v21, v12 ; 101A1915 v_mul_f32_e32 v12, v21, v26 ; 10183515 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v22, s36, v8 ; 3E2C1024 v_mul_f32_e32 v8, v21, v18 ; 10102515 v_mul_f32_e32 v17, v21, v17 ; 10222315 v_mul_f32_e32 v15, v21, v15 ; 101E1F15 v_sub_f32_e32 v18, 1.0, v24 ; 082430F2 v_add_f32_e32 v18, s0, v18 ; 06242400 v_add_f32_e64 v18, 0, v18 clamp ; D2060812 00022480 v_sub_f32_e32 v21, s1, v10 ; 082A1401 v_sub_f32_e32 v24, s2, v9 ; 08301202 v_mul_f32_e32 v25, v21, v21 ; 10322B15 v_mac_f32_e32 v25, v24, v24 ; 3E323118 v_sub_f32_e32 v26, s3, v14 ; 08341C03 v_mac_f32_e32 v25, v26, v26 ; 3E32351A v_rsq_clamp_f32_e32 v25, v25 ; 7E325919 v_mul_f32_e32 v21, v25, v21 ; 102A2B19 v_mul_f32_e32 v24, v25, v24 ; 10303119 v_mul_f32_e32 v25, v25, v26 ; 10323519 v_mul_f32_e32 v10, v10, v7 ; 10140F0A v_mad_f32 v9, -v9, v6, -v10 ; D2820009 A42A0D09 v_mad_f32 v9, -v14, v5, v9 ; D2820009 24260B0E v_mul_f32_e32 v7, v21, v7 ; 100E0F15 v_mac_f32_e32 v7, v24, v6 ; 3E0E0D18 v_mul_f32_e32 v6, s1, v21 ; 100C2A01 v_mac_f32_e32 v6, s2, v24 ; 3E0C3002 v_mac_f32_e32 v6, s3, v25 ; 3E0C3203 v_mac_f32_e32 v7, v25, v5 ; 3E0E0B19 v_max_f32_e32 v5, 0, v6 ; 200A0C80 v_sub_f32_e32 v6, 1.0, v5 ; 080C0AF2 v_mul_f32_e32 v10, v6, v6 ; 10140D06 v_mul_f32_e32 v6, v6, v10 ; 100C1506 v_mul_f32_e32 v6, v6, v10 ; 100C1506 v_max_f32_e32 v9, 0, v9 ; 20121280 v_sub_f32_e32 v10, 1.0, v9 ; 081412F2 v_mul_f32_e32 v14, v10, v10 ; 101C150A v_mul_f32_e32 v21, v10, v14 ; 102A1D0A v_mad_f32 v24, -v14, v21, 1.0 ; D2820018 23CA2B0E v_mul_f32_e32 v25, v4, v24 ; 10323104 v_sub_f32_e32 v26, 1.0, v4 ; 083408F2 v_mac_f32_e32 v4, v6, v26 ; 3E083506 v_mul_f32_e32 v26, v1, v24 ; 10343101 v_sub_f32_e32 v27, 1.0, v1 ; 083602F2 v_mac_f32_e32 v1, v6, v27 ; 3E023706 v_mul_f32_e32 v24, v0, v24 ; 10303100 v_sub_f32_e32 v27, 1.0, v0 ; 083600F2 v_mac_f32_e32 v0, v6, v27 ; 3E003706 v_sub_f32_e64 v6, 1.0, s0 ; D2080006 000000F2 v_sub_f32_e32 v27, 1.0, v6 ; 08360CF2 v_mov_b32_e32 v28, 0x3cf5c28f ; 7E3802FF 3CF5C28F v_madmk_f32_e32 v27, v27, v28, 0x3f77ced9 ; 4036391B 3F77CED9 v_add_f32_e32 v28, v5, v5 ; 06380B05 v_mul_f32_e32 v5, v6, v5 ; 100A0B06 v_mad_f32 v5, v28, v5, 0.5 ; D2820005 03C20B1C v_mul_f32_e32 v14, v21, v14 ; 101C1D15 v_mac_f32_e32 v25, v18, v14 ; 3E321D12 v_mac_f32_e32 v26, v18, v14 ; 3E341D12 v_mac_f32_e32 v24, v18, v14 ; 3E301D12 v_mul_f32_e32 v6, v6, v6 ; 100C0D06 v_log_f32_e32 v18, v27 ; 7E244F1B v_mul_f32_e32 v6, s23, v6 ; 100C0C17 v_mul_f32_e32 v10, v6, v10 ; 10141506 v_mac_f32_e32 v10, 1.0, v9 ; 3E1412F2 v_rcp_f32_e32 v9, v18 ; 7E125512 v_sub_f32_e32 v18, 1.0, v11 ; 082416F2 v_mul_f32_e32 v6, v6, v18 ; 100C2506 v_mac_f32_e32 v6, 1.0, v11 ; 3E0C16F2 v_max_f32_e32 v7, 0, v7 ; 200E0E80 v_log_f32_e32 v7, v7 ; 7E0E4F07 v_madak_f32_e32 v6, v6, v10, 0x38d1b717 ; 420C1506 38D1B717 v_mul_f32_e32 v9, 0x41200000, v9 ; 101212FF 41200000 v_mul_f32_e32 v10, v9, v9 ; 10141309 v_mul_legacy_f32_e32 v7, v10, v7 ; 0E0E0F0A v_rcp_f32_e32 v6, v6 ; 7E0C5506 v_mad_f32 v9, v9, v9, 1.0 ; D2820009 03CA1309 v_mul_f32_e32 v9, s22, v9 ; 10121216 v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_mul_f32_e32 v7, v9, v7 ; 100E0F09 v_mul_f32_e32 v6, v7, v6 ; 100C0D07 v_mul_f32_e32 v7, v18, v18 ; 100E2512 v_mul_f32_e32 v9, v18, v7 ; 10120F12 v_mul_f32_e32 v7, v9, v7 ; 100E0F09 v_add_f32_e32 v5, -1.0, v5 ; 060A0AF3 v_mad_f32 v7, v5, v7, 1.0 ; D2820007 03CA0F05 v_mad_f32 v5, v5, v14, 1.0 ; D2820005 03CA1D05 v_mul_f32_e32 v5, v5, v7 ; 100A0F05 image_sample v[27:29], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[24:27] ; F0800700 00C71B02 v_mul_f32_e32 v2, v11, v6 ; 10040D0B v_mul_f32_e32 v2, s21, v2 ; 10040415 v_mul_f32_e32 v3, v11, v5 ; 10060B0B v_mac_f32_e32 v16, s20, v3 ; 3E200614 v_mul_f32_e32 v5, v16, v23 ; 100A2F10 v_max_f32_e32 v2, 0, v2 ; 20040480 v_mul_f32_e32 v6, s20, v2 ; 100C0414 v_mac_f32_e32 v5, v4, v6 ; 3E0A0D04 v_mac_f32_e32 v13, s19, v3 ; 3E1A0613 v_mul_f32_e32 v4, v13, v20 ; 1008290D v_mul_f32_e32 v6, s19, v2 ; 100C0413 v_mac_f32_e32 v4, v1, v6 ; 3E080D01 v_mac_f32_e32 v12, s18, v3 ; 3E180612 v_mul_f32_e32 v1, s18, v2 ; 10020412 v_mul_f32_e32 v2, v12, v19 ; 1004270C v_mac_f32_e32 v2, v0, v1 ; 3E040300 v_mac_f32_e32 v5, v25, v8 ; 3E0A1119 v_mac_f32_e32 v4, v26, v17 ; 3E08231A v_mac_f32_e32 v2, v24, v15 ; 3E041F18 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v5, s17, v27 ; 3E0A3611 v_mac_f32_e32 v4, s16, v28 ; 3E083810 v_mac_f32_e32 v2, s8, v29 ; 3E043A08 v_add_f32_e64 v0, 0, v22 clamp ; D2060800 00022C80 v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 v_mul_f32_e32 v3, s15, v1 ; 1006020F v_mac_f32_e32 v3, v5, v0 ; 3E060105 v_mul_f32_e32 v5, s14, v1 ; 100A020E v_mac_f32_e32 v5, v4, v0 ; 3E0A0104 v_mul_f32_e32 v1, s12, v1 ; 1002020C v_mac_f32_e32 v1, v2, v0 ; 3E020102 v_cvt_pkrtz_f16_f32_e32 v0, v3, v5 ; 5E000B03 v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 40 Code Size: 2156 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL CONST[0..19] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[5], IN[0].xxxx 1: MAD TEMP[0], CONST[6], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[7], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0].xyz, CONST[8], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[16], IN[0].xxxx 5: MAD TEMP[1], CONST[17], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[18], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1], CONST[19], IN[0].wwww, TEMP[1] 8: MAD TEMP[2].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww 9: FSEQ TEMP[3].x, CONST[15].xxxx, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].xy, IN[2].xyxx 12: ELSE :0 13: MOV TEMP[3].xy, IN[3].xyxx 14: ENDIF 15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[14].xyyy, CONST[14].zwww 16: MOV TEMP[2].zw, TEMP[3].yyxy 17: MOV TEMP[3].x, CONST[9].xxxx 18: MOV TEMP[3].y, CONST[10].xxxx 19: MOV TEMP[3].z, CONST[11].xxxx 20: MOV TEMP[4].x, CONST[9].yyyy 21: MOV TEMP[4].y, CONST[10].yyyy 22: MOV TEMP[4].z, CONST[11].yyyy 23: MOV TEMP[5].x, CONST[9].zzzz 24: MOV TEMP[5].y, CONST[10].zzzz 25: MOV TEMP[5].z, CONST[11].zzzz 26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz 29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 30: RSQ TEMP[4].x, TEMP[4].xxxx 31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 32: MOV TEMP[4].xyz, TEMP[3].xyzx 33: MUL TEMP[5], TEMP[3].xyzz, TEMP[3].yzzx 34: DP4 TEMP[6].x, CONST[1], TEMP[5] 35: DP4 TEMP[7].x, CONST[2], TEMP[5] 36: MOV TEMP[6].y, TEMP[7].xxxx 37: DP4 TEMP[5].x, CONST[3], TEMP[5] 38: MOV TEMP[6].z, TEMP[5].xxxx 39: MUL TEMP[5].x, TEMP[3].yyyy, TEMP[3].yyyy 40: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[5].xxxx 41: MAD TEMP[3].xyz, CONST[4].xyzz, TEMP[3].xxxx, TEMP[6].xyzz 42: ADD TEMP[5].xyz, TEMP[0].xyzz, -CONST[0].xyzz 43: MOV TEMP[5].yzw, TEMP[5].yxyz 44: MOV TEMP[5].x, TEMP[1].zzzz 45: MOV TEMP[0].xyz, TEMP[0].xyzx 46: MOV OUT[5], TEMP[0] 47: MOV OUT[1], TEMP[2] 48: MOV OUT[2], TEMP[4] 49: MOV OUT[3], TEMP[3] 50: MOV OUT[0], TEMP[1] 51: MOV OUT[4], TEMP[5] 52: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %5, %7 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = extractelement <4 x float> %83, i32 3 %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 %90 = add i32 %5, %7 %91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %90) %92 = extractelement <4 x float> %91, i32 0 %93 = extractelement <4 x float> %91, i32 1 %94 = extractelement <4 x float> %91, i32 2 %95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0 %97 = add i32 %5, %7 %98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97) %99 = extractelement <4 x float> %98, i32 0 %100 = extractelement <4 x float> %98, i32 1 %101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0 %103 = add i32 %5, %7 %104 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %103) %105 = extractelement <4 x float> %104, i32 0 %106 = extractelement <4 x float> %104, i32 1 %107 = fmul float %31, %84 %108 = fmul float %32, %84 %109 = fmul float %33, %84 %110 = fmul float %34, %84 %111 = fmul float %35, %85 %112 = fadd float %111, %107 %113 = fmul float %36, %85 %114 = fadd float %113, %108 %115 = fmul float %37, %85 %116 = fadd float %115, %109 %117 = fmul float %38, %85 %118 = fadd float %117, %110 %119 = fmul float %39, %86 %120 = fadd float %119, %112 %121 = fmul float %40, %86 %122 = fadd float %121, %114 %123 = fmul float %41, %86 %124 = fadd float %123, %116 %125 = fmul float %42, %86 %126 = fadd float %125, %118 %127 = fmul float %43, %87 %128 = fadd float %127, %120 %129 = fmul float %44, %87 %130 = fadd float %129, %122 %131 = fmul float %45, %87 %132 = fadd float %131, %124 %133 = fmul float %64, %84 %134 = fmul float %65, %84 %135 = fmul float %66, %84 %136 = fmul float %67, %84 %137 = fmul float %68, %85 %138 = fadd float %137, %133 %139 = fmul float %69, %85 %140 = fadd float %139, %134 %141 = fmul float %70, %85 %142 = fadd float %141, %135 %143 = fmul float %71, %85 %144 = fadd float %143, %136 %145 = fmul float %72, %86 %146 = fadd float %145, %138 %147 = fmul float %73, %86 %148 = fadd float %147, %140 %149 = fmul float %74, %86 %150 = fadd float %149, %142 %151 = fmul float %75, %86 %152 = fadd float %151, %144 %153 = fmul float %76, %87 %154 = fadd float %153, %146 %155 = fmul float %77, %87 %156 = fadd float %155, %148 %157 = fmul float %78, %87 %158 = fadd float %157, %150 %159 = fmul float %79, %87 %160 = fadd float %159, %152 %161 = fmul float %99, %55 %162 = fadd float %161, %57 %163 = fmul float %100, %56 %164 = fadd float %163, %58 %165 = fcmp oeq float %63, 0.000000e+00 %. = select i1 %165, float %99, float %105 %.32 = select i1 %165, float %100, float %106 %166 = fmul float %., %59 %167 = fadd float %166, %61 %168 = fmul float %.32, %60 %169 = fadd float %168, %62 %170 = fmul float %46, %92 %171 = fmul float %49, %92 %172 = fmul float %52, %92 %173 = fmul float %47, %93 %174 = fadd float %173, %170 %175 = fmul float %50, %93 %176 = fadd float %175, %171 %177 = fmul float %53, %93 %178 = fadd float %177, %172 %179 = fmul float %48, %94 %180 = fadd float %179, %174 %181 = fmul float %51, %94 %182 = fadd float %181, %176 %183 = fmul float %54, %94 %184 = fadd float %183, %178 %185 = fmul float %180, %180 %186 = fmul float %182, %182 %187 = fadd float %186, %185 %188 = fmul float %184, %184 %189 = fadd float %187, %188 %190 = call float @llvm.AMDGPU.rsq.clamped.f32(float %189) %191 = fmul float %180, %190 %192 = fmul float %182, %190 %193 = fmul float %184, %190 %194 = fmul float %191, %192 %195 = fmul float %192, %193 %196 = fmul float %193, %193 %197 = fmul float %193, %191 %198 = fmul float %16, %194 %199 = fmul float %17, %195 %200 = fadd float %198, %199 %201 = fmul float %18, %196 %202 = fadd float %200, %201 %203 = fmul float %19, %197 %204 = fadd float %202, %203 %205 = fmul float %20, %194 %206 = fmul float %21, %195 %207 = fadd float %205, %206 %208 = fmul float %22, %196 %209 = fadd float %207, %208 %210 = fmul float %23, %197 %211 = fadd float %209, %210 %212 = fmul float %24, %194 %213 = fmul float %25, %195 %214 = fadd float %212, %213 %215 = fmul float %26, %196 %216 = fadd float %214, %215 %217 = fmul float %27, %197 %218 = fadd float %216, %217 %219 = fmul float %192, %192 %220 = fmul float %191, %191 %221 = fsub float %220, %219 %222 = fmul float %28, %221 %223 = fadd float %222, %204 %224 = fmul float %29, %221 %225 = fadd float %224, %211 %226 = fmul float %30, %221 %227 = fadd float %226, %218 %228 = fsub float %128, %13 %229 = fsub float %130, %14 %230 = fsub float %132, %15 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %162, float %164, float %167, float %169) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %191, float %192, float %193, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %223, float %225, float %227, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %158, float %228, float %229, float %230) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %128, float %130, float %132, float %126) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %154, float %156, float %158, float %160) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s18, s[20:23], 0x20 ; C2091520 buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00 s_buffer_load_dword s19, s[20:23], 0x21 ; C2099521 s_buffer_load_dword s24, s[20:23], 0x22 ; C20C1522 s_buffer_load_dword s25, s[20:23], 0x24 ; C20C9524 s_buffer_load_dword s26, s[20:23], 0x25 ; C20D1525 s_buffer_load_dword s27, s[20:23], 0x26 ; C20D9526 s_buffer_load_dword s28, s[20:23], 0x28 ; C20E1528 s_buffer_load_dword s29, s[20:23], 0x29 ; C20E9529 s_buffer_load_dword s30, s[20:23], 0x2a ; C20F152A s_buffer_load_dword s31, s[20:23], 0x2c ; C20F952C s_buffer_load_dword s32, s[20:23], 0x2d ; C210152D s_buffer_load_dword s33, s[20:23], 0x2e ; C210952E s_buffer_load_dword s34, s[20:23], 0x34 ; C2111534 s_buffer_load_dword s35, s[20:23], 0x35 ; C2119535 s_buffer_load_dword s5, s[20:23], 0x36 ; C2029536 s_buffer_load_dword s0, s[20:23], 0x0 ; C2001500 s_buffer_load_dword s1, s[20:23], 0x1 ; C2009501 s_buffer_load_dword s2, s[20:23], 0x2 ; C2011502 s_buffer_load_dword s6, s[20:23], 0x4 ; C2031504 s_buffer_load_dword s14, s[20:23], 0x5 ; C2071505 s_buffer_load_dword s4, s[20:23], 0x6 ; C2021506 s_buffer_load_dword s3, s[20:23], 0x7 ; C2019507 s_buffer_load_dword s12, s[20:23], 0x8 ; C2061508 s_buffer_load_dword s16, s[20:23], 0x9 ; C2081509 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s5 ; 7E000205 s_buffer_load_dword s7, s[20:23], 0xa ; C203950A s_buffer_load_dword s5, s[20:23], 0xb ; C202950B s_buffer_load_dword s15, s[20:23], 0xc ; C207950C s_buffer_load_dword s17, s[20:23], 0xd ; C208950D s_buffer_load_dword s13, s[20:23], 0xe ; C206950E s_buffer_load_dword s8, s[20:23], 0xf ; C204150F s_buffer_load_dword s36, s[20:23], 0x3c ; C212153C s_buffer_load_dword s37, s[20:23], 0x40 ; C2129540 s_buffer_load_dword s38, s[20:23], 0x41 ; C2131541 s_buffer_load_dword s39, s[20:23], 0x42 ; C2139542 s_buffer_load_dword s40, s[20:23], 0x43 ; C2141543 s_buffer_load_dword s9, s[20:23], 0x10 ; C2049510 s_buffer_load_dword s10, s[20:23], 0x11 ; C2051511 s_buffer_load_dword s11, s[20:23], 0x12 ; C2059512 s_buffer_load_dword s41, s[20:23], 0x14 ; C2149514 s_buffer_load_dword s42, s[20:23], 0x15 ; C2151515 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_eq_f32_e64 vcc, 0, s36 ; D004006A 00004880 s_buffer_load_dword s36, s[20:23], 0x16 ; C2121516 s_buffer_load_dword s43, s[20:23], 0x17 ; C2159517 s_buffer_load_dword s44, s[20:23], 0x18 ; C2161518 s_buffer_load_dword s45, s[20:23], 0x19 ; C2169519 s_buffer_load_dword s46, s[20:23], 0x1a ; C217151A s_buffer_load_dword s47, s[20:23], 0x37 ; C2179537 s_buffer_load_dword s48, s[20:23], 0x38 ; C2181538 s_buffer_load_dword s49, s[20:23], 0x39 ; C2189539 s_buffer_load_dword s50, s[20:23], 0x3a ; C219153A s_buffer_load_dword s51, s[20:23], 0x3b ; C219953B s_buffer_load_dword s52, s[20:23], 0x1b ; C21A151B s_buffer_load_dword s53, s[20:23], 0x1c ; C21A951C s_buffer_load_dword s54, s[20:23], 0x1d ; C21B151D s_buffer_load_dword s55, s[20:23], 0x1e ; C21B951E s_buffer_load_dword s56, s[20:23], 0x1f ; C21C151F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v13, s47 ; 7E1A022F s_buffer_load_dword s47, s[20:23], 0x44 ; C2179544 s_buffer_load_dword s57, s[20:23], 0x45 ; C21C9545 s_buffer_load_dword s58, s[20:23], 0x46 ; C21D1546 v_mov_b32_e32 v14, s50 ; 7E1C0232 s_buffer_load_dword s50, s[20:23], 0x47 ; C2191547 v_mov_b32_e32 v15, s51 ; 7E1E0233 s_buffer_load_dword s51, s[20:23], 0x48 ; C2199548 s_buffer_load_dword s59, s[20:23], 0x49 ; C21D9549 s_buffer_load_dword s60, s[20:23], 0x4a ; C21E154A s_buffer_load_dword s61, s[20:23], 0x4b ; C21E954B s_buffer_load_dword s62, s[20:23], 0x4c ; C21F154C s_buffer_load_dword s63, s[20:23], 0x4d ; C21F954D s_buffer_load_dword s64, s[20:23], 0x4e ; C220154E s_buffer_load_dword s20, s[20:23], 0x4f ; C20A154F v_mul_f32_e32 v16, s41, v2 ; 10200429 v_mac_f32_e32 v0, s34, v9 ; 3E001222 v_mul_f32_e32 v17, s42, v2 ; 1022042A v_mul_f32_e32 v18, s36, v2 ; 10240424 v_mul_f32_e32 v19, s43, v2 ; 1026042B v_mac_f32_e32 v13, s35, v10 ; 3E1A1423 v_mul_f32_e32 v20, s25, v6 ; 10280C19 v_mul_f32_e32 v21, s28, v6 ; 102A0C1C v_mul_f32_e32 v6, s31, v6 ; 100C0C1F v_mac_f32_e32 v16, s44, v3 ; 3E20062C v_mac_f32_e32 v17, s45, v3 ; 3E22062D v_mac_f32_e32 v18, s46, v3 ; 3E24062E v_mac_f32_e32 v20, s26, v7 ; 3E280E1A v_mac_f32_e32 v21, s29, v7 ; 3E2A0E1D v_cndmask_b32_e32 v9, v11, v9 ; 0012130B v_cndmask_b32_e32 v10, v12, v10 ; 0014150C v_mac_f32_e32 v6, s32, v7 ; 3E0C0E20 v_mac_f32_e32 v20, s27, v8 ; 3E28101B v_mac_f32_e32 v21, s30, v8 ; 3E2A101E v_mac_f32_e32 v6, s33, v8 ; 3E0C1021 v_mac_f32_e32 v19, s52, v3 ; 3E260634 v_mul_f32_e32 v7, s37, v2 ; 100E0425 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v7, s47, v3 ; 3E0E062F v_mul_f32_e32 v8, s38, v2 ; 10100426 v_mac_f32_e32 v8, s57, v3 ; 3E100639 v_mul_f32_e32 v11, s39, v2 ; 10160427 v_mac_f32_e32 v11, s58, v3 ; 3E16063A v_mul_f32_e32 v2, s40, v2 ; 10040428 v_mac_f32_e32 v2, s50, v3 ; 3E040632 v_mac_f32_e32 v16, s53, v4 ; 3E200835 v_mac_f32_e32 v17, s54, v4 ; 3E220836 v_mac_f32_e32 v18, s55, v4 ; 3E240837 v_mac_f32_e32 v19, s56, v4 ; 3E260838 v_mac_f32_e32 v7, s51, v4 ; 3E0E0833 v_mac_f32_e32 v8, s59, v4 ; 3E10083B v_mac_f32_e32 v11, s60, v4 ; 3E16083C v_mac_f32_e32 v2, s61, v4 ; 3E04083D v_mac_f32_e32 v16, s18, v5 ; 3E200A12 v_mac_f32_e32 v17, s19, v5 ; 3E220A13 v_mac_f32_e32 v18, s24, v5 ; 3E240A18 v_mac_f32_e32 v7, s62, v5 ; 3E0E0A3E v_mac_f32_e32 v8, s63, v5 ; 3E100A3F v_mac_f32_e32 v11, s64, v5 ; 3E160A40 v_mac_f32_e32 v2, s20, v5 ; 3E040A14 v_mul_f32_e32 v3, v20, v20 ; 10062914 v_mac_f32_e32 v3, v21, v21 ; 3E062B15 v_mac_f32_e32 v3, v6, v6 ; 3E060D06 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_mac_f32_e32 v14, s48, v9 ; 3E1C1230 v_mac_f32_e32 v15, s49, v10 ; 3E1E1431 exp 15, 32, 0, 0, 0, v0, v13, v14, v15 ; F800020F 0F0E0D00 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v3, v20 ; 10002903 v_mul_f32_e32 v4, v3, v21 ; 10082B03 v_mul_f32_e32 v3, v3, v6 ; 10060D03 v_mul_f32_e32 v5, v3, v4 ; 100A0903 v_mul_f32_e32 v6, s14, v5 ; 100C0A0E v_mul_f32_e32 v9, s16, v5 ; 10120A10 v_mul_f32_e32 v5, s17, v5 ; 100A0A11 v_mul_f32_e32 v10, v4, v0 ; 10140104 v_mac_f32_e32 v6, s6, v10 ; 3E0C1406 v_mac_f32_e32 v9, s12, v10 ; 3E12140C v_mac_f32_e32 v5, s15, v10 ; 3E0A140F v_mul_f32_e32 v10, v3, v3 ; 10140703 v_mac_f32_e32 v6, s4, v10 ; 3E0C1404 v_mac_f32_e32 v9, s7, v10 ; 3E121407 v_mac_f32_e32 v5, s13, v10 ; 3E0A140D v_mul_f32_e32 v10, v0, v3 ; 10140700 v_mac_f32_e32 v6, s3, v10 ; 3E0C1403 v_mac_f32_e32 v9, s5, v10 ; 3E121405 v_mac_f32_e32 v5, s8, v10 ; 3E0A1408 v_mul_f32_e32 v10, v4, v4 ; 10140904 v_mad_f32 v10, v0, v0, -v10 ; D282000A 842A0100 v_mac_f32_e32 v6, s9, v10 ; 3E0C1409 v_mac_f32_e32 v9, s10, v10 ; 3E12140A v_mac_f32_e32 v5, s11, v10 ; 3E0A140B v_subrev_f32_e32 v10, s0, v16 ; 0A142000 v_subrev_f32_e32 v12, s1, v17 ; 0A182201 v_subrev_f32_e32 v13, s2, v18 ; 0A1A2402 exp 15, 33, 0, 0, 0, v0, v4, v3, v1 ; F800021F 01030400 exp 15, 34, 0, 0, 0, v6, v9, v5, v1 ; F800022F 01050906 exp 15, 35, 0, 0, 0, v11, v10, v12, v13 ; F800023F 0D0C0A0B exp 15, 36, 0, 0, 0, v16, v17, v18, v19 ; F800024F 13121110 exp 15, 12, 0, 1, 0, v7, v8, v11, v2 ; F80008CF 020B0807 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 24 Code Size: 748 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SVIEW[0], CUBE, FLOAT DCL SVIEW[1], CUBE, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL CONST[0..5] DCL CONST[8..19] DCL CONST[21..22] DCL CONST[24] DCL TEMP[0..17], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, 0.5000} IMM[1] FLT32 { 0.7500, 7.0000, 1.0000, 10.0000} IMM[2] FLT32 { 0.9680, 0.0300, 0.0001, -1.0000} 0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx 3: DP3 TEMP[1].x, IN[3].yzww, IN[3].yzww 4: RSQ TEMP[1].x, TEMP[1].xxxx 5: MUL TEMP[1].xyz, IN[3].yzww, TEMP[1].xxxx 6: MOV TEMP[2].xy, IN[0].xyyy 7: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D 8: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[2].xyzz 9: LRP TEMP[3].xyz, CONST[21].xxxx, TEMP[2].xyzz, CONST[16].xyzz 10: MUL TEMP[4].x, CONST[21].xxxx, CONST[16].wwww 11: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx 12: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx 13: MOV TEMP[5].xy, IN[0].xyyy 14: TEX TEMP[5].y, TEMP[5], SAMP[3], 2D 15: ADD TEMP[6].x, IMM[0].xxxx, -CONST[24].xxxx 16: MAD TEMP[5].x, TEMP[5].yyyy, CONST[24].xxxx, TEMP[6].xxxx 17: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz 18: MAX TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx 19: MOV TEMP[7].xyz, IMM[0].yyyy 20: MOV TEMP[8].w, IMM[0].xxxx 21: MOV TEMP[8].xyz, TEMP[0].xyzx 22: DP4 TEMP[9].x, CONST[1], TEMP[8] 23: DP4 TEMP[10].x, CONST[2], TEMP[8] 24: MOV TEMP[9].y, TEMP[10].xxxx 25: DP4 TEMP[8].x, CONST[3], TEMP[8] 26: MOV TEMP[9].z, TEMP[8].xxxx 27: ADD TEMP[8].xyz, IN[2].xyzz, TEMP[9].xyzz 28: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx 29: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[1].xyzz 30: MUL TEMP[9].xyz, TEMP[9].xxxx, TEMP[0].xyzz 31: MUL TEMP[9].xyz, IMM[0].zzzz, TEMP[9].xyzz 32: ADD TEMP[9].xyz, TEMP[1].xyzz, -TEMP[9].xyzz 33: MOV TEMP[10].xyz, TEMP[9].xyzx 34: FSLT TEMP[11].x, IMM[0].yyyy, CONST[10].wwww 35: UIF TEMP[11].xxxx :0 36: DP3 TEMP[11].x, TEMP[9].xyzz, TEMP[9].xyzz 37: RSQ TEMP[11].x, TEMP[11].xxxx 38: MUL TEMP[11].xyz, TEMP[9].xyzz, TEMP[11].xxxx 39: MOV TEMP[12].xyz, -IN[4].xyzx 40: ADD TEMP[13].xyz, CONST[8].xyzz, TEMP[12].xyzz 41: RCP TEMP[14].x, TEMP[11].xxxx 42: RCP TEMP[14].y, TEMP[11].yyyy 43: RCP TEMP[14].z, TEMP[11].zzzz 44: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz 45: ADD TEMP[12].xyz, CONST[9].xyzz, TEMP[12].xyzz 46: RCP TEMP[14].x, TEMP[11].xxxx 47: RCP TEMP[14].y, TEMP[11].yyyy 48: RCP TEMP[14].z, TEMP[11].zzzz 49: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz 50: FSLT TEMP[14].xyz, IMM[0].yyyy, TEMP[11].xyzz 51: UIF TEMP[14].xxxx :0 52: MOV TEMP[15].x, TEMP[13].xxxx 53: ELSE :0 54: MOV TEMP[15].x, TEMP[12].xxxx 55: ENDIF 56: UIF TEMP[14].yyyy :0 57: MOV TEMP[16].x, TEMP[13].yyyy 58: ELSE :0 59: MOV TEMP[16].x, TEMP[12].yyyy 60: ENDIF 61: UIF TEMP[14].zzzz :0 62: MOV TEMP[13].x, TEMP[13].zzzz 63: ELSE :0 64: MOV TEMP[13].x, TEMP[12].zzzz 65: ENDIF 66: ADD TEMP[12].xyz, CONST[8].xyzz, CONST[9].xyzz 67: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[0].wwww 68: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx 69: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx 70: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[10].xyzz 71: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[4].xyzz 72: MAD TEMP[11].xyz, TEMP[11].xyzz, TEMP[13].xxxx, TEMP[14].xyzz 73: ADD TEMP[10].xyz, TEMP[11].xyzz, -TEMP[12].xyzz 74: ENDIF 75: ADD TEMP[11].x, IMM[0].xxxx, -CONST[22].xxxx 76: POW TEMP[11].x, TEMP[11].xxxx, IMM[1].xxxx 77: MUL TEMP[11].x, TEMP[11].xxxx, IMM[1].yyyy 78: MOV TEMP[10].xyz, TEMP[10].xyzz 79: MOV TEMP[10].w, TEMP[11].xxxx 80: TXL TEMP[10], TEMP[10], SAMP[0], CUBE 81: POW TEMP[11].x, TEMP[10].wwww, CONST[11].yyyy 82: MUL TEMP[11].x, CONST[11].xxxx, TEMP[11].xxxx 83: MUL TEMP[10].xyz, TEMP[11].xxxx, TEMP[10].xyzz 84: FSLT TEMP[11].x, CONST[9].wwww, IMM[1].zzzz 85: UIF TEMP[11].xxxx :0 86: MOV TEMP[11].xyz, TEMP[9].xyzx 87: FSLT TEMP[12].x, IMM[0].yyyy, CONST[14].wwww 88: UIF TEMP[12].xxxx :0 89: DP3 TEMP[12].x, TEMP[9].xyzz, TEMP[9].xyzz 90: RSQ TEMP[12].x, TEMP[12].xxxx 91: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[12].xxxx 92: MOV TEMP[12].xyz, -IN[4].xyzx 93: ADD TEMP[13].xyz, CONST[12].xyzz, TEMP[12].xyzz 94: RCP TEMP[14].x, TEMP[9].xxxx 95: RCP TEMP[14].y, TEMP[9].yyyy 96: RCP TEMP[14].z, TEMP[9].zzzz 97: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz 98: ADD TEMP[12].xyz, CONST[13].xyzz, TEMP[12].xyzz 99: RCP TEMP[14].x, TEMP[9].xxxx 100: RCP TEMP[14].y, TEMP[9].yyyy 101: RCP TEMP[14].z, TEMP[9].zzzz 102: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz 103: FSLT TEMP[14].xyz, IMM[0].yyyy, TEMP[9].xyzz 104: UIF TEMP[14].xxxx :0 105: MOV TEMP[15].x, TEMP[13].xxxx 106: ELSE :0 107: MOV TEMP[15].x, TEMP[12].xxxx 108: ENDIF 109: UIF TEMP[14].yyyy :0 110: MOV TEMP[16].x, TEMP[13].yyyy 111: ELSE :0 112: MOV TEMP[16].x, TEMP[12].yyyy 113: ENDIF 114: UIF TEMP[14].zzzz :0 115: MOV TEMP[13].x, TEMP[13].zzzz 116: ELSE :0 117: MOV TEMP[13].x, TEMP[12].zzzz 118: ENDIF 119: ADD TEMP[12].xyz, CONST[12].xyzz, CONST[13].xyzz 120: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[0].wwww 121: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx 122: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx 123: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[14].xyzz 124: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[4].xyzz 125: MAD TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xxxx, TEMP[14].xyzz 126: ADD TEMP[11].xyz, TEMP[9].xyzz, -TEMP[12].xyzz 127: ENDIF 128: ADD TEMP[9].x, IMM[0].xxxx, -CONST[22].xxxx 129: POW TEMP[9].x, TEMP[9].xxxx, IMM[1].xxxx 130: MUL TEMP[9].x, TEMP[9].xxxx, IMM[1].yyyy 131: MOV TEMP[11].xyz, TEMP[11].xyzz 132: MOV TEMP[11].w, TEMP[9].xxxx 133: TXL TEMP[9], TEMP[11], SAMP[1], CUBE 134: POW TEMP[11].x, TEMP[9].wwww, CONST[15].yyyy 135: MUL TEMP[11].x, CONST[15].xxxx, TEMP[11].xxxx 136: MUL TEMP[9].xyz, TEMP[11].xxxx, TEMP[9].xyzz 137: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[10].xyzz, TEMP[9].xyzz 138: ELSE :0 139: MOV TEMP[7].xyz, TEMP[10].xyzx 140: ENDIF 141: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx 142: MOV TEMP[1].xyz, -TEMP[1].xyzx 143: ADD TEMP[5].x, IMM[0].xxxx, -CONST[22].xxxx 144: ADD TEMP[9].xyz, CONST[0].xyzz, TEMP[1].xyzz 145: DP3 TEMP[10].x, TEMP[9].xyzz, TEMP[9].xyzz 146: RSQ TEMP[10].x, TEMP[10].xxxx 147: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xxxx 148: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz 149: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx 150: DP3 TEMP[10].x, CONST[0].xyzz, TEMP[9].xyzz 151: MAX TEMP[10].x, IMM[0].yyyy, TEMP[10].xxxx 152: MUL TEMP[11].x, TEMP[5].xxxx, TEMP[5].xxxx 153: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].wwww 154: ADD TEMP[12].x, IMM[0].xxxx, -TEMP[5].xxxx 155: MAD TEMP[12].x, TEMP[12].xxxx, IMM[2].xxxx, IMM[2].yyyy 156: LG2 TEMP[12].x, TEMP[12].xxxx 157: RCP TEMP[12].x, TEMP[12].xxxx 158: MUL TEMP[12].x, IMM[1].wwww, TEMP[12].xxxx 159: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[12].xxxx 160: ADD TEMP[13].x, IMM[0].xxxx, -TEMP[6].xxxx 161: ADD TEMP[14].x, IMM[0].xxxx, -TEMP[1].xxxx 162: MUL TEMP[15].x, IMM[0].zzzz, TEMP[10].xxxx 163: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx 164: MAD TEMP[5].x, TEMP[15].xxxx, TEMP[5].xxxx, IMM[0].wwww 165: ADD TEMP[10].x, IMM[0].xxxx, -TEMP[10].xxxx 166: ADD TEMP[15].x, IMM[0].xxxx, -TEMP[1].xxxx 167: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx 168: ADD TEMP[4].x, CONST[22].xxxx, TEMP[4].xxxx 169: MOV_SAT TEMP[4].x, TEMP[4].xxxx 170: MUL TEMP[16].x, TEMP[15].xxxx, TEMP[15].xxxx 171: MUL TEMP[17].x, TEMP[15].xxxx, TEMP[15].xxxx 172: MUL TEMP[15].x, TEMP[17].xxxx, TEMP[15].xxxx 173: MUL TEMP[15].x, TEMP[16].xxxx, TEMP[15].xxxx 174: LRP TEMP[4].xyz, TEMP[15].xxxx, TEMP[4].xxxx, TEMP[3].xyzz 175: LRP TEMP[15].x, TEMP[6].xxxx, IMM[0].xxxx, TEMP[11].xxxx 176: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx, TEMP[11].xxxx 177: MAD TEMP[1].x, TEMP[15].xxxx, TEMP[1].xxxx, IMM[2].zzzz 178: RCP TEMP[1].x, TEMP[1].xxxx 179: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[9].xyzz 180: MAX TEMP[9].x, IMM[0].yyyy, TEMP[9].xxxx 181: POW TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx 182: ADD TEMP[11].x, TEMP[12].xxxx, IMM[0].xxxx 183: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].yyyy 184: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx 185: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[9].xxxx 186: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx 187: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx 188: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx 189: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[17].xyzz 190: ADD TEMP[9].xyz, IMM[0].xxxx, -TEMP[3].xyzz 191: MUL TEMP[11].x, TEMP[10].xxxx, TEMP[10].xxxx 192: MUL TEMP[12].x, TEMP[10].xxxx, TEMP[10].xxxx 193: MUL TEMP[10].x, TEMP[12].xxxx, TEMP[10].xxxx 194: MUL TEMP[10].x, TEMP[11].xxxx, TEMP[10].xxxx 195: MAD TEMP[3].xyz, TEMP[9].xyzz, TEMP[10].xxxx, TEMP[3].xyzz 196: ADD TEMP[9].x, TEMP[5].xxxx, IMM[2].wwww 197: MUL TEMP[10].x, TEMP[13].xxxx, TEMP[13].xxxx 198: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[13].xxxx 199: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[13].xxxx 200: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 201: MAD TEMP[9].x, TEMP[9].xxxx, TEMP[10].xxxx, IMM[0].xxxx 202: ADD TEMP[5].x, TEMP[5].xxxx, IMM[2].wwww 203: MUL TEMP[10].x, TEMP[14].xxxx, TEMP[14].xxxx 204: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx 205: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx 206: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 207: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[10].xxxx, IMM[0].xxxx 208: MUL TEMP[5].x, TEMP[9].xxxx, TEMP[5].xxxx 209: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 210: MAD TEMP[5].xyz, CONST[17].xyzz, TEMP[5].xxxx, TEMP[8].xyzz 211: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz 212: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz 213: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz 214: MOV TEMP[0].xyz, TEMP[0].xyzx 215: MAD TEMP[1].x, IN[3].xxxx, CONST[5].zzzz, CONST[5].wwww 216: MOV_SAT TEMP[1].x, TEMP[1].xxxx 217: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz 218: MOV TEMP[0].xyz, TEMP[0].xyzx 219: MOV TEMP[0].w, IMM[0].xxxx 220: MOV OUT[0], TEMP[0] 221: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300) %76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312) %79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336) %80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384) %82 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %83 = load <32 x i8>, <32 x i8> addrspace(2)* %82, align 32, !tbaa !0 %84 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %85 = load <16 x i8>, <16 x i8> addrspace(2)* %84, align 16, !tbaa !0 %86 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %87 = bitcast <8 x i32> addrspace(2)* %86 to <32 x i8> addrspace(2)* %88 = load <32 x i8>, <32 x i8> addrspace(2)* %87, align 32, !tbaa !0 %89 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %90 = bitcast <4 x i32> addrspace(2)* %89 to <16 x i8> addrspace(2)* %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 %92 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %93 = bitcast <8 x i32> addrspace(2)* %92 to <32 x i8> addrspace(2)* %94 = load <32 x i8>, <32 x i8> addrspace(2)* %93, align 32, !tbaa !0 %95 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %96 = bitcast <4 x i32> addrspace(2)* %95 to <16 x i8> addrspace(2)* %97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0 %98 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %99 = bitcast <8 x i32> addrspace(2)* %98 to <32 x i8> addrspace(2)* %100 = load <32 x i8>, <32 x i8> addrspace(2)* %99, align 32, !tbaa !0 %101 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %102 = bitcast <4 x i32> addrspace(2)* %101 to <16 x i8> addrspace(2)* %103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !tbaa !0 %104 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %110 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %111 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %113 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %114 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %115 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %116 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %117 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %119 = fmul float %106, %106 %120 = fmul float %107, %107 %121 = fadd float %120, %119 %122 = fmul float %108, %108 %123 = fadd float %121, %122 %124 = call float @llvm.AMDGPU.rsq.clamped.f32(float %123) %125 = fmul float %106, %124 %126 = fmul float %107, %124 %127 = fmul float %108, %124 %128 = fmul float %113, %113 %129 = fmul float %114, %114 %130 = fadd float %129, %128 %131 = fmul float %115, %115 %132 = fadd float %130, %131 %133 = call float @llvm.AMDGPU.rsq.clamped.f32(float %132) %134 = fmul float %113, %133 %135 = fmul float %114, %133 %136 = fmul float %115, %133 %137 = bitcast float %104 to i32 %138 = bitcast float %105 to i32 %139 = insertelement <2 x i32> undef, i32 %137, i32 0 %140 = insertelement <2 x i32> %139, i32 %138, i32 1 %141 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %140, <32 x i8> %94, <16 x i8> %97, i32 2) %142 = extractelement <4 x float> %141, i32 0 %143 = extractelement <4 x float> %141, i32 1 %144 = extractelement <4 x float> %141, i32 2 %145 = fmul float %76, %142 %146 = fmul float %77, %143 %147 = fmul float %78, %144 %148 = call float @llvm.AMDGPU.lrp(float %79, float %145, float %66) %149 = call float @llvm.AMDGPU.lrp(float %79, float %146, float %67) %150 = call float @llvm.AMDGPU.lrp(float %79, float %147, float %68) %151 = fmul float %79, %69 %152 = fsub float %69, %151 %153 = fmul float %145, %152 %154 = fmul float %146, %152 %155 = fmul float %147, %152 %156 = bitcast float %104 to i32 %157 = bitcast float %105 to i32 %158 = insertelement <2 x i32> undef, i32 %156, i32 0 %159 = insertelement <2 x i32> %158, i32 %157, i32 1 %160 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %159, <32 x i8> %100, <16 x i8> %103, i32 2) %161 = extractelement <4 x float> %160, i32 1 %162 = fsub float 1.000000e+00, %81 %163 = fmul float %161, %81 %164 = fadd float %163, %162 %165 = fmul float %125, %24 %166 = fmul float %126, %25 %167 = fadd float %166, %165 %168 = fmul float %127, %26 %169 = fadd float %167, %168 %170 = call float @llvm.maxnum.f32(float %169, float 0.000000e+00) %171 = fmul float %27, %125 %172 = fmul float %28, %126 %173 = fadd float %171, %172 %174 = fmul float %29, %127 %175 = fadd float %173, %174 %176 = fadd float %175, %30 %177 = fmul float %31, %125 %178 = fmul float %32, %126 %179 = fadd float %177, %178 %180 = fmul float %33, %127 %181 = fadd float %179, %180 %182 = fadd float %181, %34 %183 = fmul float %35, %125 %184 = fmul float %36, %126 %185 = fadd float %183, %184 %186 = fmul float %37, %127 %187 = fadd float %185, %186 %188 = fadd float %187, %38 %189 = fadd float %109, %176 %190 = fadd float %110, %182 %191 = fadd float %111, %188 %192 = fmul float %189, %164 %193 = fmul float %190, %164 %194 = fmul float %191, %164 %195 = fmul float %125, %134 %196 = fmul float %126, %135 %197 = fadd float %196, %195 %198 = fmul float %127, %136 %199 = fadd float %197, %198 %200 = fmul float %199, %125 %201 = fmul float %199, %126 %202 = fmul float %199, %127 %203 = fmul float %200, 2.000000e+00 %204 = fmul float %201, 2.000000e+00 %205 = fmul float %202, 2.000000e+00 %206 = fsub float %134, %203 %207 = fsub float %135, %204 %208 = fsub float %136, %205 %209 = fcmp ogt float %51, 0.000000e+00 br i1 %209, label %IF, label %ENDIF IF: ; preds = %main_body %210 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %211 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %212 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %213 = fmul float %206, %206 %214 = fmul float %207, %207 %215 = fadd float %214, %213 %216 = fmul float %208, %208 %217 = fadd float %215, %216 %218 = call float @llvm.AMDGPU.rsq.clamped.f32(float %217) %219 = fmul float %206, %218 %220 = fmul float %207, %218 %221 = fmul float %208, %218 %222 = fsub float %44, %116 %223 = fsub float %45, %117 %224 = fsub float %46, %118 %225 = fdiv float 1.000000e+00, %219 %226 = fdiv float 1.000000e+00, %220 %227 = fdiv float 1.000000e+00, %221 %228 = fmul float %222, %225 %229 = fmul float %223, %226 %230 = fmul float %224, %227 %231 = fsub float %47, %116 %232 = fsub float %48, %117 %233 = fsub float %49, %118 %234 = fdiv float 1.000000e+00, %219 %235 = fdiv float 1.000000e+00, %220 %236 = fdiv float 1.000000e+00, %221 %237 = fmul float %231, %234 %238 = fmul float %232, %235 %239 = fmul float %233, %236 %240 = fcmp ogt float %219, 0.000000e+00 %241 = fcmp ogt float %220, 0.000000e+00 %242 = fcmp ogt float %221, 0.000000e+00 %. = select i1 %240, float %228, float %237 %temp64.0 = select i1 %241, float %229, float %238 %.96 = select i1 %242, float %230, float %239 %243 = fadd float %44, %47 %244 = fadd float %45, %48 %245 = fadd float %46, %49 %246 = fmul float %243, 5.000000e-01 %247 = fmul float %244, 5.000000e-01 %248 = fmul float %245, 5.000000e-01 %249 = call float @llvm.minnum.f32(float %., float %temp64.0) %250 = call float @llvm.minnum.f32(float %249, float %.96) %251 = fsub float %246, %212 %252 = fsub float %247, %211 %253 = fsub float %248, %210 %254 = fadd float %251, %116 %255 = fadd float %252, %117 %256 = fadd float %253, %118 %257 = fmul float %219, %250 %258 = fadd float %257, %254 %259 = fmul float %220, %250 %260 = fadd float %259, %255 %261 = fmul float %221, %250 %262 = fadd float %261, %256 %263 = fsub float %258, %246 %264 = fsub float %260, %247 %265 = fsub float %262, %248 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp40.0 = phi float [ %263, %IF ], [ %206, %main_body ] %temp41.0 = phi float [ %264, %IF ], [ %207, %main_body ] %temp42.0 = phi float [ %265, %IF ], [ %208, %main_body ] %266 = fsub float 1.000000e+00, %80 %267 = call float @llvm.pow.f32(float %266, float 7.500000e-01) %268 = fmul float %267, 7.000000e+00 %269 = insertelement <4 x float> undef, float %temp40.0, i32 0 %270 = insertelement <4 x float> %269, float %temp41.0, i32 1 %271 = insertelement <4 x float> %270, float %temp42.0, i32 2 %272 = insertelement <4 x float> %271, float %268, i32 3 %273 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %272) %274 = extractelement <4 x float> %273, i32 0 %275 = extractelement <4 x float> %273, i32 1 %276 = extractelement <4 x float> %273, i32 2 %277 = extractelement <4 x float> %273, i32 3 %278 = call float @llvm.fabs.f32(float %276) %279 = fdiv float 1.000000e+00, %278 %280 = fmul float %274, %279 %281 = fadd float %280, 1.500000e+00 %282 = fmul float %275, %279 %283 = fadd float %282, 1.500000e+00 %284 = bitcast float %283 to i32 %285 = bitcast float %281 to i32 %286 = bitcast float %277 to i32 %287 = bitcast float %268 to i32 %288 = insertelement <4 x i32> undef, i32 %284, i32 0 %289 = insertelement <4 x i32> %288, i32 %285, i32 1 %290 = insertelement <4 x i32> %289, i32 %286, i32 2 %291 = insertelement <4 x i32> %290, i32 %287, i32 3 %292 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %291, <32 x i8> %83, <16 x i8> %85, i32 4) %293 = extractelement <4 x float> %292, i32 0 %294 = extractelement <4 x float> %292, i32 1 %295 = extractelement <4 x float> %292, i32 2 %296 = extractelement <4 x float> %292, i32 3 %297 = call float @llvm.pow.f32(float %296, float %53) %298 = fmul float %52, %297 %299 = fmul float %298, %293 %300 = fmul float %298, %294 %301 = fmul float %298, %295 %302 = fcmp olt float %50, 0x3FEFFFEB00000000 br i1 %302, label %IF82, label %ENDIF81 IF82: ; preds = %ENDIF %303 = fcmp ogt float %63, 0.000000e+00 br i1 %303, label %IF85, label %ENDIF84 ENDIF81: ; preds = %ENDIF, %ENDIF84 %temp28.0 = phi float [ %531, %ENDIF84 ], [ %299, %ENDIF ] %temp29.0 = phi float [ %532, %ENDIF84 ], [ %300, %ENDIF ] %temp30.0 = phi float [ %533, %ENDIF84 ], [ %301, %ENDIF ] %304 = fmul float %temp28.0, %164 %305 = fmul float %temp29.0, %164 %306 = fmul float %temp30.0, %164 %307 = fsub float 1.000000e+00, %80 %308 = fsub float %24, %134 %309 = fsub float %25, %135 %310 = fsub float %26, %136 %311 = fmul float %308, %308 %312 = fmul float %309, %309 %313 = fadd float %312, %311 %314 = fmul float %310, %310 %315 = fadd float %313, %314 %316 = call float @llvm.AMDGPU.rsq.clamped.f32(float %315) %317 = fmul float %308, %316 %318 = fmul float %309, %316 %319 = fmul float %310, %316 %320 = fmul float %134, %125 %321 = fsub float -0.000000e+00, %320 %322 = fmul float %135, %126 %323 = fsub float %321, %322 %324 = fmul float %136, %127 %325 = fsub float %323, %324 %326 = call float @llvm.maxnum.f32(float %325, float 0.000000e+00) %327 = fmul float %24, %317 %328 = fmul float %25, %318 %329 = fadd float %328, %327 %330 = fmul float %26, %319 %331 = fadd float %329, %330 %332 = call float @llvm.maxnum.f32(float %331, float 0.000000e+00) %333 = fmul float %307, %307 %334 = fmul float %333, %75 %335 = fsub float 1.000000e+00, %307 %336 = fmul float %335, 0x3FEEF9DB20000000 %337 = fadd float %336, 0x3F9EB851E0000000 %338 = call float @llvm.log2.f32(float %337) %339 = fdiv float 1.000000e+00, %338 %340 = fmul float %339, 1.000000e+01 %341 = fmul float %340, %340 %342 = fsub float 1.000000e+00, %170 %343 = fsub float 1.000000e+00, %326 %344 = fmul float %332, 2.000000e+00 %345 = fmul float %332, %307 %346 = fmul float %344, %345 %347 = fadd float %346, 5.000000e-01 %348 = fsub float 1.000000e+00, %332 %349 = fsub float 1.000000e+00, %326 %350 = fsub float 1.000000e+00, %152 %351 = fadd float %80, %350 %352 = call float @llvm.AMDIL.clamp.(float %351, float 0.000000e+00, float 1.000000e+00) %353 = fmul float %349, %349 %354 = fmul float %349, %349 %355 = fmul float %354, %349 %356 = fmul float %353, %355 %357 = call float @llvm.AMDGPU.lrp(float %356, float %352, float %148) %358 = call float @llvm.AMDGPU.lrp(float %356, float %352, float %149) %359 = call float @llvm.AMDGPU.lrp(float %356, float %352, float %150) %360 = call float @llvm.AMDGPU.lrp(float %170, float 1.000000e+00, float %334) %361 = call float @llvm.AMDGPU.lrp(float %326, float 1.000000e+00, float %334) %362 = fmul float %360, %361 %363 = fadd float %362, 0x3F1A36E2E0000000 %364 = fdiv float 1.000000e+00, %363 %365 = fmul float %125, %317 %366 = fmul float %126, %318 %367 = fadd float %366, %365 %368 = fmul float %127, %319 %369 = fadd float %367, %368 %370 = call float @llvm.maxnum.f32(float %369, float 0.000000e+00) %371 = call float @llvm.pow.f32(float %370, float %341) %372 = fadd float %341, 1.000000e+00 %373 = fmul float %372, %74 %374 = fmul float %371, %373 %375 = fmul float %364, %374 %376 = fmul float %375, %170 %377 = fmul float %376, %73 %378 = call float @llvm.maxnum.f32(float %377, float 0.000000e+00) %379 = fmul float %378, %70 %380 = fmul float %378, %71 %381 = fmul float %378, %72 %382 = fsub float 1.000000e+00, %148 %383 = fsub float 1.000000e+00, %149 %384 = fsub float 1.000000e+00, %150 %385 = fmul float %348, %348 %386 = fmul float %348, %348 %387 = fmul float %386, %348 %388 = fmul float %385, %387 %389 = fmul float %382, %388 %390 = fadd float %389, %148 %391 = fmul float %383, %388 %392 = fadd float %391, %149 %393 = fmul float %384, %388 %394 = fadd float %393, %150 %395 = fadd float %347, -1.000000e+00 %396 = fmul float %342, %342 %397 = fmul float %342, %342 %398 = fmul float %397, %342 %399 = fmul float %396, %398 %400 = fmul float %395, %399 %401 = fadd float %400, 1.000000e+00 %402 = fadd float %347, -1.000000e+00 %403 = fmul float %343, %343 %404 = fmul float %343, %343 %405 = fmul float %404, %343 %406 = fmul float %403, %405 %407 = fmul float %402, %406 %408 = fadd float %407, 1.000000e+00 %409 = fmul float %401, %408 %410 = fmul float %409, %170 %411 = fmul float %70, %410 %412 = fadd float %411, %192 %413 = fmul float %71, %410 %414 = fadd float %413, %193 %415 = fmul float %72, %410 %416 = fadd float %415, %194 %417 = fmul float %153, %412 %418 = fmul float %154, %414 %419 = fmul float %155, %416 %420 = fmul float %379, %390 %421 = fadd float %420, %417 %422 = fmul float %380, %392 %423 = fadd float %422, %418 %424 = fmul float %381, %394 %425 = fadd float %424, %419 %426 = fmul float %304, %357 %427 = fadd float %426, %421 %428 = fmul float %305, %358 %429 = fadd float %428, %423 %430 = fmul float %306, %359 %431 = fadd float %430, %425 %432 = fmul float %112, %42 %433 = fadd float %432, %43 %434 = call float @llvm.AMDIL.clamp.(float %433, float 0.000000e+00, float 1.000000e+00) %435 = call float @llvm.AMDGPU.lrp(float %434, float %427, float %39) %436 = call float @llvm.AMDGPU.lrp(float %434, float %429, float %40) %437 = call float @llvm.AMDGPU.lrp(float %434, float %431, float %41) %438 = call i32 @llvm.SI.packf16(float %435, float %436) %439 = bitcast i32 %438 to float %440 = call i32 @llvm.SI.packf16(float %437, float 1.000000e+00) %441 = bitcast i32 %440 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %439, float %441, float %439, float %441) ret void IF85: ; preds = %IF82 %442 = fmul float %206, %206 %443 = fmul float %207, %207 %444 = fadd float %443, %442 %445 = fmul float %208, %208 %446 = fadd float %444, %445 %447 = call float @llvm.AMDGPU.rsq.clamped.f32(float %446) %448 = fmul float %206, %447 %449 = fmul float %207, %447 %450 = fmul float %208, %447 %451 = fsub float %54, %116 %452 = fsub float %55, %117 %453 = fsub float %56, %118 %454 = fdiv float 1.000000e+00, %448 %455 = fdiv float 1.000000e+00, %449 %456 = fdiv float 1.000000e+00, %450 %457 = fmul float %451, %454 %458 = fmul float %452, %455 %459 = fmul float %453, %456 %460 = fsub float %57, %116 %461 = fsub float %58, %117 %462 = fsub float %59, %118 %463 = fdiv float 1.000000e+00, %448 %464 = fdiv float 1.000000e+00, %449 %465 = fdiv float 1.000000e+00, %450 %466 = fmul float %460, %463 %467 = fmul float %461, %464 %468 = fmul float %462, %465 %469 = fcmp ogt float %448, 0.000000e+00 %470 = fcmp ogt float %449, 0.000000e+00 %471 = fcmp ogt float %450, 0.000000e+00 %.97 = select i1 %469, float %457, float %466 %temp64.1 = select i1 %470, float %458, float %467 %.98 = select i1 %471, float %459, float %468 %472 = fadd float %54, %57 %473 = fadd float %55, %58 %474 = fadd float %56, %59 %475 = fmul float %472, 5.000000e-01 %476 = fmul float %473, 5.000000e-01 %477 = fmul float %474, 5.000000e-01 %478 = call float @llvm.minnum.f32(float %.97, float %temp64.1) %479 = call float @llvm.minnum.f32(float %478, float %.98) %480 = fsub float %475, %60 %481 = fsub float %476, %61 %482 = fsub float %477, %62 %483 = fadd float %480, %116 %484 = fadd float %481, %117 %485 = fadd float %482, %118 %486 = fmul float %448, %479 %487 = fadd float %486, %483 %488 = fmul float %449, %479 %489 = fadd float %488, %484 %490 = fmul float %450, %479 %491 = fadd float %490, %485 %492 = fsub float %487, %475 %493 = fsub float %489, %476 %494 = fsub float %491, %477 br label %ENDIF84 ENDIF84: ; preds = %IF82, %IF85 %temp44.0 = phi float [ %492, %IF85 ], [ %206, %IF82 ] %temp45.0 = phi float [ %493, %IF85 ], [ %207, %IF82 ] %temp46.0 = phi float [ %494, %IF85 ], [ %208, %IF82 ] %495 = fsub float 1.000000e+00, %80 %496 = call float @llvm.pow.f32(float %495, float 7.500000e-01) %497 = fmul float %496, 7.000000e+00 %498 = insertelement <4 x float> undef, float %temp44.0, i32 0 %499 = insertelement <4 x float> %498, float %temp45.0, i32 1 %500 = insertelement <4 x float> %499, float %temp46.0, i32 2 %501 = insertelement <4 x float> %500, float %497, i32 3 %502 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %501) %503 = extractelement <4 x float> %502, i32 0 %504 = extractelement <4 x float> %502, i32 1 %505 = extractelement <4 x float> %502, i32 2 %506 = extractelement <4 x float> %502, i32 3 %507 = call float @llvm.fabs.f32(float %505) %508 = fdiv float 1.000000e+00, %507 %509 = fmul float %503, %508 %510 = fadd float %509, 1.500000e+00 %511 = fmul float %504, %508 %512 = fadd float %511, 1.500000e+00 %513 = bitcast float %512 to i32 %514 = bitcast float %510 to i32 %515 = bitcast float %506 to i32 %516 = bitcast float %497 to i32 %517 = insertelement <4 x i32> undef, i32 %513, i32 0 %518 = insertelement <4 x i32> %517, i32 %514, i32 1 %519 = insertelement <4 x i32> %518, i32 %515, i32 2 %520 = insertelement <4 x i32> %519, i32 %516, i32 3 %521 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %520, <32 x i8> %88, <16 x i8> %91, i32 4) %522 = extractelement <4 x float> %521, i32 0 %523 = extractelement <4 x float> %521, i32 1 %524 = extractelement <4 x float> %521, i32 2 %525 = extractelement <4 x float> %521, i32 3 %526 = call float @llvm.pow.f32(float %525, float %65) %527 = fmul float %64, %526 %528 = fmul float %527, %522 %529 = fmul float %527, %523 %530 = fmul float %527, %524 %531 = call float @llvm.AMDGPU.lrp(float %50, float %299, float %528) %532 = call float @llvm.AMDGPU.lrp(float %50, float %300, float %529) %533 = call float @llvm.AMDGPU.lrp(float %50, float %301, float %530) br label %ENDIF81 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v13, v0, 0, 0, [m0] ; C8340000 v_interp_p2_f32 v13, [v13], v1, 0, 0, [m0] ; C8350001 v_interp_p1_f32 v14, v0, 1, 0, [m0] ; C8380100 v_interp_p2_f32 v14, [v14], v1, 1, 0, [m0] ; C8390101 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 v_interp_p1_f32 v12, v0, 2, 2, [m0] ; C8300A00 v_interp_p2_f32 v12, [v12], v1, 2, 2, [m0] ; C8310A01 v_interp_p1_f32 v7, v0, 0, 3, [m0] ; C81C0C00 v_interp_p2_f32 v7, [v7], v1, 0, 3, [m0] ; C81D0C01 v_interp_p1_f32 v15, v0, 1, 3, [m0] ; C83C0D00 v_interp_p2_f32 v15, [v15], v1, 1, 3, [m0] ; C83D0D01 v_interp_p1_f32 v16, v0, 2, 3, [m0] ; C8400E00 v_interp_p2_f32 v16, [v16], v1, 2, 3, [m0] ; C8410E01 v_interp_p1_f32 v17, v0, 3, 3, [m0] ; C8440F00 v_interp_p2_f32 v17, [v17], v1, 3, 3, [m0] ; C8450F01 v_mul_f32_e32 v4, v2, v2 ; 10080502 v_mac_f32_e32 v4, v3, v3 ; 3E080703 v_mac_f32_e32 v4, v6, v6 ; 3E080D06 v_rsq_clamp_f32_e32 v8, v4 ; 7E105904 v_mul_f32_e32 v4, v15, v15 ; 10081F0F v_mac_f32_e32 v4, v16, v16 ; 3E082110 v_mac_f32_e32 v4, v17, v17 ; 3E082311 v_rsq_clamp_f32_e32 v18, v4 ; 7E245904 v_mul_f32_e32 v5, v8, v2 ; 100A0508 v_mul_f32_e32 v4, v8, v3 ; 10080708 v_mul_f32_e32 v3, v8, v6 ; 10060D08 v_mul_f32_e32 v9, v18, v15 ; 10121F12 v_mul_f32_e32 v8, v18, v16 ; 10102112 v_mul_f32_e32 v2, v9, v5 ; 10040B09 v_mac_f32_e32 v2, v8, v4 ; 3E040908 v_mul_f32_e32 v6, v18, v17 ; 100C2312 v_mac_f32_e32 v2, v6, v3 ; 3E040706 v_mul_f32_e32 v19, v5, v2 ; 10260505 v_mac_f32_e32 v19, v5, v2 ; 3E260505 v_mul_f32_e32 v21, v4, v2 ; 102A0504 v_mac_f32_e32 v21, v4, v2 ; 3E2A0504 v_mad_f32 v20, v15, v18, -v19 ; D2820014 844E250F v_mad_f32 v21, v16, v18, -v21 ; D2820015 84562510 v_mul_f32_e32 v15, v3, v2 ; 101E0503 v_mac_f32_e32 v15, v3, v2 ; 3E1E0503 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_mad_f32 v22, v17, v18, -v15 ; D2820016 843E2511 v_interp_p1_f32 v24, v0, 0, 4, [m0] ; C8601000 v_interp_p2_f32 v24, [v24], v1, 0, 4, [m0] ; C8611001 s_load_dwordx4 s[0:3], s[4:5], 0x8 ; C0800508 s_load_dwordx8 s[12:19], s[6:7], 0x10 ; C0C60710 v_interp_p1_f32 v23, v0, 1, 4, [m0] ; C85C1100 v_interp_p2_f32 v23, [v23], v1, 1, 4, [m0] ; C85D1101 v_interp_p1_f32 v25, v0, 2, 4, [m0] ; C8641200 v_interp_p2_f32 v25, [v25], v1, 2, 4, [m0] ; C8651201 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s20, s[8:11], 0x4c ; C20A094C s_buffer_load_dword s21, s[8:11], 0x4d ; C20A894D s_buffer_load_dword s22, s[8:11], 0x4e ; C20B094E s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C s_load_dwordx8 s[32:39], s[6:7], 0x18 ; C0D00718 image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[12:19], s[0:3] ; F0800700 0003000D s_buffer_load_dword s0, s[8:11], 0x41 ; C2000941 s_buffer_load_dword s1, s[8:11], 0x42 ; C2008942 s_buffer_load_dword s28, s[8:11], 0x54 ; C20E0954 s_buffer_load_dword s2, s[8:11], 0x40 ; C2010940 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v17, s20, v0 ; 10220014 v_mul_f32_e32 v18, s21, v1 ; 10240215 v_mul_f32_e32 v19, s22, v2 ; 10260416 s_buffer_load_dword s12, s[8:11], 0x27 ; C2060927 s_buffer_load_dword s13, s[8:11], 0x2b ; C206892B s_buffer_load_dword s29, s[8:11], 0x2c ; C20E892C s_buffer_load_dword s30, s[8:11], 0x2d ; C20F092D s_buffer_load_dword s3, s[8:11], 0x58 ; C2018958 v_sub_f32_e64 v0, 1.0, s28 ; D2080000 000038F2 v_mul_f32_e32 v2, s2, v0 ; 10040002 v_mul_f32_e32 v1, s0, v0 ; 10020000 v_mul_f32_e32 v0, s1, v0 ; 10000001 v_mac_f32_e32 v2, s28, v17 ; 3E04221C v_mov_b32_e32 v26, v20 ; 7E340314 v_mac_f32_e32 v1, s28, v18 ; 3E02241C v_mov_b32_e32 v27, v21 ; 7E360315 v_mac_f32_e32 v0, s28, v19 ; 3E00261C v_mov_b32_e32 v28, v22 ; 7E380316 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[0:1], 0, s13 ; D0020000 00001A80 image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[32:39], s[24:27] ; F0800F00 00C80D0D s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[20:21], s[0:1] ; BE942400 s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E s_cbranch_execz BB0_2 ; BF880000 s_buffer_load_dword s0, s[8:11], 0x20 ; C2000920 s_buffer_load_dword s1, s[8:11], 0x21 ; C2008921 s_buffer_load_dword s2, s[8:11], 0x22 ; C2010922 s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924 s_buffer_load_dword s14, s[8:11], 0x25 ; C2070925 v_mul_f32_e32 v13, v20, v20 ; 101A2914 v_mac_f32_e32 v13, v21, v21 ; 3E1A2B15 v_mac_f32_e32 v13, v22, v22 ; 3E1A2D16 v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D s_buffer_load_dword s15, s[8:11], 0x26 ; C2078926 s_buffer_load_dword s16, s[8:11], 0x28 ; C2080928 s_buffer_load_dword s17, s[8:11], 0x29 ; C2088929 s_buffer_load_dword s18, s[8:11], 0x2a ; C209092A v_mul_f32_e32 v15, v13, v20 ; 101E290D v_mul_f32_e32 v16, v13, v21 ; 10202B0D v_mul_f32_e32 v13, v13, v22 ; 101A2D0D v_rcp_f32_e32 v26, v15 ; 7E34550F s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v27, s0, v24 ; 08363000 v_sub_f32_e32 v28, s1, v23 ; 08382E01 v_rcp_f32_e32 v29, v16 ; 7E3A5510 v_mul_f32_e32 v27, v26, v27 ; 1036371A v_sub_f32_e32 v30, s13, v24 ; 083C300D v_mul_f32_e32 v26, v26, v30 ; 10343D1A v_cmp_lt_f32_e32 vcc, 0, v15 ; 7C021E80 v_cndmask_b32_e32 v26, v26, v27 ; 0034371A v_rcp_f32_e32 v27, v13 ; 7E36550D v_mul_f32_e32 v28, v29, v28 ; 1038391D v_sub_f32_e32 v30, s14, v23 ; 083C2E0E v_mul_f32_e32 v29, v29, v30 ; 103A3D1D v_cmp_lt_f32_e32 vcc, 0, v16 ; 7C022080 v_cndmask_b32_e32 v28, v29, v28 ; 0038391D v_sub_f32_e32 v29, s2, v25 ; 083A3202 v_mul_f32_e32 v29, v27, v29 ; 103A3B1B v_sub_f32_e32 v30, s15, v25 ; 083C320F v_mul_f32_e32 v27, v27, v30 ; 10363D1B v_cmp_lt_f32_e32 vcc, 0, v13 ; 7C021A80 v_cndmask_b32_e32 v27, v27, v29 ; 00363B1B v_min3_f32 v26, v26, v28, v27 ; D2A2001A 046E391A v_mov_b32_e32 v27, s13 ; 7E36020D v_add_f32_e32 v27, s0, v27 ; 06363600 v_mov_b32_e32 v28, s14 ; 7E38020E v_add_f32_e32 v28, s1, v28 ; 06383801 v_mov_b32_e32 v29, s15 ; 7E3A020F v_add_f32_e32 v29, s2, v29 ; 063A3A02 v_mad_f32 v30, 0.5, v27, -s16 ; D282001E 804236F0 v_add_f32_e32 v30, v24, v30 ; 063C3D18 v_mac_f32_e32 v30, v26, v15 ; 3E3C1F1A v_mad_f32 v15, 0.5, v28, -s17 ; D282000F 804638F0 v_add_f32_e32 v15, v23, v15 ; 061E1F17 v_mac_f32_e32 v15, v26, v16 ; 3E1E211A v_mad_f32 v16, 0.5, v29, -s18 ; D2820010 804A3AF0 v_add_f32_e32 v16, v25, v16 ; 06202119 v_mac_f32_e32 v16, v26, v13 ; 3E201B1A v_mad_f32 v26, 0.5, -v27, v30 ; D282001A 447A36F0 v_mad_f32 v27, 0.5, -v28, v15 ; D282001B 443E38F0 v_mad_f32 v28, 0.5, -v29, v16 ; D282001C 44423AF0 s_or_b64 exec, exec, s[20:21] ; 88FE147E s_buffer_load_dword s14, s[8:11], 0x17 ; C2070917 s_buffer_load_dword s15, s[8:11], 0x43 ; C2078943 s_buffer_load_dword s13, s[8:11], 0x60 ; C2068960 s_buffer_load_dword s0, s[8:11], 0x0 ; C2000900 s_buffer_load_dword s1, s[8:11], 0x1 ; C2008901 s_buffer_load_dword s2, s[8:11], 0x2 ; C2010902 s_buffer_load_dword s16, s[8:11], 0x4 ; C2080904 s_buffer_load_dword s17, s[8:11], 0x5 ; C2088905 s_buffer_load_dword s18, s[8:11], 0x6 ; C2090906 s_buffer_load_dword s20, s[8:11], 0x7 ; C20A0907 s_buffer_load_dword s19, s[8:11], 0x8 ; C2098908 s_buffer_load_dword s21, s[8:11], 0x9 ; C20A8909 s_buffer_load_dword s22, s[8:11], 0xa ; C20B090A s_buffer_load_dword s23, s[8:11], 0xb ; C20B890B s_buffer_load_dword s24, s[8:11], 0xc ; C20C090C s_buffer_load_dword s25, s[8:11], 0xd ; C20C890D s_buffer_load_dword s26, s[8:11], 0xe ; C20D090E s_buffer_load_dword s27, s[8:11], 0xf ; C20D890F v_sub_f32_e64 v13, 1.0, s3 ; D208000D 000006F2 v_log_f32_e32 v13, v13 ; 7E1A4F0D s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 v_mul_legacy_f32_e32 v13, 0x3f400000, v13 ; 0E1A1AFF 3F400000 v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_mul_f32_e32 v29, 0x40e00000, v13 ; 103A1AFF 40E00000 v_cubeid_f32 v33, v26, v27, v28 ; D2880021 0472371A v_cubema_f32 v32, v26, v27, v28 ; D28E0020 0472371A v_cubesc_f32 v31, v26, v27, v28 ; D28A001F 0472371A v_cubetc_f32 v30, v26, v27, v28 ; D28C001E 0472371A v_mov_b32_e32 v26, 0x3fc00000 ; 7E3402FF 3FC00000 v_rcp_f32_e64 v13, |v32| ; D354010D 00000120 v_mad_f32 v27, v13, v30, v26 ; D282001B 046A3D0D v_mac_f32_e32 v26, v13, v31 ; 3E343F0D v_mov_b32_e32 v28, v33 ; 7E380321 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[36:43], s[32:35] ; F0900F00 01091A1A s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v13, v29 ; 7E1A4F1D v_mul_legacy_f32_e32 v13, s30, v13 ; 0E1A1A1E v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_mul_f32_e32 v13, s29, v13 ; 101A1A1D v_mul_f32_e32 v16, v26, v13 ; 10201B1A v_mul_f32_e32 v15, v27, v13 ; 101E1B1B v_mul_f32_e32 v13, v28, v13 ; 101A1B1C v_mov_b32_e32 v26, s28 ; 7E34021C v_mov_b32_e32 v27, 0x3f7fff58 ; 7E3602FF 3F7FFF58 v_cmp_lt_f32_e32 vcc, s12, v27 ; 7C02360C s_and_saveexec_b64 s[28:29], vcc ; BE9C246A s_xor_b64 s[28:29], exec, s[28:29] ; 899C1C7E s_cbranch_execz BB0_6 ; BF880000 s_buffer_load_dword s32, s[8:11], 0x3b ; C210093B s_buffer_load_dword s30, s[8:11], 0x3c ; C20F093C s_buffer_load_dword s31, s[8:11], 0x3d ; C20F893D s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[32:33], 0, s32 ; D0020020 00004080 s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420 s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E s_cbranch_execz BB0_7 ; BF880000 s_buffer_load_dword s34, s[8:11], 0x36 ; C2110936 s_buffer_load_dword s35, s[8:11], 0x38 ; C2118938 s_buffer_load_dword s36, s[8:11], 0x39 ; C2120939 s_buffer_load_dword s37, s[8:11], 0x3a ; C212893A s_buffer_load_dword s38, s[8:11], 0x30 ; C2130930 s_buffer_load_dword s39, s[8:11], 0x31 ; C2138931 s_buffer_load_dword s40, s[8:11], 0x32 ; C2140932 s_buffer_load_dword s41, s[8:11], 0x34 ; C2148934 s_buffer_load_dword s42, s[8:11], 0x35 ; C2150935 v_mul_f32_e32 v27, v20, v20 ; 10362914 v_mac_f32_e32 v27, v21, v21 ; 3E362B15 v_mac_f32_e32 v27, v22, v22 ; 3E362D16 v_rsq_clamp_f32_e32 v27, v27 ; 7E36591B s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v28, s34, v25 ; 08383222 v_mov_b32_e32 v29, s34 ; 7E3A0222 v_sub_f32_e32 v30, s38, v24 ; 083C3026 v_sub_f32_e32 v31, s39, v23 ; 083E2E27 v_add_f32_e32 v29, s40, v29 ; 063A3A28 v_sub_f32_e32 v32, s40, v25 ; 08403228 v_mad_f32 v33, 0.5, v29, -s37 ; D2820021 80963AF0 v_add_f32_e32 v25, v25, v33 ; 06324319 v_mul_f32_e32 v20, v27, v20 ; 1028291B v_mul_f32_e32 v21, v27, v21 ; 102A2B1B v_mul_f32_e32 v22, v27, v22 ; 102C2D1B v_rcp_f32_e32 v27, v20 ; 7E365514 v_rcp_f32_e32 v33, v21 ; 7E425515 v_rcp_f32_e32 v34, v22 ; 7E445516 v_sub_f32_e32 v35, s41, v24 ; 08463029 v_mov_b32_e32 v36, s41 ; 7E480229 v_add_f32_e32 v36, s38, v36 ; 06484826 v_mul_f32_e32 v30, v27, v30 ; 103C3D1B v_mul_f32_e32 v27, v27, v35 ; 1036471B v_mul_f32_e32 v31, v33, v31 ; 103E3F21 v_mul_f32_e32 v32, v34, v32 ; 10404122 v_mul_f32_e32 v28, v34, v28 ; 10383922 v_mad_f32 v34, 0.5, v36, -s35 ; D2820022 808E48F0 v_add_f32_e32 v24, v24, v34 ; 06304518 v_sub_f32_e32 v34, s42, v23 ; 08442E2A v_mov_b32_e32 v35, s42 ; 7E46022A v_mul_f32_e32 v33, v33, v34 ; 10424521 v_add_f32_e32 v34, s39, v35 ; 06444627 v_cmp_lt_f32_e32 vcc, 0, v20 ; 7C022880 v_cndmask_b32_e32 v27, v27, v30 ; 00363D1B v_cmp_lt_f32_e32 vcc, 0, v21 ; 7C022A80 v_cndmask_b32_e32 v30, v33, v31 ; 003C3F21 v_cmp_lt_f32_e32 vcc, 0, v22 ; 7C022C80 v_cndmask_b32_e32 v28, v28, v32 ; 0038411C v_min3_f32 v27, v27, v30, v28 ; D2A2001B 04723D1B v_mad_f32 v28, 0.5, v34, -s36 ; D282001C 809244F0 v_add_f32_e32 v23, v23, v28 ; 062E3917 v_mac_f32_e32 v24, v27, v20 ; 3E30291B v_mac_f32_e32 v23, v27, v21 ; 3E2E2B1B v_mac_f32_e32 v25, v27, v22 ; 3E322D1B v_mad_f32 v20, 0.5, -v36, v24 ; D2820014 446248F0 v_mad_f32 v21, 0.5, -v34, v23 ; D2820015 445E44F0 v_mad_f32 v22, 0.5, -v29, v25 ; D2820016 44663AF0 s_or_b64 exec, exec, s[32:33] ; 88FE207E v_sub_f32_e64 v23, 1.0, s3 ; D2080017 000006F2 v_log_f32_e32 v23, v23 ; 7E2E4F17 s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504 v_mul_legacy_f32_e32 v23, 0x3f400000, v23 ; 0E2E2EFF 3F400000 v_exp_f32_e32 v23, v23 ; 7E2E4B17 v_mul_f32_e32 v23, 0x40e00000, v23 ; 102E2EFF 40E00000 v_cubeid_f32 v30, v20, v21, v22 ; D288001E 045A2B14 v_cubema_f32 v29, v20, v21, v22 ; D28E001D 045A2B14 s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708 v_cubesc_f32 v28, v20, v21, v22 ; D28A001C 045A2B14 v_cubetc_f32 v27, v20, v21, v22 ; D28C001B 045A2B14 v_rcp_f32_e64 v22, |v29| ; D3540116 0000011D v_mov_b32_e32 v20, 0x3fc00000 ; 7E2802FF 3FC00000 v_mad_f32 v21, v22, v27, v20 ; D2820015 04523716 v_mac_f32_e32 v20, v22, v28 ; 3E283916 v_mov_b32_e32 v22, v30 ; 7E2C031E s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[20:23], s[36:43], s[32:35] ; F0900F00 01091414 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v23, v23 ; 7E2E4F17 v_sub_f32_e64 v24, 1.0, s12 ; D2080018 000018F2 v_mul_legacy_f32_e32 v23, s31, v23 ; 0E2E2E1F v_exp_f32_e32 v23, v23 ; 7E2E4B17 v_mul_f32_e32 v23, s30, v23 ; 102E2E1E v_mul_f32_e32 v20, v20, v23 ; 10282F14 v_mul_f32_e32 v21, v21, v23 ; 102A2F15 v_mul_f32_e32 v22, v22, v23 ; 102C2F16 v_mul_f32_e32 v20, v20, v24 ; 10283114 v_mul_f32_e32 v21, v21, v24 ; 102A3115 v_mul_f32_e32 v22, v22, v24 ; 102C3116 v_mac_f32_e32 v20, s12, v16 ; 3E28200C v_mac_f32_e32 v21, s12, v15 ; 3E2A1E0C v_mac_f32_e32 v22, s12, v13 ; 3E2C1A0C v_mov_b32_e32 v13, v22 ; 7E1A0316 v_mov_b32_e32 v15, v21 ; 7E1E0315 v_mov_b32_e32 v16, v20 ; 7E200314 s_or_b64 exec, exec, s[28:29] ; 88FE1C7E v_mad_f32 v22, -v26, s15, s15 ; D2820016 203C1F1A v_mov_b32_e32 v20, s14 ; 7E28020E v_mul_f32_e32 v21, v22, v17 ; 102A2316 v_mul_f32_e32 v18, v22, v18 ; 10242516 v_mul_f32_e32 v17, v22, v19 ; 10222716 v_mul_f32_e32 v19, s17, v4 ; 10260811 v_mac_f32_e32 v19, s16, v5 ; 3E260A10 v_mac_f32_e32 v19, s18, v3 ; 3E260612 v_add_f32_e32 v19, s20, v19 ; 06262614 v_add_f32_e32 v23, v19, v10 ; 062E1513 v_mul_f32_e32 v10, s21, v4 ; 10140815 v_mac_f32_e32 v10, s19, v5 ; 3E140A13 v_mac_f32_e32 v10, s22, v3 ; 3E140616 v_add_f32_e32 v10, s23, v10 ; 06141417 v_add_f32_e32 v11, v10, v11 ; 0616170A v_mul_f32_e32 v10, s25, v4 ; 10140819 v_mac_f32_e32 v10, s24, v5 ; 3E140A18 v_mac_f32_e32 v10, s26, v3 ; 3E14061A v_add_f32_e32 v10, s27, v10 ; 0614141B v_add_f32_e32 v12, v10, v12 ; 0618190A s_buffer_load_dword s6, s[8:11], 0x10 ; C2030910 s_buffer_load_dword s5, s[8:11], 0x11 ; C2028911 s_buffer_load_dword s4, s[8:11], 0x12 ; C2020912 s_buffer_load_dword s17, s[8:11], 0x16 ; C2088916 s_buffer_load_dword s14, s[8:11], 0x44 ; C2070944 s_buffer_load_dword s7, s[8:11], 0x45 ; C2038945 s_buffer_load_dword s12, s[8:11], 0x46 ; C2060946 s_buffer_load_dword s15, s[8:11], 0x48 ; C2078948 s_buffer_load_dword s16, s[8:11], 0x49 ; C2080949 s_buffer_load_dword s8, s[8:11], 0x4b ; C204094B v_sub_f32_e64 v19, 1.0, s13 ; D2080013 00001AF2 v_mac_f32_e32 v19, s13, v14 ; 3E261C0D v_mul_f32_e32 v10, s0, v5 ; 10140A00 v_mac_f32_e32 v10, s1, v4 ; 3E140801 v_mac_f32_e32 v10, s2, v3 ; 3E140602 v_max_f32_e32 v10, 0, v10 ; 20141480 v_mul_f32_e32 v14, v19, v23 ; 101C2F13 v_mul_f32_e32 v11, v19, v11 ; 10161713 v_mul_f32_e32 v12, v19, v12 ; 10181913 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v20, s17, v7 ; 3E280E11 v_mul_f32_e32 v7, v19, v16 ; 100E2113 v_mul_f32_e32 v15, v19, v15 ; 101E1F13 v_mul_f32_e32 v13, v19, v13 ; 101A1B13 v_sub_f32_e32 v16, 1.0, v22 ; 08202CF2 v_add_f32_e32 v16, s3, v16 ; 06202003 v_sub_f32_e64 v19, 1.0, s3 ; D2080013 000006F2 v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080 v_sub_f32_e32 v22, s0, v9 ; 082C1200 v_sub_f32_e32 v23, s1, v8 ; 082E1001 v_mul_f32_e32 v24, v22, v22 ; 10302D16 v_mac_f32_e32 v24, v23, v23 ; 3E302F17 v_sub_f32_e32 v25, s2, v6 ; 08320C02 v_mac_f32_e32 v24, v25, v25 ; 3E303319 v_rsq_clamp_f32_e32 v24, v24 ; 7E305918 v_mul_f32_e32 v22, v24, v22 ; 102C2D18 v_mul_f32_e32 v23, v24, v23 ; 102E2F18 v_mul_f32_e32 v24, v24, v25 ; 10303318 v_mul_f32_e32 v9, v9, v5 ; 10120B09 v_mad_f32 v8, -v8, v4, -v9 ; D2820008 A4260908 v_mul_f32_e32 v5, v22, v5 ; 100A0B16 v_mac_f32_e32 v5, v23, v4 ; 3E0A0917 v_mul_f32_e32 v4, s0, v22 ; 10082C00 v_mac_f32_e32 v4, s1, v23 ; 3E082E01 v_mad_f32 v6, -v6, v3, v8 ; D2820006 24220706 v_mac_f32_e32 v4, s2, v24 ; 3E083002 v_mac_f32_e32 v5, v24, v3 ; 3E0A0718 v_max_f32_e32 v3, 0, v4 ; 20060880 v_sub_f32_e32 v4, 1.0, v3 ; 080806F2 v_mul_f32_e32 v8, v4, v4 ; 10100904 v_mul_f32_e32 v4, v4, v8 ; 10081104 v_mul_f32_e32 v4, v4, v8 ; 10081104 v_max_f32_e32 v6, 0, v6 ; 200C0C80 v_sub_f32_e32 v8, 1.0, v6 ; 08100CF2 v_mul_f32_e32 v9, v8, v8 ; 10121108 v_mul_f32_e32 v22, v8, v9 ; 102C1308 v_mad_f32 v23, -v9, v22, 1.0 ; D2820017 23CA2D09 v_mul_f32_e32 v24, v2, v23 ; 10302F02 v_sub_f32_e32 v25, 1.0, v2 ; 083204F2 v_mac_f32_e32 v2, v4, v25 ; 3E043304 v_mul_f32_e32 v25, v1, v23 ; 10322F01 v_sub_f32_e32 v26, 1.0, v1 ; 083402F2 v_mac_f32_e32 v1, v4, v26 ; 3E023504 v_mul_f32_e32 v23, v0, v23 ; 102E2F00 v_sub_f32_e32 v26, 1.0, v0 ; 083400F2 v_mac_f32_e32 v0, v4, v26 ; 3E003504 v_sub_f32_e32 v4, 1.0, v19 ; 080826F2 v_mov_b32_e32 v26, 0x3cf5c28f ; 7E3402FF 3CF5C28F v_madmk_f32_e32 v4, v4, v26, 0x3f77ced9 ; 40083504 3F77CED9 v_add_f32_e32 v26, v3, v3 ; 06340703 v_mul_f32_e32 v3, v19, v3 ; 10060713 v_mad_f32 v3, v26, v3, 0.5 ; D2820003 03C2071A v_mul_f32_e32 v9, v22, v9 ; 10121316 v_mac_f32_e32 v24, v16, v9 ; 3E301310 v_mac_f32_e32 v25, v16, v9 ; 3E321310 v_mac_f32_e32 v23, v16, v9 ; 3E2E1310 v_mul_f32_e32 v16, v19, v19 ; 10202713 v_log_f32_e32 v4, v4 ; 7E084F04 v_mul_f32_e32 v16, s8, v16 ; 10202008 v_mul_f32_e32 v8, v16, v8 ; 10101110 v_mac_f32_e32 v8, 1.0, v6 ; 3E100CF2 v_rcp_f32_e32 v4, v4 ; 7E085504 v_sub_f32_e32 v6, 1.0, v10 ; 080C14F2 v_mul_f32_e32 v16, v16, v6 ; 10200D10 v_mac_f32_e32 v16, 1.0, v10 ; 3E2014F2 v_max_f32_e32 v5, 0, v5 ; 200A0A80 v_log_f32_e32 v5, v5 ; 7E0A4F05 v_madak_f32_e32 v8, v16, v8, 0x38d1b717 ; 42101110 38D1B717 v_mul_f32_e32 v4, 0x41200000, v4 ; 100808FF 41200000 v_mul_f32_e32 v16, v4, v4 ; 10200904 v_mul_legacy_f32_e32 v5, v16, v5 ; 0E0A0B10 v_rcp_f32_e32 v8, v8 ; 7E105508 v_mad_f32 v4, v4, v4, 1.0 ; D2820004 03CA0904 v_mul_f32_e32 v4, s16, v4 ; 10080810 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_mul_f32_e32 v4, v4, v5 ; 10080B04 v_mul_f32_e32 v4, v4, v8 ; 10081104 v_mul_f32_e32 v4, v10, v4 ; 1008090A v_mul_f32_e32 v4, s15, v4 ; 1008080F v_mul_f32_e32 v5, v6, v6 ; 100A0D06 v_mul_f32_e32 v6, v6, v5 ; 100C0B06 v_mul_f32_e32 v5, v6, v5 ; 100A0B06 v_add_f32_e32 v3, -1.0, v3 ; 060606F3 v_mad_f32 v5, v3, v5, 1.0 ; D2820005 03CA0B03 v_mad_f32 v3, v3, v9, 1.0 ; D2820003 03CA1303 v_mul_f32_e32 v3, v3, v5 ; 10060B03 v_mul_f32_e32 v3, v10, v3 ; 1006070A v_mac_f32_e32 v14, s14, v3 ; 3E1C060E v_mul_f32_e32 v5, v14, v21 ; 100A2B0E v_max_f32_e32 v4, 0, v4 ; 20080880 v_mul_f32_e32 v6, s14, v4 ; 100C080E v_mac_f32_e32 v5, v2, v6 ; 3E0A0D02 v_mac_f32_e32 v11, s7, v3 ; 3E160607 v_mac_f32_e32 v12, s12, v3 ; 3E18060C v_mul_f32_e32 v2, s7, v4 ; 10040807 v_mul_f32_e32 v3, s12, v4 ; 1006080C v_mul_f32_e32 v4, v11, v18 ; 1008250B v_mul_f32_e32 v6, v12, v17 ; 100C230C v_mac_f32_e32 v4, v1, v2 ; 3E080501 v_mac_f32_e32 v6, v0, v3 ; 3E0C0700 v_mac_f32_e32 v5, v24, v7 ; 3E0A0F18 v_mac_f32_e32 v4, v25, v15 ; 3E081F19 v_mac_f32_e32 v6, v23, v13 ; 3E0C1B17 v_add_f32_e64 v0, 0, v20 clamp ; D2060800 00022880 v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 v_mul_f32_e32 v2, s6, v1 ; 10040206 v_mac_f32_e32 v2, v5, v0 ; 3E040105 v_mul_f32_e32 v3, s5, v1 ; 10060205 v_mac_f32_e32 v3, v4, v0 ; 3E060104 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mac_f32_e32 v1, v6, v0 ; 3E020106 v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702 v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 40 Code Size: 2112 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL OUT[6], GENERIC[5] DCL CONST[0..20] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[6], IN[0].xxxx 1: MAD TEMP[0], CONST[7], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0].xyz, CONST[9], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[17], IN[0].xxxx 5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1] 8: MAD TEMP[2].xy, IN[2].xyyy, CONST[14].xyyy, CONST[14].zwww 9: FSEQ TEMP[3].x, CONST[16].xxxx, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].xy, IN[2].xyxx 12: ELSE :0 13: MOV TEMP[3].xy, IN[3].xyxx 14: ENDIF 15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[15].xyyy, CONST[15].zwww 16: MOV TEMP[2].zw, TEMP[3].yyxy 17: MOV TEMP[3].x, CONST[10].xxxx 18: MOV TEMP[3].y, CONST[11].xxxx 19: MOV TEMP[3].z, CONST[12].xxxx 20: MOV TEMP[4].x, CONST[10].yyyy 21: MOV TEMP[4].y, CONST[11].yyyy 22: MOV TEMP[4].z, CONST[12].yyyy 23: MOV TEMP[5].x, CONST[10].zzzz 24: MOV TEMP[5].y, CONST[11].zzzz 25: MOV TEMP[5].z, CONST[12].zzzz 26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz 29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 30: RSQ TEMP[4].x, TEMP[4].xxxx 31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 32: MOV TEMP[4].xyz, TEMP[3].xyzx 33: MUL TEMP[5].xyw, TEMP[1], IMM[0].yyyy 34: MOV TEMP[6].x, TEMP[5].xxxx 35: MUL TEMP[7].x, TEMP[5].yyyy, CONST[1].xxxx 36: MOV TEMP[6].y, TEMP[7].xxxx 37: ADD TEMP[5].xy, TEMP[6].xyyy, TEMP[5].wwww 38: MOV TEMP[5].zw, TEMP[1].wwzw 39: MUL TEMP[6], TEMP[3].xyzz, TEMP[3].yzzx 40: DP4 TEMP[7].x, CONST[2], TEMP[6] 41: DP4 TEMP[8].x, CONST[3], TEMP[6] 42: MOV TEMP[7].y, TEMP[8].xxxx 43: DP4 TEMP[6].x, CONST[4], TEMP[6] 44: MOV TEMP[7].z, TEMP[6].xxxx 45: MUL TEMP[6].x, TEMP[3].yyyy, TEMP[3].yyyy 46: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[6].xxxx 47: MAD TEMP[3].xyz, CONST[5].xyzz, TEMP[3].xxxx, TEMP[7].xyzz 48: ADD TEMP[6].xyz, TEMP[0].xyzz, -CONST[0].xyzz 49: MOV TEMP[6].yzw, TEMP[6].yxyz 50: MOV TEMP[6].x, TEMP[1].zzzz 51: MOV TEMP[0].xyz, TEMP[0].xyzx 52: MOV OUT[6], TEMP[0] 53: MOV OUT[1], TEMP[2] 54: MOV OUT[2], TEMP[4] 55: MOV OUT[3], TEMP[3] 56: MOV OUT[4], TEMP[5] 57: MOV OUT[0], TEMP[1] 58: MOV OUT[5], TEMP[6] 59: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332) %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 %83 = add i32 %5, %7 %84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83) %85 = extractelement <4 x float> %84, i32 0 %86 = extractelement <4 x float> %84, i32 1 %87 = extractelement <4 x float> %84, i32 2 %88 = extractelement <4 x float> %84, i32 3 %89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0 %91 = add i32 %5, %7 %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91) %93 = extractelement <4 x float> %92, i32 0 %94 = extractelement <4 x float> %92, i32 1 %95 = extractelement <4 x float> %92, i32 2 %96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0 %98 = add i32 %5, %7 %99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !tbaa !0 %104 = add i32 %5, %7 %105 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %103, i32 0, i32 %104) %106 = extractelement <4 x float> %105, i32 0 %107 = extractelement <4 x float> %105, i32 1 %108 = fmul float %32, %85 %109 = fmul float %33, %85 %110 = fmul float %34, %85 %111 = fmul float %35, %85 %112 = fmul float %36, %86 %113 = fadd float %112, %108 %114 = fmul float %37, %86 %115 = fadd float %114, %109 %116 = fmul float %38, %86 %117 = fadd float %116, %110 %118 = fmul float %39, %86 %119 = fadd float %118, %111 %120 = fmul float %40, %87 %121 = fadd float %120, %113 %122 = fmul float %41, %87 %123 = fadd float %122, %115 %124 = fmul float %42, %87 %125 = fadd float %124, %117 %126 = fmul float %43, %87 %127 = fadd float %126, %119 %128 = fmul float %44, %88 %129 = fadd float %128, %121 %130 = fmul float %45, %88 %131 = fadd float %130, %123 %132 = fmul float %46, %88 %133 = fadd float %132, %125 %134 = fmul float %65, %85 %135 = fmul float %66, %85 %136 = fmul float %67, %85 %137 = fmul float %68, %85 %138 = fmul float %69, %86 %139 = fadd float %138, %134 %140 = fmul float %70, %86 %141 = fadd float %140, %135 %142 = fmul float %71, %86 %143 = fadd float %142, %136 %144 = fmul float %72, %86 %145 = fadd float %144, %137 %146 = fmul float %73, %87 %147 = fadd float %146, %139 %148 = fmul float %74, %87 %149 = fadd float %148, %141 %150 = fmul float %75, %87 %151 = fadd float %150, %143 %152 = fmul float %76, %87 %153 = fadd float %152, %145 %154 = fmul float %77, %88 %155 = fadd float %154, %147 %156 = fmul float %78, %88 %157 = fadd float %156, %149 %158 = fmul float %79, %88 %159 = fadd float %158, %151 %160 = fmul float %80, %88 %161 = fadd float %160, %153 %162 = fmul float %100, %56 %163 = fadd float %162, %58 %164 = fmul float %101, %57 %165 = fadd float %164, %59 %166 = fcmp oeq float %64, 0.000000e+00 %. = select i1 %166, float %100, float %106 %.36 = select i1 %166, float %101, float %107 %167 = fmul float %., %60 %168 = fadd float %167, %62 %169 = fmul float %.36, %61 %170 = fadd float %169, %63 %171 = fmul float %47, %93 %172 = fmul float %50, %93 %173 = fmul float %53, %93 %174 = fmul float %48, %94 %175 = fadd float %174, %171 %176 = fmul float %51, %94 %177 = fadd float %176, %172 %178 = fmul float %54, %94 %179 = fadd float %178, %173 %180 = fmul float %49, %95 %181 = fadd float %180, %175 %182 = fmul float %52, %95 %183 = fadd float %182, %177 %184 = fmul float %55, %95 %185 = fadd float %184, %179 %186 = fmul float %181, %181 %187 = fmul float %183, %183 %188 = fadd float %187, %186 %189 = fmul float %185, %185 %190 = fadd float %188, %189 %191 = call float @llvm.AMDGPU.rsq.clamped.f32(float %190) %192 = fmul float %181, %191 %193 = fmul float %183, %191 %194 = fmul float %185, %191 %195 = fmul float %155, 5.000000e-01 %196 = fmul float %157, 5.000000e-01 %197 = fmul float %161, 5.000000e-01 %198 = fmul float %196, %16 %199 = fadd float %195, %197 %200 = fadd float %198, %197 %201 = fmul float %192, %193 %202 = fmul float %193, %194 %203 = fmul float %194, %194 %204 = fmul float %194, %192 %205 = fmul float %17, %201 %206 = fmul float %18, %202 %207 = fadd float %205, %206 %208 = fmul float %19, %203 %209 = fadd float %207, %208 %210 = fmul float %20, %204 %211 = fadd float %209, %210 %212 = fmul float %21, %201 %213 = fmul float %22, %202 %214 = fadd float %212, %213 %215 = fmul float %23, %203 %216 = fadd float %214, %215 %217 = fmul float %24, %204 %218 = fadd float %216, %217 %219 = fmul float %25, %201 %220 = fmul float %26, %202 %221 = fadd float %219, %220 %222 = fmul float %27, %203 %223 = fadd float %221, %222 %224 = fmul float %28, %204 %225 = fadd float %223, %224 %226 = fmul float %193, %193 %227 = fmul float %192, %192 %228 = fsub float %227, %226 %229 = fmul float %29, %228 %230 = fadd float %229, %211 %231 = fmul float %30, %228 %232 = fadd float %231, %218 %233 = fmul float %31, %228 %234 = fadd float %233, %225 %235 = fsub float %129, %13 %236 = fsub float %131, %14 %237 = fsub float %133, %15 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %163, float %165, float %168, float %170) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %192, float %193, float %194, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %230, float %232, float %234, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %199, float %200, float %159, float %161) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %159, float %235, float %236, float %237) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %129, float %131, float %133, float %127) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %155, float %157, float %159, float %161) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s19, s[20:23], 0x23 ; C2099523 buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00 s_buffer_load_dword s24, s[20:23], 0x24 ; C20C1524 s_buffer_load_dword s25, s[20:23], 0x25 ; C20C9525 s_buffer_load_dword s26, s[20:23], 0x26 ; C20D1526 s_buffer_load_dword s27, s[20:23], 0x28 ; C20D9528 s_buffer_load_dword s28, s[20:23], 0x29 ; C20E1529 s_buffer_load_dword s29, s[20:23], 0x2a ; C20E952A s_buffer_load_dword s30, s[20:23], 0x2c ; C20F152C s_buffer_load_dword s31, s[20:23], 0x2d ; C20F952D s_buffer_load_dword s32, s[20:23], 0x2e ; C210152E s_buffer_load_dword s33, s[20:23], 0x30 ; C2109530 s_buffer_load_dword s34, s[20:23], 0x31 ; C2111531 s_buffer_load_dword s35, s[20:23], 0x32 ; C2119532 s_buffer_load_dword s36, s[20:23], 0x38 ; C2121538 s_buffer_load_dword s37, s[20:23], 0x39 ; C2129539 s_buffer_load_dword s11, s[20:23], 0x9 ; C2059509 s_buffer_load_dword s5, s[20:23], 0xa ; C202950A s_buffer_load_dword s3, s[20:23], 0xb ; C201950B s_buffer_load_dword s9, s[20:23], 0xc ; C204950C s_buffer_load_dword s12, s[20:23], 0xd ; C206150D s_buffer_load_dword s7, s[20:23], 0xe ; C203950E s_buffer_load_dword s4, s[20:23], 0xf ; C202150F s_buffer_load_dword s10, s[20:23], 0x10 ; C2051510 s_buffer_load_dword s13, s[20:23], 0x11 ; C2069511 s_buffer_load_dword s8, s[20:23], 0x12 ; C2041512 s_buffer_load_dword s0, s[20:23], 0x3f ; C200153F s_buffer_load_dword s1, s[20:23], 0x40 ; C2009540 s_buffer_load_dword s38, s[20:23], 0x44 ; C2131544 s_buffer_load_dword s39, s[20:23], 0x45 ; C2139545 s_buffer_load_dword s40, s[20:23], 0x46 ; C2141546 s_buffer_load_dword s41, s[20:23], 0x47 ; C2149547 s_buffer_load_dword s42, s[20:23], 0x48 ; C2151548 s_buffer_load_dword s43, s[20:23], 0x49 ; C2159549 s_buffer_load_dword s44, s[20:23], 0x4a ; C216154A s_buffer_load_dword s45, s[20:23], 0x4b ; C216954B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s0 ; 7E000200 s_buffer_load_dword s0, s[20:23], 0x0 ; C2001500 v_cmp_eq_f32_e64 vcc, 0, s1 ; D004006A 00000280 s_buffer_load_dword s1, s[20:23], 0x1 ; C2009501 s_buffer_load_dword s2, s[20:23], 0x2 ; C2011502 s_buffer_load_dword s6, s[20:23], 0x4 ; C2031504 s_buffer_load_dword s14, s[20:23], 0x8 ; C2071508 s_buffer_load_dword s46, s[20:23], 0x3a ; C217153A s_buffer_load_dword s47, s[20:23], 0x3b ; C217953B s_buffer_load_dword s48, s[20:23], 0x3c ; C218153C s_buffer_load_dword s49, s[20:23], 0x3d ; C218953D s_buffer_load_dword s50, s[20:23], 0x3e ; C219153E s_buffer_load_dword s18, s[20:23], 0x13 ; C2091513 s_buffer_load_dword s15, s[20:23], 0x14 ; C2079514 s_buffer_load_dword s16, s[20:23], 0x15 ; C2081515 s_buffer_load_dword s17, s[20:23], 0x16 ; C2089516 s_buffer_load_dword s51, s[20:23], 0x18 ; C2199518 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v13, s46 ; 7E1A022E s_buffer_load_dword s46, s[20:23], 0x19 ; C2171519 s_buffer_load_dword s52, s[20:23], 0x1a ; C21A151A s_buffer_load_dword s53, s[20:23], 0x1b ; C21A951B s_buffer_load_dword s54, s[20:23], 0x1c ; C21B151C s_buffer_load_dword s55, s[20:23], 0x1d ; C21B951D s_buffer_load_dword s56, s[20:23], 0x1e ; C21C151E s_buffer_load_dword s57, s[20:23], 0x1f ; C21C951F s_buffer_load_dword s58, s[20:23], 0x20 ; C21D1520 s_buffer_load_dword s59, s[20:23], 0x21 ; C21D9521 s_buffer_load_dword s60, s[20:23], 0x22 ; C21E1522 s_buffer_load_dword s61, s[20:23], 0x4c ; C21E954C s_buffer_load_dword s62, s[20:23], 0x4d ; C21F154D s_buffer_load_dword s63, s[20:23], 0x4e ; C21F954E s_buffer_load_dword s64, s[20:23], 0x4f ; C220154F s_buffer_load_dword s65, s[20:23], 0x50 ; C2209550 s_buffer_load_dword s66, s[20:23], 0x51 ; C2211551 s_buffer_load_dword s67, s[20:23], 0x52 ; C2219552 s_buffer_load_dword s20, s[20:23], 0x53 ; C20A1553 v_mac_f32_e32 v13, s36, v9 ; 3E1A1224 v_mov_b32_e32 v14, s47 ; 7E1C022F v_mul_f32_e32 v15, s51, v2 ; 101E0433 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v16, s46, v2 ; 1020042E v_mul_f32_e32 v17, s27, v6 ; 10220C1B v_mul_f32_e32 v18, s30, v6 ; 10240C1E v_mul_f32_e32 v6, s33, v6 ; 100C0C21 v_mac_f32_e32 v17, s28, v7 ; 3E220E1C v_mac_f32_e32 v18, s31, v7 ; 3E240E1F v_mac_f32_e32 v6, s34, v7 ; 3E0C0E22 v_mac_f32_e32 v17, s29, v8 ; 3E22101D v_mac_f32_e32 v18, s32, v8 ; 3E241020 v_mac_f32_e32 v6, s35, v8 ; 3E0C1023 v_mul_f32_e32 v7, s52, v2 ; 100E0434 v_mul_f32_e32 v8, s53, v2 ; 10100435 v_mul_f32_e32 v19, s38, v2 ; 10260426 v_mac_f32_e32 v14, s37, v10 ; 3E1C1425 v_cndmask_b32_e32 v9, v11, v9 ; 0012130B v_cndmask_b32_e32 v10, v12, v10 ; 0014150C v_mac_f32_e32 v15, s54, v3 ; 3E1E0636 v_mac_f32_e32 v16, s55, v3 ; 3E200637 v_mac_f32_e32 v7, s56, v3 ; 3E0E0638 v_mac_f32_e32 v8, s57, v3 ; 3E100639 v_mac_f32_e32 v19, s42, v3 ; 3E26062A v_mul_f32_e32 v11, s39, v2 ; 10160427 v_mac_f32_e32 v11, s43, v3 ; 3E16062B v_mul_f32_e32 v12, s40, v2 ; 10180428 v_mac_f32_e32 v12, s44, v3 ; 3E18062C v_mul_f32_e32 v2, s41, v2 ; 10040429 v_mac_f32_e32 v2, s45, v3 ; 3E04062D v_mac_f32_e32 v15, s58, v4 ; 3E1E083A v_mac_f32_e32 v16, s59, v4 ; 3E20083B v_mac_f32_e32 v7, s60, v4 ; 3E0E083C v_mac_f32_e32 v8, s19, v4 ; 3E100813 v_mac_f32_e32 v19, s61, v4 ; 3E26083D v_mac_f32_e32 v11, s62, v4 ; 3E16083E v_mac_f32_e32 v12, s63, v4 ; 3E18083F v_mac_f32_e32 v2, s64, v4 ; 3E040840 v_mac_f32_e32 v15, s24, v5 ; 3E1E0A18 v_mac_f32_e32 v16, s25, v5 ; 3E200A19 v_mac_f32_e32 v7, s26, v5 ; 3E0E0A1A v_mac_f32_e32 v19, s65, v5 ; 3E260A41 v_mac_f32_e32 v11, s66, v5 ; 3E160A42 v_mac_f32_e32 v12, s67, v5 ; 3E180A43 v_mac_f32_e32 v2, s20, v5 ; 3E040A14 v_mov_b32_e32 v3, s50 ; 7E060232 v_mul_f32_e32 v4, v17, v17 ; 10082311 v_mac_f32_e32 v4, v18, v18 ; 3E082512 v_mac_f32_e32 v4, v6, v6 ; 3E080D06 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 v_mac_f32_e32 v3, s48, v9 ; 3E061230 v_mac_f32_e32 v0, s49, v10 ; 3E001431 exp 15, 32, 0, 0, 0, v13, v14, v3, v0 ; F800020F 00030E0D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v4, v17 ; 10002304 v_mul_f32_e32 v3, v4, v18 ; 10062504 v_mul_f32_e32 v4, v4, v6 ; 10080D04 v_mul_f32_e32 v5, v4, v3 ; 100A0704 v_mul_f32_e32 v6, s11, v5 ; 100C0A0B v_mul_f32_e32 v9, s12, v5 ; 10120A0C v_mul_f32_e32 v5, s13, v5 ; 100A0A0D v_mul_f32_e32 v10, v3, v0 ; 10140103 v_mac_f32_e32 v6, s14, v10 ; 3E0C140E v_mac_f32_e32 v9, s9, v10 ; 3E121409 v_mac_f32_e32 v5, s10, v10 ; 3E0A140A v_mul_f32_e32 v10, v4, v4 ; 10140904 v_mac_f32_e32 v6, s5, v10 ; 3E0C1405 v_mac_f32_e32 v9, s7, v10 ; 3E121407 v_mac_f32_e32 v5, s8, v10 ; 3E0A1408 v_mul_f32_e32 v10, v0, v4 ; 10140900 v_mac_f32_e32 v6, s3, v10 ; 3E0C1403 v_mac_f32_e32 v9, s4, v10 ; 3E121404 v_mac_f32_e32 v5, s18, v10 ; 3E0A1412 exp 15, 33, 0, 0, 0, v0, v3, v4, v1 ; F800021F 01040300 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mad_f32 v0, v0, v0, -v3 ; D2820000 840E0100 v_mac_f32_e32 v6, s15, v0 ; 3E0C000F v_mac_f32_e32 v9, s16, v0 ; 3E120010 v_mac_f32_e32 v5, s17, v0 ; 3E0A0011 v_mul_f32_e32 v0, 0.5, v11 ; 100016F0 v_mul_f32_e32 v3, 0.5, v2 ; 100604F0 exp 15, 34, 0, 0, 0, v6, v9, v5, v1 ; F800022F 01050906 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v1, 0.5, v19, v3 ; D2820001 040E26F0 v_mac_f32_e32 v3, s6, v0 ; 3E060006 exp 15, 35, 0, 0, 0, v1, v3, v12, v2 ; F800023F 020C0301 v_subrev_f32_e32 v0, s0, v15 ; 0A001E00 s_waitcnt expcnt(0) ; BF8C070F v_subrev_f32_e32 v1, s1, v16 ; 0A022001 v_subrev_f32_e32 v3, s2, v7 ; 0A060E02 exp 15, 36, 0, 0, 0, v12, v0, v1, v3 ; F800024F 0301000C exp 15, 37, 0, 0, 0, v15, v16, v7, v8 ; F800025F 0807100F exp 15, 12, 0, 1, 0, v19, v11, v12, v2 ; F80008CF 020C0B13 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 20 Code Size: 788 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SVIEW[0], CUBE, FLOAT DCL SVIEW[1], CUBE, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL CONST[0..5] DCL CONST[8..19] DCL CONST[21..22] DCL CONST[24..25] DCL TEMP[0..19], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, 0.5000} IMM[1] FLT32 { 0.7500, 7.0000, 1.0000, 10.0000} IMM[2] FLT32 { 0.9680, 0.0300, 0.0001, -1.0000} 0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx 3: DP3 TEMP[1].x, IN[4].yzww, IN[4].yzww 4: RSQ TEMP[1].x, TEMP[1].xxxx 5: MUL TEMP[1].xyz, IN[4].yzww, TEMP[1].xxxx 6: MOV TEMP[2].xy, IN[0].xyyy 7: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D 8: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[2].xyzz 9: LRP TEMP[3].xyz, CONST[21].xxxx, TEMP[2].xyzz, CONST[16].xyzz 10: MUL TEMP[4].x, CONST[21].xxxx, CONST[16].wwww 11: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx 12: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx 13: MOV TEMP[5].xy, IN[0].xyyy 14: TEX TEMP[5].y, TEMP[5], SAMP[3], 2D 15: ADD TEMP[6].x, IMM[0].xxxx, -CONST[24].xxxx 16: MAD TEMP[5].x, TEMP[5].yyyy, CONST[24].xxxx, TEMP[6].xxxx 17: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz 18: MAX TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx 19: MOV TEMP[7].xyz, IMM[0].yyyy 20: MOV TEMP[8].w, IMM[0].xxxx 21: MOV TEMP[8].xyz, TEMP[0].xyzx 22: DP4 TEMP[9].x, CONST[1], TEMP[8] 23: DP4 TEMP[10].x, CONST[2], TEMP[8] 24: MOV TEMP[9].y, TEMP[10].xxxx 25: DP4 TEMP[8].x, CONST[3], TEMP[8] 26: MOV TEMP[9].z, TEMP[8].xxxx 27: ADD TEMP[8].xyz, IN[2].xyzz, TEMP[9].xyzz 28: MOV TEMP[9].xy, IN[3].xyyy 29: MOV TEMP[9].w, IN[3].wwww 30: TXP TEMP[9].x, TEMP[9], SAMP[5], 2D 31: MUL TEMP[9].xyz, CONST[17].xyzz, TEMP[9].xxxx 32: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx 33: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[1].xyzz 34: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[0].xyzz 35: MUL TEMP[10].xyz, IMM[0].zzzz, TEMP[10].xyzz 36: ADD TEMP[10].xyz, TEMP[1].xyzz, -TEMP[10].xyzz 37: MOV TEMP[11].xyz, TEMP[10].xyzx 38: FSLT TEMP[12].x, IMM[0].yyyy, CONST[10].wwww 39: UIF TEMP[12].xxxx :0 40: DP3 TEMP[12].x, TEMP[10].xyzz, TEMP[10].xyzz 41: RSQ TEMP[12].x, TEMP[12].xxxx 42: MUL TEMP[12].xyz, TEMP[10].xyzz, TEMP[12].xxxx 43: MOV TEMP[13].xyz, -IN[5].xyzx 44: ADD TEMP[14].xyz, CONST[8].xyzz, TEMP[13].xyzz 45: RCP TEMP[15].x, TEMP[12].xxxx 46: RCP TEMP[15].y, TEMP[12].yyyy 47: RCP TEMP[15].z, TEMP[12].zzzz 48: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz 49: ADD TEMP[13].xyz, CONST[9].xyzz, TEMP[13].xyzz 50: RCP TEMP[15].x, TEMP[12].xxxx 51: RCP TEMP[15].y, TEMP[12].yyyy 52: RCP TEMP[15].z, TEMP[12].zzzz 53: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz 54: FSLT TEMP[15].xyz, IMM[0].yyyy, TEMP[12].xyzz 55: UIF TEMP[15].xxxx :0 56: MOV TEMP[16].x, TEMP[14].xxxx 57: ELSE :0 58: MOV TEMP[16].x, TEMP[13].xxxx 59: ENDIF 60: UIF TEMP[15].yyyy :0 61: MOV TEMP[17].x, TEMP[14].yyyy 62: ELSE :0 63: MOV TEMP[17].x, TEMP[13].yyyy 64: ENDIF 65: UIF TEMP[15].zzzz :0 66: MOV TEMP[14].x, TEMP[14].zzzz 67: ELSE :0 68: MOV TEMP[14].x, TEMP[13].zzzz 69: ENDIF 70: ADD TEMP[13].xyz, CONST[8].xyzz, CONST[9].xyzz 71: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[0].wwww 72: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx 73: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx 74: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[10].xyzz 75: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[5].xyzz 76: MAD TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xxxx, TEMP[15].xyzz 77: ADD TEMP[11].xyz, TEMP[12].xyzz, -TEMP[13].xyzz 78: ENDIF 79: ADD TEMP[12].x, IMM[0].xxxx, -CONST[22].xxxx 80: POW TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx 81: MUL TEMP[12].x, TEMP[12].xxxx, IMM[1].yyyy 82: MOV TEMP[11].xyz, TEMP[11].xyzz 83: MOV TEMP[11].w, TEMP[12].xxxx 84: TXL TEMP[11], TEMP[11], SAMP[0], CUBE 85: POW TEMP[12].x, TEMP[11].wwww, CONST[11].yyyy 86: MUL TEMP[12].x, CONST[11].xxxx, TEMP[12].xxxx 87: MUL TEMP[11].xyz, TEMP[12].xxxx, TEMP[11].xyzz 88: FSLT TEMP[12].x, CONST[9].wwww, IMM[1].zzzz 89: UIF TEMP[12].xxxx :0 90: MOV TEMP[12].xyz, TEMP[10].xyzx 91: FSLT TEMP[13].x, IMM[0].yyyy, CONST[14].wwww 92: UIF TEMP[13].xxxx :0 93: DP3 TEMP[13].x, TEMP[10].xyzz, TEMP[10].xyzz 94: RSQ TEMP[13].x, TEMP[13].xxxx 95: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[13].xxxx 96: MOV TEMP[13].xyz, -IN[5].xyzx 97: ADD TEMP[14].xyz, CONST[12].xyzz, TEMP[13].xyzz 98: RCP TEMP[15].x, TEMP[10].xxxx 99: RCP TEMP[15].y, TEMP[10].yyyy 100: RCP TEMP[15].z, TEMP[10].zzzz 101: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz 102: ADD TEMP[13].xyz, CONST[13].xyzz, TEMP[13].xyzz 103: RCP TEMP[15].x, TEMP[10].xxxx 104: RCP TEMP[15].y, TEMP[10].yyyy 105: RCP TEMP[15].z, TEMP[10].zzzz 106: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz 107: FSLT TEMP[15].xyz, IMM[0].yyyy, TEMP[10].xyzz 108: UIF TEMP[15].xxxx :0 109: MOV TEMP[16].x, TEMP[14].xxxx 110: ELSE :0 111: MOV TEMP[16].x, TEMP[13].xxxx 112: ENDIF 113: UIF TEMP[15].yyyy :0 114: MOV TEMP[17].x, TEMP[14].yyyy 115: ELSE :0 116: MOV TEMP[17].x, TEMP[13].yyyy 117: ENDIF 118: UIF TEMP[15].zzzz :0 119: MOV TEMP[14].x, TEMP[14].zzzz 120: ELSE :0 121: MOV TEMP[14].x, TEMP[13].zzzz 122: ENDIF 123: ADD TEMP[13].xyz, CONST[12].xyzz, CONST[13].xyzz 124: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[0].wwww 125: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx 126: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx 127: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[14].xyzz 128: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[5].xyzz 129: MAD TEMP[10].xyz, TEMP[10].xyzz, TEMP[14].xxxx, TEMP[15].xyzz 130: ADD TEMP[12].xyz, TEMP[10].xyzz, -TEMP[13].xyzz 131: ENDIF 132: ADD TEMP[10].x, IMM[0].xxxx, -CONST[22].xxxx 133: POW TEMP[10].x, TEMP[10].xxxx, IMM[1].xxxx 134: MUL TEMP[10].x, TEMP[10].xxxx, IMM[1].yyyy 135: MOV TEMP[12].xyz, TEMP[12].xyzz 136: MOV TEMP[12].w, TEMP[10].xxxx 137: TXL TEMP[10], TEMP[12], SAMP[1], CUBE 138: POW TEMP[12].x, TEMP[10].wwww, CONST[15].yyyy 139: MUL TEMP[12].x, CONST[15].xxxx, TEMP[12].xxxx 140: MUL TEMP[10].xyz, TEMP[12].xxxx, TEMP[10].xyzz 141: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[11].xyzz, TEMP[10].xyzz 142: ELSE :0 143: MOV TEMP[7].xyz, TEMP[11].xyzx 144: ENDIF 145: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx 146: MOV TEMP[1].xyz, -TEMP[1].xyzx 147: ADD TEMP[5].x, IMM[0].xxxx, -CONST[22].xxxx 148: ADD TEMP[10].xyz, CONST[0].xyzz, TEMP[1].xyzz 149: DP3 TEMP[11].x, TEMP[10].xyzz, TEMP[10].xyzz 150: RSQ TEMP[11].x, TEMP[11].xxxx 151: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[11].xxxx 152: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz 153: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx 154: DP3 TEMP[11].x, CONST[0].xyzz, TEMP[10].xyzz 155: MAX TEMP[11].x, IMM[0].yyyy, TEMP[11].xxxx 156: MUL TEMP[12].x, TEMP[5].xxxx, TEMP[5].xxxx 157: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].wwww 158: ADD TEMP[13].x, IMM[0].xxxx, -TEMP[5].xxxx 159: MAD TEMP[13].x, TEMP[13].xxxx, IMM[2].xxxx, IMM[2].yyyy 160: LG2 TEMP[13].x, TEMP[13].xxxx 161: RCP TEMP[13].x, TEMP[13].xxxx 162: MUL TEMP[13].x, IMM[1].wwww, TEMP[13].xxxx 163: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[13].xxxx 164: ADD TEMP[14].x, IMM[0].xxxx, -TEMP[6].xxxx 165: ADD TEMP[15].x, IMM[0].xxxx, -TEMP[1].xxxx 166: MUL TEMP[16].x, IMM[0].zzzz, TEMP[11].xxxx 167: MUL TEMP[5].x, TEMP[11].xxxx, TEMP[5].xxxx 168: MAD TEMP[5].x, TEMP[16].xxxx, TEMP[5].xxxx, IMM[0].wwww 169: ADD TEMP[11].x, IMM[0].xxxx, -TEMP[11].xxxx 170: ADD TEMP[16].x, IMM[0].xxxx, -TEMP[1].xxxx 171: MOV TEMP[17].xy, IN[0].xyyy 172: TEX TEMP[17].xyz, TEMP[17], SAMP[4], 2D 173: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx 174: ADD TEMP[4].x, CONST[22].xxxx, TEMP[4].xxxx 175: MOV_SAT TEMP[4].x, TEMP[4].xxxx 176: MUL TEMP[18].x, TEMP[16].xxxx, TEMP[16].xxxx 177: MUL TEMP[19].x, TEMP[16].xxxx, TEMP[16].xxxx 178: MUL TEMP[16].x, TEMP[19].xxxx, TEMP[16].xxxx 179: MUL TEMP[16].x, TEMP[18].xxxx, TEMP[16].xxxx 180: LRP TEMP[4].xyz, TEMP[16].xxxx, TEMP[4].xxxx, TEMP[3].xyzz 181: LRP TEMP[16].x, TEMP[6].xxxx, IMM[0].xxxx, TEMP[12].xxxx 182: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx, TEMP[12].xxxx 183: MAD TEMP[1].x, TEMP[16].xxxx, TEMP[1].xxxx, IMM[2].zzzz 184: RCP TEMP[1].x, TEMP[1].xxxx 185: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[10].xyzz 186: MAX TEMP[10].x, IMM[0].yyyy, TEMP[10].xxxx 187: POW TEMP[10].x, TEMP[10].xxxx, TEMP[13].xxxx 188: ADD TEMP[12].x, TEMP[13].xxxx, IMM[0].xxxx 189: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].yyyy 190: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[12].xxxx 191: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[10].xxxx 192: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx 193: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx 194: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx 195: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[9].xyzz 196: ADD TEMP[10].xyz, IMM[0].xxxx, -TEMP[3].xyzz 197: MUL TEMP[12].x, TEMP[11].xxxx, TEMP[11].xxxx 198: MUL TEMP[13].x, TEMP[11].xxxx, TEMP[11].xxxx 199: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[11].xxxx 200: MUL TEMP[11].x, TEMP[12].xxxx, TEMP[11].xxxx 201: MAD TEMP[3].xyz, TEMP[10].xyzz, TEMP[11].xxxx, TEMP[3].xyzz 202: ADD TEMP[10].x, TEMP[5].xxxx, IMM[2].wwww 203: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx 204: MUL TEMP[12].x, TEMP[14].xxxx, TEMP[14].xxxx 205: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[14].xxxx 206: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx 207: MAD TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx, IMM[0].xxxx 208: ADD TEMP[5].x, TEMP[5].xxxx, IMM[2].wwww 209: MUL TEMP[11].x, TEMP[15].xxxx, TEMP[15].xxxx 210: MUL TEMP[12].x, TEMP[15].xxxx, TEMP[15].xxxx 211: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[15].xxxx 212: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx 213: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[11].xxxx, IMM[0].xxxx 214: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx 215: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 216: MAD TEMP[5].xyz, TEMP[9].xyzz, TEMP[5].xxxx, TEMP[8].xyzz 217: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz 218: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz 219: MAD TEMP[1].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz 220: MAD TEMP[0].xyz, TEMP[17].xyzz, CONST[25].xyzz, TEMP[1].xyzz 221: MAD TEMP[1].x, IN[4].xxxx, CONST[5].zzzz, CONST[5].wwww 222: MOV_SAT TEMP[1].x, TEMP[1].xxxx 223: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz 224: MOV TEMP[0].xyz, TEMP[0].xyzx 225: MOV TEMP[0].w, IMM[0].xxxx 226: MOV OUT[0], TEMP[0] 227: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300) %76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312) %79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336) %80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384) %82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 400) %83 = call float @llvm.SI.load.const(<16 x i8> %23, i32 404) %84 = call float @llvm.SI.load.const(<16 x i8> %23, i32 408) %85 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %86 = load <32 x i8>, <32 x i8> addrspace(2)* %85, align 32, !tbaa !0 %87 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 %89 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %90 = bitcast <8 x i32> addrspace(2)* %89 to <32 x i8> addrspace(2)* %91 = load <32 x i8>, <32 x i8> addrspace(2)* %90, align 32, !tbaa !0 %92 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %93 = bitcast <4 x i32> addrspace(2)* %92 to <16 x i8> addrspace(2)* %94 = load <16 x i8>, <16 x i8> addrspace(2)* %93, align 16, !tbaa !0 %95 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %96 = bitcast <8 x i32> addrspace(2)* %95 to <32 x i8> addrspace(2)* %97 = load <32 x i8>, <32 x i8> addrspace(2)* %96, align 32, !tbaa !0 %98 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %99 = bitcast <4 x i32> addrspace(2)* %98 to <16 x i8> addrspace(2)* %100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !tbaa !0 %101 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %102 = bitcast <8 x i32> addrspace(2)* %101 to <32 x i8> addrspace(2)* %103 = load <32 x i8>, <32 x i8> addrspace(2)* %102, align 32, !tbaa !0 %104 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %105 = bitcast <4 x i32> addrspace(2)* %104 to <16 x i8> addrspace(2)* %106 = load <16 x i8>, <16 x i8> addrspace(2)* %105, align 16, !tbaa !0 %107 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %108 = bitcast <8 x i32> addrspace(2)* %107 to <32 x i8> addrspace(2)* %109 = load <32 x i8>, <32 x i8> addrspace(2)* %108, align 32, !tbaa !0 %110 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %111 = bitcast <4 x i32> addrspace(2)* %110 to <16 x i8> addrspace(2)* %112 = load <16 x i8>, <16 x i8> addrspace(2)* %111, align 16, !tbaa !0 %113 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %114 = bitcast <8 x i32> addrspace(2)* %113 to <32 x i8> addrspace(2)* %115 = load <32 x i8>, <32 x i8> addrspace(2)* %114, align 32, !tbaa !0 %116 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %117 = bitcast <4 x i32> addrspace(2)* %116 to <16 x i8> addrspace(2)* %118 = load <16 x i8>, <16 x i8> addrspace(2)* %117, align 16, !tbaa !0 %119 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %123 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %124 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %125 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %126 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %127 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %128 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %129 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %130 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %131 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %132 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %133 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %134 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %135 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %136 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %137 = fmul float %121, %121 %138 = fmul float %122, %122 %139 = fadd float %138, %137 %140 = fmul float %123, %123 %141 = fadd float %139, %140 %142 = call float @llvm.AMDGPU.rsq.clamped.f32(float %141) %143 = fmul float %121, %142 %144 = fmul float %122, %142 %145 = fmul float %123, %142 %146 = fmul float %131, %131 %147 = fmul float %132, %132 %148 = fadd float %147, %146 %149 = fmul float %133, %133 %150 = fadd float %148, %149 %151 = call float @llvm.AMDGPU.rsq.clamped.f32(float %150) %152 = fmul float %131, %151 %153 = fmul float %132, %151 %154 = fmul float %133, %151 %155 = bitcast float %119 to i32 %156 = bitcast float %120 to i32 %157 = insertelement <2 x i32> undef, i32 %155, i32 0 %158 = insertelement <2 x i32> %157, i32 %156, i32 1 %159 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %97, <16 x i8> %100, i32 2) %160 = extractelement <4 x float> %159, i32 0 %161 = extractelement <4 x float> %159, i32 1 %162 = extractelement <4 x float> %159, i32 2 %163 = fmul float %76, %160 %164 = fmul float %77, %161 %165 = fmul float %78, %162 %166 = call float @llvm.AMDGPU.lrp(float %79, float %163, float %66) %167 = call float @llvm.AMDGPU.lrp(float %79, float %164, float %67) %168 = call float @llvm.AMDGPU.lrp(float %79, float %165, float %68) %169 = fmul float %79, %69 %170 = fsub float %69, %169 %171 = fmul float %163, %170 %172 = fmul float %164, %170 %173 = fmul float %165, %170 %174 = bitcast float %119 to i32 %175 = bitcast float %120 to i32 %176 = insertelement <2 x i32> undef, i32 %174, i32 0 %177 = insertelement <2 x i32> %176, i32 %175, i32 1 %178 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %177, <32 x i8> %103, <16 x i8> %106, i32 2) %179 = extractelement <4 x float> %178, i32 1 %180 = fsub float 1.000000e+00, %81 %181 = fmul float %179, %81 %182 = fadd float %181, %180 %183 = fmul float %143, %24 %184 = fmul float %144, %25 %185 = fadd float %184, %183 %186 = fmul float %145, %26 %187 = fadd float %185, %186 %188 = call float @llvm.maxnum.f32(float %187, float 0.000000e+00) %189 = fmul float %27, %143 %190 = fmul float %28, %144 %191 = fadd float %189, %190 %192 = fmul float %29, %145 %193 = fadd float %191, %192 %194 = fadd float %193, %30 %195 = fmul float %31, %143 %196 = fmul float %32, %144 %197 = fadd float %195, %196 %198 = fmul float %33, %145 %199 = fadd float %197, %198 %200 = fadd float %199, %34 %201 = fmul float %35, %143 %202 = fmul float %36, %144 %203 = fadd float %201, %202 %204 = fmul float %37, %145 %205 = fadd float %203, %204 %206 = fadd float %205, %38 %207 = fadd float %124, %194 %208 = fadd float %125, %200 %209 = fadd float %126, %206 %210 = fdiv float %127, %129 %211 = fdiv float %128, %129 %212 = bitcast float %210 to i32 %213 = bitcast float %211 to i32 %214 = insertelement <2 x i32> undef, i32 %212, i32 0 %215 = insertelement <2 x i32> %214, i32 %213, i32 1 %216 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %215, <32 x i8> %115, <16 x i8> %118, i32 2) %217 = extractelement <4 x float> %216, i32 0 %218 = fmul float %70, %217 %219 = fmul float %71, %217 %220 = fmul float %72, %217 %221 = fmul float %207, %182 %222 = fmul float %208, %182 %223 = fmul float %209, %182 %224 = fmul float %143, %152 %225 = fmul float %144, %153 %226 = fadd float %225, %224 %227 = fmul float %145, %154 %228 = fadd float %226, %227 %229 = fmul float %228, %143 %230 = fmul float %228, %144 %231 = fmul float %228, %145 %232 = fmul float %229, 2.000000e+00 %233 = fmul float %230, 2.000000e+00 %234 = fmul float %231, 2.000000e+00 %235 = fsub float %152, %232 %236 = fsub float %153, %233 %237 = fsub float %154, %234 %238 = fcmp ogt float %51, 0.000000e+00 br i1 %238, label %IF, label %ENDIF IF: ; preds = %main_body %239 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %240 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %241 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %242 = fmul float %235, %235 %243 = fmul float %236, %236 %244 = fadd float %243, %242 %245 = fmul float %237, %237 %246 = fadd float %244, %245 %247 = call float @llvm.AMDGPU.rsq.clamped.f32(float %246) %248 = fmul float %235, %247 %249 = fmul float %236, %247 %250 = fmul float %237, %247 %251 = fsub float %44, %134 %252 = fsub float %45, %135 %253 = fsub float %46, %136 %254 = fdiv float 1.000000e+00, %248 %255 = fdiv float 1.000000e+00, %249 %256 = fdiv float 1.000000e+00, %250 %257 = fmul float %251, %254 %258 = fmul float %252, %255 %259 = fmul float %253, %256 %260 = fsub float %47, %134 %261 = fsub float %48, %135 %262 = fsub float %49, %136 %263 = fdiv float 1.000000e+00, %248 %264 = fdiv float 1.000000e+00, %249 %265 = fdiv float 1.000000e+00, %250 %266 = fmul float %260, %263 %267 = fmul float %261, %264 %268 = fmul float %262, %265 %269 = fcmp ogt float %248, 0.000000e+00 %270 = fcmp ogt float %249, 0.000000e+00 %271 = fcmp ogt float %250, 0.000000e+00 %. = select i1 %269, float %257, float %266 %temp68.0 = select i1 %270, float %258, float %267 %.104 = select i1 %271, float %259, float %268 %272 = fadd float %44, %47 %273 = fadd float %45, %48 %274 = fadd float %46, %49 %275 = fmul float %272, 5.000000e-01 %276 = fmul float %273, 5.000000e-01 %277 = fmul float %274, 5.000000e-01 %278 = call float @llvm.minnum.f32(float %., float %temp68.0) %279 = call float @llvm.minnum.f32(float %278, float %.104) %280 = fsub float %275, %241 %281 = fsub float %276, %240 %282 = fsub float %277, %239 %283 = fadd float %280, %134 %284 = fadd float %281, %135 %285 = fadd float %282, %136 %286 = fmul float %248, %279 %287 = fadd float %286, %283 %288 = fmul float %249, %279 %289 = fadd float %288, %284 %290 = fmul float %250, %279 %291 = fadd float %290, %285 %292 = fsub float %287, %275 %293 = fsub float %289, %276 %294 = fsub float %291, %277 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp44.0 = phi float [ %292, %IF ], [ %235, %main_body ] %temp45.0 = phi float [ %293, %IF ], [ %236, %main_body ] %temp46.0 = phi float [ %294, %IF ], [ %237, %main_body ] %295 = fsub float 1.000000e+00, %80 %296 = call float @llvm.pow.f32(float %295, float 7.500000e-01) %297 = fmul float %296, 7.000000e+00 %298 = insertelement <4 x float> undef, float %temp44.0, i32 0 %299 = insertelement <4 x float> %298, float %temp45.0, i32 1 %300 = insertelement <4 x float> %299, float %temp46.0, i32 2 %301 = insertelement <4 x float> %300, float %297, i32 3 %302 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %301) %303 = extractelement <4 x float> %302, i32 0 %304 = extractelement <4 x float> %302, i32 1 %305 = extractelement <4 x float> %302, i32 2 %306 = extractelement <4 x float> %302, i32 3 %307 = call float @llvm.fabs.f32(float %305) %308 = fdiv float 1.000000e+00, %307 %309 = fmul float %303, %308 %310 = fadd float %309, 1.500000e+00 %311 = fmul float %304, %308 %312 = fadd float %311, 1.500000e+00 %313 = bitcast float %312 to i32 %314 = bitcast float %310 to i32 %315 = bitcast float %306 to i32 %316 = bitcast float %297 to i32 %317 = insertelement <4 x i32> undef, i32 %313, i32 0 %318 = insertelement <4 x i32> %317, i32 %314, i32 1 %319 = insertelement <4 x i32> %318, i32 %315, i32 2 %320 = insertelement <4 x i32> %319, i32 %316, i32 3 %321 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %320, <32 x i8> %86, <16 x i8> %88, i32 4) %322 = extractelement <4 x float> %321, i32 0 %323 = extractelement <4 x float> %321, i32 1 %324 = extractelement <4 x float> %321, i32 2 %325 = extractelement <4 x float> %321, i32 3 %326 = call float @llvm.pow.f32(float %325, float %53) %327 = fmul float %52, %326 %328 = fmul float %327, %322 %329 = fmul float %327, %323 %330 = fmul float %327, %324 %331 = fcmp olt float %50, 0x3FEFFFEB00000000 br i1 %331, label %IF90, label %ENDIF89 IF90: ; preds = %ENDIF %332 = fcmp ogt float %63, 0.000000e+00 br i1 %332, label %IF93, label %ENDIF92 ENDIF89: ; preds = %ENDIF, %ENDIF92 %temp28.0 = phi float [ %574, %ENDIF92 ], [ %328, %ENDIF ] %temp29.0 = phi float [ %575, %ENDIF92 ], [ %329, %ENDIF ] %temp30.0 = phi float [ %576, %ENDIF92 ], [ %330, %ENDIF ] %333 = fmul float %temp28.0, %182 %334 = fmul float %temp29.0, %182 %335 = fmul float %temp30.0, %182 %336 = fsub float 1.000000e+00, %80 %337 = fsub float %24, %152 %338 = fsub float %25, %153 %339 = fsub float %26, %154 %340 = fmul float %337, %337 %341 = fmul float %338, %338 %342 = fadd float %341, %340 %343 = fmul float %339, %339 %344 = fadd float %342, %343 %345 = call float @llvm.AMDGPU.rsq.clamped.f32(float %344) %346 = fmul float %337, %345 %347 = fmul float %338, %345 %348 = fmul float %339, %345 %349 = fmul float %152, %143 %350 = fsub float -0.000000e+00, %349 %351 = fmul float %153, %144 %352 = fsub float %350, %351 %353 = fmul float %154, %145 %354 = fsub float %352, %353 %355 = call float @llvm.maxnum.f32(float %354, float 0.000000e+00) %356 = fmul float %24, %346 %357 = fmul float %25, %347 %358 = fadd float %357, %356 %359 = fmul float %26, %348 %360 = fadd float %358, %359 %361 = call float @llvm.maxnum.f32(float %360, float 0.000000e+00) %362 = fmul float %336, %336 %363 = fmul float %362, %75 %364 = fsub float 1.000000e+00, %336 %365 = fmul float %364, 0x3FEEF9DB20000000 %366 = fadd float %365, 0x3F9EB851E0000000 %367 = call float @llvm.log2.f32(float %366) %368 = fdiv float 1.000000e+00, %367 %369 = fmul float %368, 1.000000e+01 %370 = fmul float %369, %369 %371 = fsub float 1.000000e+00, %188 %372 = fsub float 1.000000e+00, %355 %373 = fmul float %361, 2.000000e+00 %374 = fmul float %361, %336 %375 = fmul float %373, %374 %376 = fadd float %375, 5.000000e-01 %377 = fsub float 1.000000e+00, %361 %378 = fsub float 1.000000e+00, %355 %379 = bitcast float %119 to i32 %380 = bitcast float %120 to i32 %381 = insertelement <2 x i32> undef, i32 %379, i32 0 %382 = insertelement <2 x i32> %381, i32 %380, i32 1 %383 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %382, <32 x i8> %109, <16 x i8> %112, i32 2) %384 = extractelement <4 x float> %383, i32 0 %385 = extractelement <4 x float> %383, i32 1 %386 = extractelement <4 x float> %383, i32 2 %387 = fsub float 1.000000e+00, %170 %388 = fadd float %80, %387 %389 = call float @llvm.AMDIL.clamp.(float %388, float 0.000000e+00, float 1.000000e+00) %390 = fmul float %378, %378 %391 = fmul float %378, %378 %392 = fmul float %391, %378 %393 = fmul float %390, %392 %394 = call float @llvm.AMDGPU.lrp(float %393, float %389, float %166) %395 = call float @llvm.AMDGPU.lrp(float %393, float %389, float %167) %396 = call float @llvm.AMDGPU.lrp(float %393, float %389, float %168) %397 = call float @llvm.AMDGPU.lrp(float %188, float 1.000000e+00, float %363) %398 = call float @llvm.AMDGPU.lrp(float %355, float 1.000000e+00, float %363) %399 = fmul float %397, %398 %400 = fadd float %399, 0x3F1A36E2E0000000 %401 = fdiv float 1.000000e+00, %400 %402 = fmul float %143, %346 %403 = fmul float %144, %347 %404 = fadd float %403, %402 %405 = fmul float %145, %348 %406 = fadd float %404, %405 %407 = call float @llvm.maxnum.f32(float %406, float 0.000000e+00) %408 = call float @llvm.pow.f32(float %407, float %370) %409 = fadd float %370, 1.000000e+00 %410 = fmul float %409, %74 %411 = fmul float %408, %410 %412 = fmul float %401, %411 %413 = fmul float %412, %188 %414 = fmul float %413, %73 %415 = call float @llvm.maxnum.f32(float %414, float 0.000000e+00) %416 = fmul float %415, %218 %417 = fmul float %415, %219 %418 = fmul float %415, %220 %419 = fsub float 1.000000e+00, %166 %420 = fsub float 1.000000e+00, %167 %421 = fsub float 1.000000e+00, %168 %422 = fmul float %377, %377 %423 = fmul float %377, %377 %424 = fmul float %423, %377 %425 = fmul float %422, %424 %426 = fmul float %419, %425 %427 = fadd float %426, %166 %428 = fmul float %420, %425 %429 = fadd float %428, %167 %430 = fmul float %421, %425 %431 = fadd float %430, %168 %432 = fadd float %376, -1.000000e+00 %433 = fmul float %371, %371 %434 = fmul float %371, %371 %435 = fmul float %434, %371 %436 = fmul float %433, %435 %437 = fmul float %432, %436 %438 = fadd float %437, 1.000000e+00 %439 = fadd float %376, -1.000000e+00 %440 = fmul float %372, %372 %441 = fmul float %372, %372 %442 = fmul float %441, %372 %443 = fmul float %440, %442 %444 = fmul float %439, %443 %445 = fadd float %444, 1.000000e+00 %446 = fmul float %438, %445 %447 = fmul float %446, %188 %448 = fmul float %218, %447 %449 = fadd float %448, %221 %450 = fmul float %219, %447 %451 = fadd float %450, %222 %452 = fmul float %220, %447 %453 = fadd float %452, %223 %454 = fmul float %171, %449 %455 = fmul float %172, %451 %456 = fmul float %173, %453 %457 = fmul float %416, %427 %458 = fadd float %457, %454 %459 = fmul float %417, %429 %460 = fadd float %459, %455 %461 = fmul float %418, %431 %462 = fadd float %461, %456 %463 = fmul float %333, %394 %464 = fadd float %463, %458 %465 = fmul float %334, %395 %466 = fadd float %465, %460 %467 = fmul float %335, %396 %468 = fadd float %467, %462 %469 = fmul float %384, %82 %470 = fadd float %469, %464 %471 = fmul float %385, %83 %472 = fadd float %471, %466 %473 = fmul float %386, %84 %474 = fadd float %473, %468 %475 = fmul float %130, %42 %476 = fadd float %475, %43 %477 = call float @llvm.AMDIL.clamp.(float %476, float 0.000000e+00, float 1.000000e+00) %478 = call float @llvm.AMDGPU.lrp(float %477, float %470, float %39) %479 = call float @llvm.AMDGPU.lrp(float %477, float %472, float %40) %480 = call float @llvm.AMDGPU.lrp(float %477, float %474, float %41) %481 = call i32 @llvm.SI.packf16(float %478, float %479) %482 = bitcast i32 %481 to float %483 = call i32 @llvm.SI.packf16(float %480, float 1.000000e+00) %484 = bitcast i32 %483 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %482, float %484, float %482, float %484) ret void IF93: ; preds = %IF90 %485 = fmul float %235, %235 %486 = fmul float %236, %236 %487 = fadd float %486, %485 %488 = fmul float %237, %237 %489 = fadd float %487, %488 %490 = call float @llvm.AMDGPU.rsq.clamped.f32(float %489) %491 = fmul float %235, %490 %492 = fmul float %236, %490 %493 = fmul float %237, %490 %494 = fsub float %54, %134 %495 = fsub float %55, %135 %496 = fsub float %56, %136 %497 = fdiv float 1.000000e+00, %491 %498 = fdiv float 1.000000e+00, %492 %499 = fdiv float 1.000000e+00, %493 %500 = fmul float %494, %497 %501 = fmul float %495, %498 %502 = fmul float %496, %499 %503 = fsub float %57, %134 %504 = fsub float %58, %135 %505 = fsub float %59, %136 %506 = fdiv float 1.000000e+00, %491 %507 = fdiv float 1.000000e+00, %492 %508 = fdiv float 1.000000e+00, %493 %509 = fmul float %503, %506 %510 = fmul float %504, %507 %511 = fmul float %505, %508 %512 = fcmp ogt float %491, 0.000000e+00 %513 = fcmp ogt float %492, 0.000000e+00 %514 = fcmp ogt float %493, 0.000000e+00 %.105 = select i1 %512, float %500, float %509 %temp68.1 = select i1 %513, float %501, float %510 %.106 = select i1 %514, float %502, float %511 %515 = fadd float %54, %57 %516 = fadd float %55, %58 %517 = fadd float %56, %59 %518 = fmul float %515, 5.000000e-01 %519 = fmul float %516, 5.000000e-01 %520 = fmul float %517, 5.000000e-01 %521 = call float @llvm.minnum.f32(float %.105, float %temp68.1) %522 = call float @llvm.minnum.f32(float %521, float %.106) %523 = fsub float %518, %60 %524 = fsub float %519, %61 %525 = fsub float %520, %62 %526 = fadd float %523, %134 %527 = fadd float %524, %135 %528 = fadd float %525, %136 %529 = fmul float %491, %522 %530 = fadd float %529, %526 %531 = fmul float %492, %522 %532 = fadd float %531, %527 %533 = fmul float %493, %522 %534 = fadd float %533, %528 %535 = fsub float %530, %518 %536 = fsub float %532, %519 %537 = fsub float %534, %520 br label %ENDIF92 ENDIF92: ; preds = %IF90, %IF93 %temp48.0 = phi float [ %535, %IF93 ], [ %235, %IF90 ] %temp49.0 = phi float [ %536, %IF93 ], [ %236, %IF90 ] %temp50.0 = phi float [ %537, %IF93 ], [ %237, %IF90 ] %538 = fsub float 1.000000e+00, %80 %539 = call float @llvm.pow.f32(float %538, float 7.500000e-01) %540 = fmul float %539, 7.000000e+00 %541 = insertelement <4 x float> undef, float %temp48.0, i32 0 %542 = insertelement <4 x float> %541, float %temp49.0, i32 1 %543 = insertelement <4 x float> %542, float %temp50.0, i32 2 %544 = insertelement <4 x float> %543, float %540, i32 3 %545 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %544) %546 = extractelement <4 x float> %545, i32 0 %547 = extractelement <4 x float> %545, i32 1 %548 = extractelement <4 x float> %545, i32 2 %549 = extractelement <4 x float> %545, i32 3 %550 = call float @llvm.fabs.f32(float %548) %551 = fdiv float 1.000000e+00, %550 %552 = fmul float %546, %551 %553 = fadd float %552, 1.500000e+00 %554 = fmul float %547, %551 %555 = fadd float %554, 1.500000e+00 %556 = bitcast float %555 to i32 %557 = bitcast float %553 to i32 %558 = bitcast float %549 to i32 %559 = bitcast float %540 to i32 %560 = insertelement <4 x i32> undef, i32 %556, i32 0 %561 = insertelement <4 x i32> %560, i32 %557, i32 1 %562 = insertelement <4 x i32> %561, i32 %558, i32 2 %563 = insertelement <4 x i32> %562, i32 %559, i32 3 %564 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %563, <32 x i8> %91, <16 x i8> %94, i32 4) %565 = extractelement <4 x float> %564, i32 0 %566 = extractelement <4 x float> %564, i32 1 %567 = extractelement <4 x float> %564, i32 2 %568 = extractelement <4 x float> %564, i32 3 %569 = call float @llvm.pow.f32(float %568, float %65) %570 = fmul float %64, %569 %571 = fmul float %570, %565 %572 = fmul float %570, %566 %573 = fmul float %570, %567 %574 = call float @llvm.AMDGPU.lrp(float %50, float %328, float %571) %575 = call float @llvm.AMDGPU.lrp(float %50, float %329, float %572) %576 = call float @llvm.AMDGPU.lrp(float %50, float %330, float %573) br label %ENDIF89 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900 v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901 v_interp_p1_f32 v10, v0, 2, 2, [m0] ; C8280A00 v_interp_p2_f32 v10, [v10], v1, 2, 2, [m0] ; C8290A01 v_interp_p1_f32 v15, v0, 0, 3, [m0] ; C83C0C00 v_interp_p2_f32 v15, [v15], v1, 0, 3, [m0] ; C83D0C01 v_interp_p1_f32 v16, v0, 1, 3, [m0] ; C8400D00 v_interp_p2_f32 v16, [v16], v1, 1, 3, [m0] ; C8410D01 v_interp_p1_f32 v17, v0, 3, 3, [m0] ; C8440F00 v_interp_p2_f32 v17, [v17], v1, 3, 3, [m0] ; C8450F01 v_interp_p1_f32 v11, v0, 0, 4, [m0] ; C82C1000 v_interp_p2_f32 v11, [v11], v1, 0, 4, [m0] ; C82D1001 v_interp_p1_f32 v18, v0, 1, 4, [m0] ; C8481100 v_interp_p2_f32 v18, [v18], v1, 1, 4, [m0] ; C8491101 v_interp_p1_f32 v19, v0, 2, 4, [m0] ; C84C1200 v_interp_p2_f32 v19, [v19], v1, 2, 4, [m0] ; C84D1201 v_interp_p1_f32 v20, v0, 3, 4, [m0] ; C8501300 v_interp_p2_f32 v20, [v20], v1, 3, 4, [m0] ; C8511301 v_mul_f32_e32 v6, v4, v4 ; 100C0904 v_mac_f32_e32 v6, v5, v5 ; 3E0C0B05 v_mac_f32_e32 v6, v8, v8 ; 3E0C1108 v_rsq_clamp_f32_e32 v12, v6 ; 7E185906 v_mul_f32_e32 v6, v18, v18 ; 100C2512 v_mac_f32_e32 v6, v19, v19 ; 3E0C2713 v_mac_f32_e32 v6, v20, v20 ; 3E0C2914 v_rsq_clamp_f32_e32 v21, v6 ; 7E2A5906 v_mul_f32_e32 v6, v12, v4 ; 100C090C v_mul_f32_e32 v5, v12, v5 ; 100A0B0C v_mul_f32_e32 v4, v12, v8 ; 1008110C v_mul_f32_e32 v13, v21, v18 ; 101A2515 v_mul_f32_e32 v12, v21, v19 ; 10182715 v_mul_f32_e32 v8, v13, v6 ; 10100D0D v_mac_f32_e32 v8, v12, v5 ; 3E100B0C v_mul_f32_e32 v14, v21, v20 ; 101C2915 v_mac_f32_e32 v8, v14, v4 ; 3E10090E v_mul_f32_e32 v22, v6, v8 ; 102C1106 v_mac_f32_e32 v22, v6, v8 ; 3E2C1106 v_mul_f32_e32 v23, v5, v8 ; 102E1105 v_mac_f32_e32 v23, v5, v8 ; 3E2E1105 v_mad_f32 v27, v18, v21, -v22 ; D282001B 845A2B12 v_mad_f32 v28, v19, v21, -v23 ; D282001C 845E2B13 v_interp_p1_f32 v30, v0, 0, 5, [m0] ; C8781400 v_interp_p2_f32 v30, [v30], v1, 0, 5, [m0] ; C8791401 v_interp_p1_f32 v26, v0, 1, 5, [m0] ; C8681500 v_interp_p2_f32 v26, [v26], v1, 1, 5, [m0] ; C8691501 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx8 s[16:23], s[6:7], 0x10 ; C0C80710 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p1_f32 v31, v0, 2, 5, [m0] ; C87C1600 v_interp_p2_f32 v31, [v31], v1, 2, 5, [m0] ; C87D1601 v_mul_f32_e32 v0, v4, v8 ; 10001104 v_mac_f32_e32 v0, v4, v8 ; 3E001104 s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C s_load_dwordx4 s[36:39], s[4:5], 0x14 ; C0920514 s_load_dwordx8 s[40:47], s[6:7], 0x18 ; C0D40718 s_load_dwordx8 s[48:55], s[6:7], 0x28 ; C0D80728 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[32:34], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[12:15] ; F0800700 00642002 s_buffer_load_dword s0, s[8:11], 0x4c ; C200094C s_buffer_load_dword s1, s[8:11], 0x4d ; C200894D s_buffer_load_dword s2, s[8:11], 0x4e ; C201094E v_mad_f32 v29, v20, v21, -v0 ; D282001D 84022B14 v_mov_b32_e32 v0, 0x6f800000 ; 7E0002FF 6F800000 v_cmp_gt_f32_e64 vcc, |v17|, v0 ; D008016A 00020111 v_mov_b32_e32 v0, 0x2f800000 ; 7E0002FF 2F800000 v_cndmask_b32_e32 v0, 1.0, v0 ; 000000F2 v_mul_f32_e32 v1, v0, v17 ; 10022300 v_rcp_f32_e32 v1, v1 ; 7E025501 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v23, s0, v32 ; 102E4000 v_mul_f32_e32 v24, s1, v33 ; 10304201 v_mul_f32_e32 v25, s2, v34 ; 10324402 v_mul_f32_e32 v8, v1, v15 ; 10101F01 v_mul_f32_e32 v1, v1, v16 ; 10022101 s_buffer_load_dword s1, s[8:11], 0x40 ; C2008940 s_buffer_load_dword s31, s[8:11], 0x54 ; C20F8954 s_buffer_load_dword s2, s[8:11], 0x41 ; C2010941 s_buffer_load_dword s3, s[8:11], 0x42 ; C2018942 v_mul_f32_e32 v15, v8, v0 ; 101E0108 v_mul_f32_e32 v16, v1, v0 ; 10200101 s_buffer_load_dword s12, s[8:11], 0x27 ; C2060927 s_buffer_load_dword s13, s[8:11], 0x2b ; C206892B s_buffer_load_dword s32, s[8:11], 0x2c ; C210092C s_buffer_load_dword s33, s[8:11], 0x2d ; C210892D s_buffer_load_dword s0, s[8:11], 0x58 ; C2000958 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v0, 1.0, s31 ; D2080000 00003EF2 v_mul_f32_e32 v8, s1, v0 ; 10100001 v_mul_f32_e32 v1, s2, v0 ; 10020002 v_mul_f32_e32 v0, s3, v0 ; 10000003 v_mac_f32_e32 v8, s31, v23 ; 3E102E1F v_mov_b32_e32 v32, v27 ; 7E40031B v_mac_f32_e32 v1, s31, v24 ; 3E02301F v_mov_b32_e32 v33, v28 ; 7E42031C v_mac_f32_e32 v0, s31, v25 ; 3E00321F v_mov_b32_e32 v34, v29 ; 7E44031D v_cmp_lt_f32_e64 s[2:3], 0, s13 ; D0020002 00001A80 image_sample v[19:22], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[40:47], s[24:27] ; F0800F00 00CA1302 image_sample v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[48:55], s[36:39] ; F0800F00 012C0F0F s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[14:15], s[2:3] ; BE8E2402 s_xor_b64 s[14:15], exec, s[14:15] ; 898E0E7E s_cbranch_execz BB0_2 ; BF880000 s_buffer_load_dword s1, s[8:11], 0x20 ; C2008920 s_buffer_load_dword s2, s[8:11], 0x21 ; C2010921 s_buffer_load_dword s3, s[8:11], 0x22 ; C2018922 s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924 s_buffer_load_dword s16, s[8:11], 0x25 ; C2080925 v_mul_f32_e32 v16, v27, v27 ; 1020371B v_mac_f32_e32 v16, v28, v28 ; 3E20391C v_mac_f32_e32 v16, v29, v29 ; 3E203B1D v_rsq_clamp_f32_e32 v16, v16 ; 7E205910 s_buffer_load_dword s17, s[8:11], 0x26 ; C2088926 s_buffer_load_dword s18, s[8:11], 0x28 ; C2090928 s_buffer_load_dword s19, s[8:11], 0x29 ; C2098929 s_buffer_load_dword s20, s[8:11], 0x2a ; C20A092A v_mul_f32_e32 v17, v16, v27 ; 10223710 v_mul_f32_e32 v18, v16, v28 ; 10243910 v_mul_f32_e32 v16, v16, v29 ; 10203B10 v_rcp_f32_e32 v19, v17 ; 7E265511 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v21, s1, v30 ; 082A3C01 v_sub_f32_e32 v22, s2, v26 ; 082C3402 v_rcp_f32_e32 v32, v18 ; 7E405512 v_mul_f32_e32 v21, v19, v21 ; 102A2B13 v_sub_f32_e32 v33, s13, v30 ; 08423C0D v_mul_f32_e32 v19, v19, v33 ; 10264313 v_cmp_lt_f32_e32 vcc, 0, v17 ; 7C022280 v_cndmask_b32_e32 v19, v19, v21 ; 00262B13 v_rcp_f32_e32 v21, v16 ; 7E2A5510 v_mul_f32_e32 v22, v32, v22 ; 102C2D20 v_sub_f32_e32 v33, s16, v26 ; 08423410 v_mul_f32_e32 v32, v32, v33 ; 10404320 v_cmp_lt_f32_e32 vcc, 0, v18 ; 7C022480 v_cndmask_b32_e32 v22, v32, v22 ; 002C2D20 v_sub_f32_e32 v32, s3, v31 ; 08403E03 v_mul_f32_e32 v32, v21, v32 ; 10404115 v_sub_f32_e32 v33, s17, v31 ; 08423E11 v_mul_f32_e32 v21, v21, v33 ; 102A4315 v_cmp_lt_f32_e32 vcc, 0, v16 ; 7C022080 v_cndmask_b32_e32 v21, v21, v32 ; 002A4115 v_min3_f32 v19, v19, v22, v21 ; D2A20013 04562D13 v_mov_b32_e32 v21, s13 ; 7E2A020D v_add_f32_e32 v21, s1, v21 ; 062A2A01 v_mov_b32_e32 v22, s16 ; 7E2C0210 v_add_f32_e32 v22, s2, v22 ; 062C2C02 v_mov_b32_e32 v32, s17 ; 7E400211 v_add_f32_e32 v34, s3, v32 ; 06444003 v_mad_f32 v32, 0.5, v21, -s18 ; D2820020 804A2AF0 v_add_f32_e32 v32, v30, v32 ; 0640411E v_mac_f32_e32 v32, v19, v17 ; 3E402313 v_mad_f32 v17, 0.5, v22, -s19 ; D2820011 804E2CF0 v_add_f32_e32 v17, v26, v17 ; 0622231A v_mac_f32_e32 v17, v19, v18 ; 3E222513 v_mad_f32 v18, 0.5, v34, -s20 ; D2820012 805244F0 v_add_f32_e32 v18, v31, v18 ; 0624251F v_mac_f32_e32 v18, v19, v16 ; 3E242113 v_mad_f32 v32, 0.5, -v21, v32 ; D2820020 44822AF0 v_mad_f32 v33, 0.5, -v22, v17 ; D2820021 44462CF0 v_mad_f32 v34, 0.5, -v34, v18 ; D2820022 444A44F0 s_or_b64 exec, exec, s[14:15] ; 88FE0E7E s_buffer_load_dword s28, s[8:11], 0x17 ; C20E0917 s_buffer_load_dword s29, s[8:11], 0x43 ; C20E8943 s_buffer_load_dword s26, s[8:11], 0x44 ; C20D0944 s_buffer_load_dword s23, s[8:11], 0x45 ; C20B8945 s_buffer_load_dword s22, s[8:11], 0x46 ; C20B0946 s_buffer_load_dword s2, s[8:11], 0x0 ; C2010900 s_buffer_load_dword s3, s[8:11], 0x1 ; C2018901 s_buffer_load_dword s1, s[8:11], 0x2 ; C2008902 s_buffer_load_dword s13, s[8:11], 0x4 ; C2068904 s_buffer_load_dword s14, s[8:11], 0x5 ; C2070905 s_buffer_load_dword s15, s[8:11], 0x6 ; C2078906 s_buffer_load_dword s16, s[8:11], 0x7 ; C2080907 s_buffer_load_dword s17, s[8:11], 0x8 ; C2088908 s_buffer_load_dword s18, s[8:11], 0x9 ; C2090909 s_buffer_load_dword s19, s[8:11], 0xa ; C209890A s_buffer_load_dword s20, s[8:11], 0xb ; C20A090B s_buffer_load_dword s21, s[8:11], 0xc ; C20A890C s_buffer_load_dword s24, s[8:11], 0xd ; C20C090D s_buffer_load_dword s25, s[8:11], 0xe ; C20C890E v_sub_f32_e64 v16, 1.0, s0 ; D2080010 000000F2 v_log_f32_e32 v16, v16 ; 7E204F10 v_mul_legacy_f32_e32 v16, 0x3f400000, v16 ; 0E2020FF 3F400000 v_exp_f32_e32 v16, v16 ; 7E204B10 v_mul_f32_e32 v35, 0x40e00000, v16 ; 104620FF 40E00000 v_cubeid_f32 v19, v32, v33, v34 ; D2880013 048A4320 v_cubema_f32 v18, v32, v33, v34 ; D28E0012 048A4320 s_load_dwordx4 s[36:39], s[4:5], 0x0 ; C0920500 s_load_dwordx8 s[40:47], s[6:7], 0x0 ; C0D40700 v_cubesc_f32 v17, v32, v33, v34 ; D28A0011 048A4320 v_cubetc_f32 v16, v32, v33, v34 ; D28C0010 048A4320 v_rcp_f32_e64 v18, |v18| ; D3540112 00000112 v_mov_b32_e32 v32, 0x3fc00000 ; 7E4002FF 3FC00000 v_mad_f32 v33, v18, v16, v32 ; D2820021 04822112 v_mac_f32_e32 v32, v18, v17 ; 3E402312 v_mov_b32_e32 v34, v19 ; 7E440313 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[32:35], 15, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[40:47], s[36:39] ; F0900F00 012A2020 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v16, v35 ; 7E204F23 s_buffer_load_dword s27, s[8:11], 0xf ; C20D890F s_buffer_load_dword s30, s[8:11], 0x60 ; C20F0960 v_mul_legacy_f32_e32 v16, s33, v16 ; 0E202021 v_exp_f32_e32 v16, v16 ; 7E204B10 v_mul_f32_e32 v16, s32, v16 ; 10202020 v_mul_f32_e32 v19, v32, v16 ; 10262120 v_mul_f32_e32 v18, v33, v16 ; 10242121 v_mul_f32_e32 v17, v34, v16 ; 10222122 v_mov_b32_e32 v16, s31 ; 7E20021F v_mov_b32_e32 v21, 0x3f7fff58 ; 7E2A02FF 3F7FFF58 v_cmp_lt_f32_e32 vcc, s12, v21 ; 7C022A0C s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[32:33], vcc ; BEA0246A s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E s_cbranch_execz BB0_6 ; BF880000 s_buffer_load_dword s35, s[8:11], 0x3b ; C211893B s_buffer_load_dword s31, s[8:11], 0x3c ; C20F893C s_buffer_load_dword s34, s[8:11], 0x3d ; C211093D s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[36:37], 0, s35 ; D0020024 00004680 s_and_saveexec_b64 s[36:37], s[36:37] ; BEA42424 s_xor_b64 s[36:37], exec, s[36:37] ; 89A4247E s_cbranch_execz BB0_7 ; BF880000 s_buffer_load_dword s35, s[8:11], 0x36 ; C2118936 s_buffer_load_dword s38, s[8:11], 0x38 ; C2130938 s_buffer_load_dword s39, s[8:11], 0x39 ; C2138939 s_buffer_load_dword s40, s[8:11], 0x3a ; C214093A s_buffer_load_dword s41, s[8:11], 0x30 ; C2148930 s_buffer_load_dword s42, s[8:11], 0x31 ; C2150931 s_buffer_load_dword s43, s[8:11], 0x32 ; C2158932 s_buffer_load_dword s44, s[8:11], 0x34 ; C2160934 s_buffer_load_dword s45, s[8:11], 0x35 ; C2168935 v_mul_f32_e32 v21, v27, v27 ; 102A371B v_mac_f32_e32 v21, v28, v28 ; 3E2A391C v_mac_f32_e32 v21, v29, v29 ; 3E2A3B1D v_rsq_clamp_f32_e32 v21, v21 ; 7E2A5915 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v22, s35, v31 ; 082C3E23 v_mov_b32_e32 v32, s35 ; 7E400223 v_sub_f32_e32 v33, s41, v30 ; 08423C29 v_sub_f32_e32 v34, s42, v26 ; 0844342A v_add_f32_e32 v32, s43, v32 ; 0640402B v_sub_f32_e32 v35, s43, v31 ; 08463E2B v_mad_f32 v36, 0.5, v32, -s40 ; D2820024 80A240F0 v_add_f32_e32 v31, v31, v36 ; 063E491F v_mul_f32_e32 v27, v21, v27 ; 10363715 v_mul_f32_e32 v28, v21, v28 ; 10383915 v_mul_f32_e32 v21, v21, v29 ; 102A3B15 v_rcp_f32_e32 v29, v27 ; 7E3A551B v_rcp_f32_e32 v36, v28 ; 7E48551C v_rcp_f32_e32 v37, v21 ; 7E4A5515 v_sub_f32_e32 v38, s44, v30 ; 084C3C2C v_mov_b32_e32 v39, s44 ; 7E4E022C v_add_f32_e32 v39, s41, v39 ; 064E4E29 v_mul_f32_e32 v33, v29, v33 ; 1042431D v_mul_f32_e32 v29, v29, v38 ; 103A4D1D v_mul_f32_e32 v34, v36, v34 ; 10444524 v_mul_f32_e32 v35, v37, v35 ; 10464725 v_mul_f32_e32 v22, v37, v22 ; 102C2D25 v_mad_f32 v37, 0.5, v39, -s38 ; D2820025 809A4EF0 v_add_f32_e32 v30, v30, v37 ; 063C4B1E v_sub_f32_e32 v37, s45, v26 ; 084A342D v_mov_b32_e32 v38, s45 ; 7E4C022D v_mul_f32_e32 v36, v36, v37 ; 10484B24 v_add_f32_e32 v37, s42, v38 ; 064A4C2A v_cmp_lt_f32_e32 vcc, 0, v27 ; 7C023680 v_cndmask_b32_e32 v29, v29, v33 ; 003A431D v_cmp_lt_f32_e32 vcc, 0, v28 ; 7C023880 v_cndmask_b32_e32 v33, v36, v34 ; 00424524 v_cmp_lt_f32_e32 vcc, 0, v21 ; 7C022A80 v_cndmask_b32_e32 v22, v22, v35 ; 002C4716 v_min3_f32 v22, v29, v33, v22 ; D2A20016 045A431D v_mad_f32 v29, 0.5, v37, -s39 ; D282001D 809E4AF0 v_add_f32_e32 v26, v26, v29 ; 06343B1A v_mac_f32_e32 v30, v22, v27 ; 3E3C3716 v_mac_f32_e32 v26, v22, v28 ; 3E343916 v_mac_f32_e32 v31, v22, v21 ; 3E3E2B16 v_mad_f32 v27, 0.5, -v39, v30 ; D282001B 447A4EF0 v_mad_f32 v28, 0.5, -v37, v26 ; D282001C 446A4AF0 v_mad_f32 v29, 0.5, -v32, v31 ; D282001D 447E40F0 s_or_b64 exec, exec, s[36:37] ; 88FE247E v_sub_f32_e64 v21, 1.0, s0 ; D2080015 000000F2 v_log_f32_e32 v21, v21 ; 7E2A4F15 s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 v_mul_legacy_f32_e32 v21, 0x3f400000, v21 ; 0E2A2AFF 3F400000 v_exp_f32_e32 v21, v21 ; 7E2A4B15 v_mul_f32_e32 v30, 0x40e00000, v21 ; 103C2AFF 40E00000 v_cubeid_f32 v34, v27, v28, v29 ; D2880022 0476391B v_cubema_f32 v33, v27, v28, v29 ; D28E0021 0476391B s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 v_cubesc_f32 v32, v27, v28, v29 ; D28A0020 0476391B v_cubetc_f32 v31, v27, v28, v29 ; D28C001F 0476391B v_rcp_f32_e64 v21, |v33| ; D3540115 00000121 v_mov_b32_e32 v27, 0x3fc00000 ; 7E3602FF 3FC00000 v_mad_f32 v28, v21, v31, v27 ; D282001C 046E3F15 v_mac_f32_e32 v27, v21, v32 ; 3E364115 v_mov_b32_e32 v29, v34 ; 7E3A0322 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[40:47], s[36:39] ; F0900F00 012A1A1B s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v21, v29 ; 7E2A4F1D v_sub_f32_e64 v22, 1.0, s12 ; D2080016 000018F2 v_mul_legacy_f32_e32 v21, s34, v21 ; 0E2A2A22 v_exp_f32_e32 v21, v21 ; 7E2A4B15 v_mul_f32_e32 v21, s31, v21 ; 102A2A1F v_mul_f32_e32 v26, v26, v21 ; 10342B1A v_mul_f32_e32 v27, v27, v21 ; 10362B1B v_mul_f32_e32 v21, v28, v21 ; 102A2B1C v_mul_f32_e32 v26, v26, v22 ; 10342D1A v_mul_f32_e32 v27, v27, v22 ; 10362D1B v_mul_f32_e32 v21, v21, v22 ; 102A2D15 v_mac_f32_e32 v26, s12, v19 ; 3E34260C v_mac_f32_e32 v27, s12, v18 ; 3E36240C v_mac_f32_e32 v21, s12, v17 ; 3E2A220C v_mov_b32_e32 v17, v21 ; 7E220315 v_mov_b32_e32 v18, v27 ; 7E24031B v_mov_b32_e32 v19, v26 ; 7E26031A s_or_b64 exec, exec, s[32:33] ; 88FE207E v_mad_f32 v27, -v16, s29, s29 ; D282001B 20743B10 v_mov_b32_e32 v16, s28 ; 7E20021C v_mul_f32_e32 v26, v27, v23 ; 10342F1B v_mul_f32_e32 v22, v27, v24 ; 102C311B v_mul_f32_e32 v21, v27, v25 ; 102A331B v_mul_f32_e32 v23, s26, v15 ; 102E1E1A v_sub_f32_e64 v25, 1.0, s30 ; D2080019 00003CF2 v_mac_f32_e32 v25, s30, v20 ; 3E32281E v_mul_f32_e32 v20, s23, v15 ; 10281E17 v_mul_f32_e32 v15, s22, v15 ; 101E1E16 s_buffer_load_dword s23, s[8:11], 0x10 ; C20B8910 s_buffer_load_dword s22, s[8:11], 0x11 ; C20B0911 s_buffer_load_dword s12, s[8:11], 0x12 ; C2060912 s_buffer_load_dword s40, s[8:11], 0x16 ; C2140916 s_buffer_load_dword s29, s[8:11], 0x48 ; C20E8948 s_buffer_load_dword s30, s[8:11], 0x49 ; C20F0949 s_buffer_load_dword s31, s[8:11], 0x4b ; C20F894B s_buffer_load_dword s28, s[8:11], 0x64 ; C20E0964 s_buffer_load_dword s26, s[8:11], 0x65 ; C20D0965 s_buffer_load_dword s8, s[8:11], 0x66 ; C2040966 v_mul_f32_e32 v24, s14, v5 ; 10300A0E v_mac_f32_e32 v24, s13, v6 ; 3E300C0D v_mac_f32_e32 v24, s15, v4 ; 3E30080F v_add_f32_e32 v24, s16, v24 ; 06303010 v_mul_f32_e32 v28, s18, v5 ; 10380A12 v_mac_f32_e32 v28, s17, v6 ; 3E380C11 v_mac_f32_e32 v28, s19, v4 ; 3E380813 v_add_f32_e32 v28, s20, v28 ; 06383814 v_mul_f32_e32 v29, s24, v5 ; 103A0A18 v_mac_f32_e32 v29, s21, v6 ; 3E3A0C15 v_mac_f32_e32 v29, s25, v4 ; 3E3A0819 v_add_f32_e32 v29, s27, v29 ; 063A3A1B v_add_f32_e32 v7, v24, v7 ; 060E0F18 s_load_dwordx4 s[16:19], s[4:5], 0x10 ; C0880510 s_load_dwordx8 s[32:39], s[6:7], 0x20 ; C0D00720 v_add_f32_e32 v9, v28, v9 ; 0612131C v_add_f32_e32 v28, v29, v10 ; 0638151D v_mul_f32_e32 v10, s2, v6 ; 10140C02 v_mac_f32_e32 v10, s3, v5 ; 3E140A03 v_mac_f32_e32 v10, s1, v4 ; 3E140801 v_max_f32_e32 v24, 0, v10 ; 20301480 v_mul_f32_e32 v10, v25, v7 ; 10140F19 v_mul_f32_e32 v9, v25, v9 ; 10121319 v_mul_f32_e32 v7, v25, v28 ; 100E3919 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v16, s40, v11 ; 3E201628 v_mul_f32_e32 v11, v25, v19 ; 10162719 v_mul_f32_e32 v18, v25, v18 ; 10242519 v_mul_f32_e32 v17, v25, v17 ; 10222319 v_sub_f32_e32 v19, 1.0, v27 ; 082636F2 v_add_f32_e32 v19, s0, v19 ; 06262600 v_add_f32_e64 v19, 0, v19 clamp ; D2060813 00022680 v_sub_f32_e32 v25, s2, v13 ; 08321A02 v_sub_f32_e32 v27, s3, v12 ; 08361803 v_mul_f32_e32 v28, v25, v25 ; 10383319 v_mac_f32_e32 v28, v27, v27 ; 3E38371B v_sub_f32_e32 v29, s1, v14 ; 083A1C01 v_mac_f32_e32 v28, v29, v29 ; 3E383B1D v_rsq_clamp_f32_e32 v28, v28 ; 7E38591C v_mul_f32_e32 v25, v28, v25 ; 1032331C v_mul_f32_e32 v27, v28, v27 ; 1036371C v_mul_f32_e32 v28, v28, v29 ; 10383B1C v_mul_f32_e32 v13, v13, v6 ; 101A0D0D v_mad_f32 v12, -v12, v5, -v13 ; D282000C A4360B0C v_mad_f32 v12, -v14, v4, v12 ; D282000C 2432090E v_mul_f32_e32 v6, v25, v6 ; 100C0D19 v_mac_f32_e32 v6, v27, v5 ; 3E0C0B1B v_mul_f32_e32 v5, s2, v25 ; 100A3202 v_mac_f32_e32 v5, s3, v27 ; 3E0A3603 v_mac_f32_e32 v6, v28, v4 ; 3E0C091C v_mac_f32_e32 v5, s1, v28 ; 3E0A3801 v_max_f32_e32 v4, 0, v5 ; 20080A80 v_sub_f32_e32 v5, 1.0, v4 ; 080A08F2 v_mul_f32_e32 v13, v5, v5 ; 101A0B05 v_mul_f32_e32 v5, v5, v13 ; 100A1B05 v_mul_f32_e32 v5, v5, v13 ; 100A1B05 v_max_f32_e32 v12, 0, v12 ; 20181880 v_sub_f32_e32 v13, 1.0, v12 ; 081A18F2 v_mul_f32_e32 v14, v13, v13 ; 101C1B0D v_mul_f32_e32 v25, v13, v14 ; 10321D0D v_mad_f32 v27, -v14, v25, 1.0 ; D282001B 23CA330E v_mul_f32_e32 v28, v8, v27 ; 10383708 v_sub_f32_e32 v29, 1.0, v8 ; 083A10F2 v_mac_f32_e32 v8, v5, v29 ; 3E103B05 v_mul_f32_e32 v29, v1, v27 ; 103A3701 v_sub_f32_e32 v30, 1.0, v1 ; 083C02F2 v_mac_f32_e32 v1, v5, v30 ; 3E023D05 v_mul_f32_e32 v27, v0, v27 ; 10363700 v_sub_f32_e32 v30, 1.0, v0 ; 083C00F2 v_mac_f32_e32 v0, v5, v30 ; 3E003D05 v_sub_f32_e64 v5, 1.0, s0 ; D2080005 000000F2 v_sub_f32_e32 v30, 1.0, v5 ; 083C0AF2 v_mov_b32_e32 v31, 0x3cf5c28f ; 7E3E02FF 3CF5C28F v_madmk_f32_e32 v30, v30, v31, 0x3f77ced9 ; 403C3F1E 3F77CED9 v_add_f32_e32 v31, v4, v4 ; 063E0904 v_mul_f32_e32 v4, v5, v4 ; 10080905 v_mad_f32 v4, v31, v4, 0.5 ; D2820004 03C2091F v_mul_f32_e32 v14, v25, v14 ; 101C1D19 v_mac_f32_e32 v28, v19, v14 ; 3E381D13 v_mac_f32_e32 v29, v19, v14 ; 3E3A1D13 v_mac_f32_e32 v27, v19, v14 ; 3E361D13 v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_log_f32_e32 v19, v30 ; 7E264F1E v_mul_f32_e32 v5, s31, v5 ; 100A0A1F v_mul_f32_e32 v13, v5, v13 ; 101A1B05 v_mac_f32_e32 v13, 1.0, v12 ; 3E1A18F2 v_rcp_f32_e32 v12, v19 ; 7E185513 v_sub_f32_e32 v19, 1.0, v24 ; 082630F2 v_mul_f32_e32 v5, v5, v19 ; 100A2705 v_mac_f32_e32 v5, 1.0, v24 ; 3E0A30F2 v_max_f32_e32 v6, 0, v6 ; 200C0C80 v_log_f32_e32 v6, v6 ; 7E0C4F06 v_madak_f32_e32 v5, v5, v13, 0x38d1b717 ; 420A1B05 38D1B717 v_mul_f32_e32 v12, 0x41200000, v12 ; 101818FF 41200000 v_mul_f32_e32 v13, v12, v12 ; 101A190C v_mul_legacy_f32_e32 v6, v13, v6 ; 0E0C0D0D v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_mad_f32 v12, v12, v12, 1.0 ; D282000C 03CA190C v_mul_f32_e32 v12, s30, v12 ; 1018181E v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_mul_f32_e32 v6, v12, v6 ; 100C0D0C v_mul_f32_e32 v5, v6, v5 ; 100A0B06 v_mul_f32_e32 v6, v19, v19 ; 100C2713 v_mul_f32_e32 v12, v19, v6 ; 10180D13 v_mul_f32_e32 v6, v12, v6 ; 100C0D0C v_add_f32_e32 v4, -1.0, v4 ; 060808F3 v_mad_f32 v6, v4, v6, 1.0 ; D2820006 03CA0D04 v_mad_f32 v4, v4, v14, 1.0 ; D2820004 03CA1D04 v_mul_f32_e32 v4, v4, v6 ; 10080D04 v_mul_f32_e32 v5, v24, v5 ; 100A0B18 v_mul_f32_e32 v4, v24, v4 ; 10080918 v_mac_f32_e32 v10, v4, v23 ; 3E142F04 v_mul_f32_e32 v6, v10, v26 ; 100C350A v_mul_f32_e32 v5, s29, v5 ; 100A0A1D v_max_f32_e32 v5, 0, v5 ; 200A0A80 v_mul_f32_e32 v10, v23, v5 ; 10140B17 v_mac_f32_e32 v6, v8, v10 ; 3E0C1508 image_sample v[12:14], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[16:19] ; F0800700 00880C02 v_mac_f32_e32 v9, v4, v20 ; 3E122904 v_mul_f32_e32 v2, v9, v22 ; 10042D09 v_mul_f32_e32 v3, v20, v5 ; 10060B14 v_mac_f32_e32 v2, v1, v3 ; 3E040701 v_mac_f32_e32 v7, v4, v15 ; 3E0E1F04 v_mul_f32_e32 v1, v15, v5 ; 10020B0F v_mul_f32_e32 v3, v7, v21 ; 10062B07 v_mac_f32_e32 v3, v0, v1 ; 3E060300 v_mac_f32_e32 v6, v28, v11 ; 3E0C171C v_mac_f32_e32 v2, v29, v18 ; 3E04251D v_mac_f32_e32 v3, v27, v17 ; 3E06231B s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v6, s28, v12 ; 3E0C181C v_mac_f32_e32 v2, s26, v13 ; 3E041A1A v_mac_f32_e32 v3, s8, v14 ; 3E061C08 v_add_f32_e64 v0, 0, v16 clamp ; D2060800 00022080 v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 v_mul_f32_e32 v4, s23, v1 ; 10080217 v_mac_f32_e32 v4, v6, v0 ; 3E080106 v_mul_f32_e32 v5, s22, v1 ; 100A0216 v_mac_f32_e32 v5, v2, v0 ; 3E0A0102 v_mul_f32_e32 v1, s12, v1 ; 1002020C v_mac_f32_e32 v1, v3, v0 ; 3E020103 v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04 v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 40 Code Size: 2264 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL OUT[6], GENERIC[5] DCL CONST[0..20] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[6], IN[0].xxxx 1: MAD TEMP[0], CONST[7], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0].xyz, CONST[9], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[17], IN[0].xxxx 5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1] 8: MAD TEMP[2].xy, IN[2].xyyy, CONST[14].xyyy, CONST[14].zwww 9: FSEQ TEMP[3].x, CONST[16].xxxx, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].xy, IN[2].xyxx 12: ELSE :0 13: MOV TEMP[3].xy, IN[3].xyxx 14: ENDIF 15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[15].xyyy, CONST[15].zwww 16: MOV TEMP[2].zw, TEMP[3].yyxy 17: MOV TEMP[3].x, CONST[10].xxxx 18: MOV TEMP[3].y, CONST[11].xxxx 19: MOV TEMP[3].z, CONST[12].xxxx 20: MOV TEMP[4].x, CONST[10].yyyy 21: MOV TEMP[4].y, CONST[11].yyyy 22: MOV TEMP[4].z, CONST[12].yyyy 23: MOV TEMP[5].x, CONST[10].zzzz 24: MOV TEMP[5].y, CONST[11].zzzz 25: MOV TEMP[5].z, CONST[12].zzzz 26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz 29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 30: RSQ TEMP[4].x, TEMP[4].xxxx 31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 32: MOV TEMP[4].xyz, TEMP[3].xyzx 33: MUL TEMP[5].xyw, TEMP[1], IMM[0].yyyy 34: MOV TEMP[6].x, TEMP[5].xxxx 35: MUL TEMP[7].x, TEMP[5].yyyy, CONST[1].xxxx 36: MOV TEMP[6].y, TEMP[7].xxxx 37: ADD TEMP[5].xy, TEMP[6].xyyy, TEMP[5].wwww 38: MOV TEMP[5].zw, TEMP[1].wwzw 39: MUL TEMP[6], TEMP[3].xyzz, TEMP[3].yzzx 40: DP4 TEMP[7].x, CONST[2], TEMP[6] 41: DP4 TEMP[8].x, CONST[3], TEMP[6] 42: MOV TEMP[7].y, TEMP[8].xxxx 43: DP4 TEMP[6].x, CONST[4], TEMP[6] 44: MOV TEMP[7].z, TEMP[6].xxxx 45: MUL TEMP[6].x, TEMP[3].yyyy, TEMP[3].yyyy 46: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[6].xxxx 47: MAD TEMP[3].xyz, CONST[5].xyzz, TEMP[3].xxxx, TEMP[7].xyzz 48: ADD TEMP[6].xyz, TEMP[0].xyzz, -CONST[0].xyzz 49: MOV TEMP[6].yzw, TEMP[6].yxyz 50: MOV TEMP[6].x, TEMP[1].zzzz 51: MOV TEMP[0].xyz, TEMP[0].xyzx 52: MOV OUT[6], TEMP[0] 53: MOV OUT[1], TEMP[2] 54: MOV OUT[2], TEMP[4] 55: MOV OUT[3], TEMP[3] 56: MOV OUT[4], TEMP[5] 57: MOV OUT[0], TEMP[1] 58: MOV OUT[5], TEMP[6] 59: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332) %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 %83 = add i32 %5, %7 %84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83) %85 = extractelement <4 x float> %84, i32 0 %86 = extractelement <4 x float> %84, i32 1 %87 = extractelement <4 x float> %84, i32 2 %88 = extractelement <4 x float> %84, i32 3 %89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0 %91 = add i32 %5, %7 %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91) %93 = extractelement <4 x float> %92, i32 0 %94 = extractelement <4 x float> %92, i32 1 %95 = extractelement <4 x float> %92, i32 2 %96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0 %98 = add i32 %5, %7 %99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !tbaa !0 %104 = add i32 %5, %7 %105 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %103, i32 0, i32 %104) %106 = extractelement <4 x float> %105, i32 0 %107 = extractelement <4 x float> %105, i32 1 %108 = fmul float %32, %85 %109 = fmul float %33, %85 %110 = fmul float %34, %85 %111 = fmul float %35, %85 %112 = fmul float %36, %86 %113 = fadd float %112, %108 %114 = fmul float %37, %86 %115 = fadd float %114, %109 %116 = fmul float %38, %86 %117 = fadd float %116, %110 %118 = fmul float %39, %86 %119 = fadd float %118, %111 %120 = fmul float %40, %87 %121 = fadd float %120, %113 %122 = fmul float %41, %87 %123 = fadd float %122, %115 %124 = fmul float %42, %87 %125 = fadd float %124, %117 %126 = fmul float %43, %87 %127 = fadd float %126, %119 %128 = fmul float %44, %88 %129 = fadd float %128, %121 %130 = fmul float %45, %88 %131 = fadd float %130, %123 %132 = fmul float %46, %88 %133 = fadd float %132, %125 %134 = fmul float %65, %85 %135 = fmul float %66, %85 %136 = fmul float %67, %85 %137 = fmul float %68, %85 %138 = fmul float %69, %86 %139 = fadd float %138, %134 %140 = fmul float %70, %86 %141 = fadd float %140, %135 %142 = fmul float %71, %86 %143 = fadd float %142, %136 %144 = fmul float %72, %86 %145 = fadd float %144, %137 %146 = fmul float %73, %87 %147 = fadd float %146, %139 %148 = fmul float %74, %87 %149 = fadd float %148, %141 %150 = fmul float %75, %87 %151 = fadd float %150, %143 %152 = fmul float %76, %87 %153 = fadd float %152, %145 %154 = fmul float %77, %88 %155 = fadd float %154, %147 %156 = fmul float %78, %88 %157 = fadd float %156, %149 %158 = fmul float %79, %88 %159 = fadd float %158, %151 %160 = fmul float %80, %88 %161 = fadd float %160, %153 %162 = fmul float %100, %56 %163 = fadd float %162, %58 %164 = fmul float %101, %57 %165 = fadd float %164, %59 %166 = fcmp oeq float %64, 0.000000e+00 %. = select i1 %166, float %100, float %106 %.36 = select i1 %166, float %101, float %107 %167 = fmul float %., %60 %168 = fadd float %167, %62 %169 = fmul float %.36, %61 %170 = fadd float %169, %63 %171 = fmul float %47, %93 %172 = fmul float %50, %93 %173 = fmul float %53, %93 %174 = fmul float %48, %94 %175 = fadd float %174, %171 %176 = fmul float %51, %94 %177 = fadd float %176, %172 %178 = fmul float %54, %94 %179 = fadd float %178, %173 %180 = fmul float %49, %95 %181 = fadd float %180, %175 %182 = fmul float %52, %95 %183 = fadd float %182, %177 %184 = fmul float %55, %95 %185 = fadd float %184, %179 %186 = fmul float %181, %181 %187 = fmul float %183, %183 %188 = fadd float %187, %186 %189 = fmul float %185, %185 %190 = fadd float %188, %189 %191 = call float @llvm.AMDGPU.rsq.clamped.f32(float %190) %192 = fmul float %181, %191 %193 = fmul float %183, %191 %194 = fmul float %185, %191 %195 = fmul float %155, 5.000000e-01 %196 = fmul float %157, 5.000000e-01 %197 = fmul float %161, 5.000000e-01 %198 = fmul float %196, %16 %199 = fadd float %195, %197 %200 = fadd float %198, %197 %201 = fmul float %192, %193 %202 = fmul float %193, %194 %203 = fmul float %194, %194 %204 = fmul float %194, %192 %205 = fmul float %17, %201 %206 = fmul float %18, %202 %207 = fadd float %205, %206 %208 = fmul float %19, %203 %209 = fadd float %207, %208 %210 = fmul float %20, %204 %211 = fadd float %209, %210 %212 = fmul float %21, %201 %213 = fmul float %22, %202 %214 = fadd float %212, %213 %215 = fmul float %23, %203 %216 = fadd float %214, %215 %217 = fmul float %24, %204 %218 = fadd float %216, %217 %219 = fmul float %25, %201 %220 = fmul float %26, %202 %221 = fadd float %219, %220 %222 = fmul float %27, %203 %223 = fadd float %221, %222 %224 = fmul float %28, %204 %225 = fadd float %223, %224 %226 = fmul float %193, %193 %227 = fmul float %192, %192 %228 = fsub float %227, %226 %229 = fmul float %29, %228 %230 = fadd float %229, %211 %231 = fmul float %30, %228 %232 = fadd float %231, %218 %233 = fmul float %31, %228 %234 = fadd float %233, %225 %235 = fsub float %129, %13 %236 = fsub float %131, %14 %237 = fsub float %133, %15 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %163, float %165, float %168, float %170) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %192, float %193, float %194, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %230, float %232, float %234, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %199, float %200, float %159, float %161) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %159, float %235, float %236, float %237) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %129, float %131, float %133, float %127) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %155, float %157, float %159, float %161) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s19, s[20:23], 0x23 ; C2099523 buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00 s_buffer_load_dword s24, s[20:23], 0x24 ; C20C1524 s_buffer_load_dword s25, s[20:23], 0x25 ; C20C9525 s_buffer_load_dword s26, s[20:23], 0x26 ; C20D1526 s_buffer_load_dword s27, s[20:23], 0x28 ; C20D9528 s_buffer_load_dword s28, s[20:23], 0x29 ; C20E1529 s_buffer_load_dword s29, s[20:23], 0x2a ; C20E952A s_buffer_load_dword s30, s[20:23], 0x2c ; C20F152C s_buffer_load_dword s31, s[20:23], 0x2d ; C20F952D s_buffer_load_dword s32, s[20:23], 0x2e ; C210152E s_buffer_load_dword s33, s[20:23], 0x30 ; C2109530 s_buffer_load_dword s34, s[20:23], 0x31 ; C2111531 s_buffer_load_dword s35, s[20:23], 0x32 ; C2119532 s_buffer_load_dword s36, s[20:23], 0x38 ; C2121538 s_buffer_load_dword s37, s[20:23], 0x39 ; C2129539 s_buffer_load_dword s11, s[20:23], 0x9 ; C2059509 s_buffer_load_dword s5, s[20:23], 0xa ; C202950A s_buffer_load_dword s3, s[20:23], 0xb ; C201950B s_buffer_load_dword s9, s[20:23], 0xc ; C204950C s_buffer_load_dword s12, s[20:23], 0xd ; C206150D s_buffer_load_dword s7, s[20:23], 0xe ; C203950E s_buffer_load_dword s4, s[20:23], 0xf ; C202150F s_buffer_load_dword s10, s[20:23], 0x10 ; C2051510 s_buffer_load_dword s13, s[20:23], 0x11 ; C2069511 s_buffer_load_dword s8, s[20:23], 0x12 ; C2041512 s_buffer_load_dword s0, s[20:23], 0x3f ; C200153F s_buffer_load_dword s1, s[20:23], 0x40 ; C2009540 s_buffer_load_dword s38, s[20:23], 0x44 ; C2131544 s_buffer_load_dword s39, s[20:23], 0x45 ; C2139545 s_buffer_load_dword s40, s[20:23], 0x46 ; C2141546 s_buffer_load_dword s41, s[20:23], 0x47 ; C2149547 s_buffer_load_dword s42, s[20:23], 0x48 ; C2151548 s_buffer_load_dword s43, s[20:23], 0x49 ; C2159549 s_buffer_load_dword s44, s[20:23], 0x4a ; C216154A s_buffer_load_dword s45, s[20:23], 0x4b ; C216954B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s0 ; 7E000200 s_buffer_load_dword s0, s[20:23], 0x0 ; C2001500 v_cmp_eq_f32_e64 vcc, 0, s1 ; D004006A 00000280 s_buffer_load_dword s1, s[20:23], 0x1 ; C2009501 s_buffer_load_dword s2, s[20:23], 0x2 ; C2011502 s_buffer_load_dword s6, s[20:23], 0x4 ; C2031504 s_buffer_load_dword s14, s[20:23], 0x8 ; C2071508 s_buffer_load_dword s46, s[20:23], 0x3a ; C217153A s_buffer_load_dword s47, s[20:23], 0x3b ; C217953B s_buffer_load_dword s48, s[20:23], 0x3c ; C218153C s_buffer_load_dword s49, s[20:23], 0x3d ; C218953D s_buffer_load_dword s50, s[20:23], 0x3e ; C219153E s_buffer_load_dword s18, s[20:23], 0x13 ; C2091513 s_buffer_load_dword s15, s[20:23], 0x14 ; C2079514 s_buffer_load_dword s16, s[20:23], 0x15 ; C2081515 s_buffer_load_dword s17, s[20:23], 0x16 ; C2089516 s_buffer_load_dword s51, s[20:23], 0x18 ; C2199518 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v13, s46 ; 7E1A022E s_buffer_load_dword s46, s[20:23], 0x19 ; C2171519 s_buffer_load_dword s52, s[20:23], 0x1a ; C21A151A s_buffer_load_dword s53, s[20:23], 0x1b ; C21A951B s_buffer_load_dword s54, s[20:23], 0x1c ; C21B151C s_buffer_load_dword s55, s[20:23], 0x1d ; C21B951D s_buffer_load_dword s56, s[20:23], 0x1e ; C21C151E s_buffer_load_dword s57, s[20:23], 0x1f ; C21C951F s_buffer_load_dword s58, s[20:23], 0x20 ; C21D1520 s_buffer_load_dword s59, s[20:23], 0x21 ; C21D9521 s_buffer_load_dword s60, s[20:23], 0x22 ; C21E1522 s_buffer_load_dword s61, s[20:23], 0x4c ; C21E954C s_buffer_load_dword s62, s[20:23], 0x4d ; C21F154D s_buffer_load_dword s63, s[20:23], 0x4e ; C21F954E s_buffer_load_dword s64, s[20:23], 0x4f ; C220154F s_buffer_load_dword s65, s[20:23], 0x50 ; C2209550 s_buffer_load_dword s66, s[20:23], 0x51 ; C2211551 s_buffer_load_dword s67, s[20:23], 0x52 ; C2219552 s_buffer_load_dword s20, s[20:23], 0x53 ; C20A1553 v_mac_f32_e32 v13, s36, v9 ; 3E1A1224 v_mov_b32_e32 v14, s47 ; 7E1C022F v_mul_f32_e32 v15, s51, v2 ; 101E0433 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v16, s46, v2 ; 1020042E v_mul_f32_e32 v17, s27, v6 ; 10220C1B v_mul_f32_e32 v18, s30, v6 ; 10240C1E v_mul_f32_e32 v6, s33, v6 ; 100C0C21 v_mac_f32_e32 v17, s28, v7 ; 3E220E1C v_mac_f32_e32 v18, s31, v7 ; 3E240E1F v_mac_f32_e32 v6, s34, v7 ; 3E0C0E22 v_mac_f32_e32 v17, s29, v8 ; 3E22101D v_mac_f32_e32 v18, s32, v8 ; 3E241020 v_mac_f32_e32 v6, s35, v8 ; 3E0C1023 v_mul_f32_e32 v7, s52, v2 ; 100E0434 v_mul_f32_e32 v8, s53, v2 ; 10100435 v_mul_f32_e32 v19, s38, v2 ; 10260426 v_mac_f32_e32 v14, s37, v10 ; 3E1C1425 v_cndmask_b32_e32 v9, v11, v9 ; 0012130B v_cndmask_b32_e32 v10, v12, v10 ; 0014150C v_mac_f32_e32 v15, s54, v3 ; 3E1E0636 v_mac_f32_e32 v16, s55, v3 ; 3E200637 v_mac_f32_e32 v7, s56, v3 ; 3E0E0638 v_mac_f32_e32 v8, s57, v3 ; 3E100639 v_mac_f32_e32 v19, s42, v3 ; 3E26062A v_mul_f32_e32 v11, s39, v2 ; 10160427 v_mac_f32_e32 v11, s43, v3 ; 3E16062B v_mul_f32_e32 v12, s40, v2 ; 10180428 v_mac_f32_e32 v12, s44, v3 ; 3E18062C v_mul_f32_e32 v2, s41, v2 ; 10040429 v_mac_f32_e32 v2, s45, v3 ; 3E04062D v_mac_f32_e32 v15, s58, v4 ; 3E1E083A v_mac_f32_e32 v16, s59, v4 ; 3E20083B v_mac_f32_e32 v7, s60, v4 ; 3E0E083C v_mac_f32_e32 v8, s19, v4 ; 3E100813 v_mac_f32_e32 v19, s61, v4 ; 3E26083D v_mac_f32_e32 v11, s62, v4 ; 3E16083E v_mac_f32_e32 v12, s63, v4 ; 3E18083F v_mac_f32_e32 v2, s64, v4 ; 3E040840 v_mac_f32_e32 v15, s24, v5 ; 3E1E0A18 v_mac_f32_e32 v16, s25, v5 ; 3E200A19 v_mac_f32_e32 v7, s26, v5 ; 3E0E0A1A v_mac_f32_e32 v19, s65, v5 ; 3E260A41 v_mac_f32_e32 v11, s66, v5 ; 3E160A42 v_mac_f32_e32 v12, s67, v5 ; 3E180A43 v_mac_f32_e32 v2, s20, v5 ; 3E040A14 v_mov_b32_e32 v3, s50 ; 7E060232 v_mul_f32_e32 v4, v17, v17 ; 10082311 v_mac_f32_e32 v4, v18, v18 ; 3E082512 v_mac_f32_e32 v4, v6, v6 ; 3E080D06 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 v_mac_f32_e32 v3, s48, v9 ; 3E061230 v_mac_f32_e32 v0, s49, v10 ; 3E001431 exp 15, 32, 0, 0, 0, v13, v14, v3, v0 ; F800020F 00030E0D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v4, v17 ; 10002304 v_mul_f32_e32 v3, v4, v18 ; 10062504 v_mul_f32_e32 v4, v4, v6 ; 10080D04 v_mul_f32_e32 v5, v4, v3 ; 100A0704 v_mul_f32_e32 v6, s11, v5 ; 100C0A0B v_mul_f32_e32 v9, s12, v5 ; 10120A0C v_mul_f32_e32 v5, s13, v5 ; 100A0A0D v_mul_f32_e32 v10, v3, v0 ; 10140103 v_mac_f32_e32 v6, s14, v10 ; 3E0C140E v_mac_f32_e32 v9, s9, v10 ; 3E121409 v_mac_f32_e32 v5, s10, v10 ; 3E0A140A v_mul_f32_e32 v10, v4, v4 ; 10140904 v_mac_f32_e32 v6, s5, v10 ; 3E0C1405 v_mac_f32_e32 v9, s7, v10 ; 3E121407 v_mac_f32_e32 v5, s8, v10 ; 3E0A1408 v_mul_f32_e32 v10, v0, v4 ; 10140900 v_mac_f32_e32 v6, s3, v10 ; 3E0C1403 v_mac_f32_e32 v9, s4, v10 ; 3E121404 v_mac_f32_e32 v5, s18, v10 ; 3E0A1412 exp 15, 33, 0, 0, 0, v0, v3, v4, v1 ; F800021F 01040300 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mad_f32 v0, v0, v0, -v3 ; D2820000 840E0100 v_mac_f32_e32 v6, s15, v0 ; 3E0C000F v_mac_f32_e32 v9, s16, v0 ; 3E120010 v_mac_f32_e32 v5, s17, v0 ; 3E0A0011 v_mul_f32_e32 v0, 0.5, v11 ; 100016F0 v_mul_f32_e32 v3, 0.5, v2 ; 100604F0 exp 15, 34, 0, 0, 0, v6, v9, v5, v1 ; F800022F 01050906 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v1, 0.5, v19, v3 ; D2820001 040E26F0 v_mac_f32_e32 v3, s6, v0 ; 3E060006 exp 15, 35, 0, 0, 0, v1, v3, v12, v2 ; F800023F 020C0301 v_subrev_f32_e32 v0, s0, v15 ; 0A001E00 s_waitcnt expcnt(0) ; BF8C070F v_subrev_f32_e32 v1, s1, v16 ; 0A022001 v_subrev_f32_e32 v3, s2, v7 ; 0A060E02 exp 15, 36, 0, 0, 0, v12, v0, v1, v3 ; F800024F 0301000C exp 15, 37, 0, 0, 0, v15, v16, v7, v8 ; F800025F 0807100F exp 15, 12, 0, 1, 0, v19, v11, v12, v2 ; F80008CF 020C0B13 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 20 Code Size: 788 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SVIEW[0], CUBE, FLOAT DCL SVIEW[1], CUBE, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL CONST[0..5] DCL CONST[8..19] DCL CONST[21..22] DCL CONST[24] DCL TEMP[0..18], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, 0.5000} IMM[1] FLT32 { 0.7500, 7.0000, 1.0000, 10.0000} IMM[2] FLT32 { 0.9680, 0.0300, 0.0001, -1.0000} 0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx 3: DP3 TEMP[1].x, IN[4].yzww, IN[4].yzww 4: RSQ TEMP[1].x, TEMP[1].xxxx 5: MUL TEMP[1].xyz, IN[4].yzww, TEMP[1].xxxx 6: MOV TEMP[2].xy, IN[0].xyyy 7: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D 8: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[2].xyzz 9: LRP TEMP[3].xyz, CONST[21].xxxx, TEMP[2].xyzz, CONST[16].xyzz 10: MUL TEMP[4].x, CONST[21].xxxx, CONST[16].wwww 11: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx 12: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx 13: MOV TEMP[5].xy, IN[0].xyyy 14: TEX TEMP[5].y, TEMP[5], SAMP[3], 2D 15: ADD TEMP[6].x, IMM[0].xxxx, -CONST[24].xxxx 16: MAD TEMP[5].x, TEMP[5].yyyy, CONST[24].xxxx, TEMP[6].xxxx 17: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz 18: MAX TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx 19: MOV TEMP[7].xyz, IMM[0].yyyy 20: MOV TEMP[8].w, IMM[0].xxxx 21: MOV TEMP[8].xyz, TEMP[0].xyzx 22: DP4 TEMP[9].x, CONST[1], TEMP[8] 23: DP4 TEMP[10].x, CONST[2], TEMP[8] 24: MOV TEMP[9].y, TEMP[10].xxxx 25: DP4 TEMP[8].x, CONST[3], TEMP[8] 26: MOV TEMP[9].z, TEMP[8].xxxx 27: ADD TEMP[8].xyz, IN[2].xyzz, TEMP[9].xyzz 28: MOV TEMP[9].xy, IN[3].xyyy 29: MOV TEMP[9].w, IN[3].wwww 30: TXP TEMP[9].x, TEMP[9], SAMP[4], 2D 31: MUL TEMP[9].xyz, CONST[17].xyzz, TEMP[9].xxxx 32: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx 33: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[1].xyzz 34: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[0].xyzz 35: MUL TEMP[10].xyz, IMM[0].zzzz, TEMP[10].xyzz 36: ADD TEMP[10].xyz, TEMP[1].xyzz, -TEMP[10].xyzz 37: MOV TEMP[11].xyz, TEMP[10].xyzx 38: FSLT TEMP[12].x, IMM[0].yyyy, CONST[10].wwww 39: UIF TEMP[12].xxxx :0 40: DP3 TEMP[12].x, TEMP[10].xyzz, TEMP[10].xyzz 41: RSQ TEMP[12].x, TEMP[12].xxxx 42: MUL TEMP[12].xyz, TEMP[10].xyzz, TEMP[12].xxxx 43: MOV TEMP[13].xyz, -IN[5].xyzx 44: ADD TEMP[14].xyz, CONST[8].xyzz, TEMP[13].xyzz 45: RCP TEMP[15].x, TEMP[12].xxxx 46: RCP TEMP[15].y, TEMP[12].yyyy 47: RCP TEMP[15].z, TEMP[12].zzzz 48: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz 49: ADD TEMP[13].xyz, CONST[9].xyzz, TEMP[13].xyzz 50: RCP TEMP[15].x, TEMP[12].xxxx 51: RCP TEMP[15].y, TEMP[12].yyyy 52: RCP TEMP[15].z, TEMP[12].zzzz 53: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz 54: FSLT TEMP[15].xyz, IMM[0].yyyy, TEMP[12].xyzz 55: UIF TEMP[15].xxxx :0 56: MOV TEMP[16].x, TEMP[14].xxxx 57: ELSE :0 58: MOV TEMP[16].x, TEMP[13].xxxx 59: ENDIF 60: UIF TEMP[15].yyyy :0 61: MOV TEMP[17].x, TEMP[14].yyyy 62: ELSE :0 63: MOV TEMP[17].x, TEMP[13].yyyy 64: ENDIF 65: UIF TEMP[15].zzzz :0 66: MOV TEMP[14].x, TEMP[14].zzzz 67: ELSE :0 68: MOV TEMP[14].x, TEMP[13].zzzz 69: ENDIF 70: ADD TEMP[13].xyz, CONST[8].xyzz, CONST[9].xyzz 71: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[0].wwww 72: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx 73: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx 74: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[10].xyzz 75: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[5].xyzz 76: MAD TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xxxx, TEMP[15].xyzz 77: ADD TEMP[11].xyz, TEMP[12].xyzz, -TEMP[13].xyzz 78: ENDIF 79: ADD TEMP[12].x, IMM[0].xxxx, -CONST[22].xxxx 80: POW TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx 81: MUL TEMP[12].x, TEMP[12].xxxx, IMM[1].yyyy 82: MOV TEMP[11].xyz, TEMP[11].xyzz 83: MOV TEMP[11].w, TEMP[12].xxxx 84: TXL TEMP[11], TEMP[11], SAMP[0], CUBE 85: POW TEMP[12].x, TEMP[11].wwww, CONST[11].yyyy 86: MUL TEMP[12].x, CONST[11].xxxx, TEMP[12].xxxx 87: MUL TEMP[11].xyz, TEMP[12].xxxx, TEMP[11].xyzz 88: FSLT TEMP[12].x, CONST[9].wwww, IMM[1].zzzz 89: UIF TEMP[12].xxxx :0 90: MOV TEMP[12].xyz, TEMP[10].xyzx 91: FSLT TEMP[13].x, IMM[0].yyyy, CONST[14].wwww 92: UIF TEMP[13].xxxx :0 93: DP3 TEMP[13].x, TEMP[10].xyzz, TEMP[10].xyzz 94: RSQ TEMP[13].x, TEMP[13].xxxx 95: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[13].xxxx 96: MOV TEMP[13].xyz, -IN[5].xyzx 97: ADD TEMP[14].xyz, CONST[12].xyzz, TEMP[13].xyzz 98: RCP TEMP[15].x, TEMP[10].xxxx 99: RCP TEMP[15].y, TEMP[10].yyyy 100: RCP TEMP[15].z, TEMP[10].zzzz 101: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz 102: ADD TEMP[13].xyz, CONST[13].xyzz, TEMP[13].xyzz 103: RCP TEMP[15].x, TEMP[10].xxxx 104: RCP TEMP[15].y, TEMP[10].yyyy 105: RCP TEMP[15].z, TEMP[10].zzzz 106: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz 107: FSLT TEMP[15].xyz, IMM[0].yyyy, TEMP[10].xyzz 108: UIF TEMP[15].xxxx :0 109: MOV TEMP[16].x, TEMP[14].xxxx 110: ELSE :0 111: MOV TEMP[16].x, TEMP[13].xxxx 112: ENDIF 113: UIF TEMP[15].yyyy :0 114: MOV TEMP[17].x, TEMP[14].yyyy 115: ELSE :0 116: MOV TEMP[17].x, TEMP[13].yyyy 117: ENDIF 118: UIF TEMP[15].zzzz :0 119: MOV TEMP[14].x, TEMP[14].zzzz 120: ELSE :0 121: MOV TEMP[14].x, TEMP[13].zzzz 122: ENDIF 123: ADD TEMP[13].xyz, CONST[12].xyzz, CONST[13].xyzz 124: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[0].wwww 125: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx 126: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx 127: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[14].xyzz 128: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[5].xyzz 129: MAD TEMP[10].xyz, TEMP[10].xyzz, TEMP[14].xxxx, TEMP[15].xyzz 130: ADD TEMP[12].xyz, TEMP[10].xyzz, -TEMP[13].xyzz 131: ENDIF 132: ADD TEMP[10].x, IMM[0].xxxx, -CONST[22].xxxx 133: POW TEMP[10].x, TEMP[10].xxxx, IMM[1].xxxx 134: MUL TEMP[10].x, TEMP[10].xxxx, IMM[1].yyyy 135: MOV TEMP[12].xyz, TEMP[12].xyzz 136: MOV TEMP[12].w, TEMP[10].xxxx 137: TXL TEMP[10], TEMP[12], SAMP[1], CUBE 138: POW TEMP[12].x, TEMP[10].wwww, CONST[15].yyyy 139: MUL TEMP[12].x, CONST[15].xxxx, TEMP[12].xxxx 140: MUL TEMP[10].xyz, TEMP[12].xxxx, TEMP[10].xyzz 141: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[11].xyzz, TEMP[10].xyzz 142: ELSE :0 143: MOV TEMP[7].xyz, TEMP[11].xyzx 144: ENDIF 145: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx 146: MOV TEMP[1].xyz, -TEMP[1].xyzx 147: ADD TEMP[5].x, IMM[0].xxxx, -CONST[22].xxxx 148: ADD TEMP[10].xyz, CONST[0].xyzz, TEMP[1].xyzz 149: DP3 TEMP[11].x, TEMP[10].xyzz, TEMP[10].xyzz 150: RSQ TEMP[11].x, TEMP[11].xxxx 151: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[11].xxxx 152: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz 153: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx 154: DP3 TEMP[11].x, CONST[0].xyzz, TEMP[10].xyzz 155: MAX TEMP[11].x, IMM[0].yyyy, TEMP[11].xxxx 156: MUL TEMP[12].x, TEMP[5].xxxx, TEMP[5].xxxx 157: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].wwww 158: ADD TEMP[13].x, IMM[0].xxxx, -TEMP[5].xxxx 159: MAD TEMP[13].x, TEMP[13].xxxx, IMM[2].xxxx, IMM[2].yyyy 160: LG2 TEMP[13].x, TEMP[13].xxxx 161: RCP TEMP[13].x, TEMP[13].xxxx 162: MUL TEMP[13].x, IMM[1].wwww, TEMP[13].xxxx 163: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[13].xxxx 164: ADD TEMP[14].x, IMM[0].xxxx, -TEMP[6].xxxx 165: ADD TEMP[15].x, IMM[0].xxxx, -TEMP[1].xxxx 166: MUL TEMP[16].x, IMM[0].zzzz, TEMP[11].xxxx 167: MUL TEMP[5].x, TEMP[11].xxxx, TEMP[5].xxxx 168: MAD TEMP[5].x, TEMP[16].xxxx, TEMP[5].xxxx, IMM[0].wwww 169: ADD TEMP[11].x, IMM[0].xxxx, -TEMP[11].xxxx 170: ADD TEMP[16].x, IMM[0].xxxx, -TEMP[1].xxxx 171: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx 172: ADD TEMP[4].x, CONST[22].xxxx, TEMP[4].xxxx 173: MOV_SAT TEMP[4].x, TEMP[4].xxxx 174: MUL TEMP[17].x, TEMP[16].xxxx, TEMP[16].xxxx 175: MUL TEMP[18].x, TEMP[16].xxxx, TEMP[16].xxxx 176: MUL TEMP[16].x, TEMP[18].xxxx, TEMP[16].xxxx 177: MUL TEMP[16].x, TEMP[17].xxxx, TEMP[16].xxxx 178: LRP TEMP[4].xyz, TEMP[16].xxxx, TEMP[4].xxxx, TEMP[3].xyzz 179: LRP TEMP[16].x, TEMP[6].xxxx, IMM[0].xxxx, TEMP[12].xxxx 180: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx, TEMP[12].xxxx 181: MAD TEMP[1].x, TEMP[16].xxxx, TEMP[1].xxxx, IMM[2].zzzz 182: RCP TEMP[1].x, TEMP[1].xxxx 183: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[10].xyzz 184: MAX TEMP[10].x, IMM[0].yyyy, TEMP[10].xxxx 185: POW TEMP[10].x, TEMP[10].xxxx, TEMP[13].xxxx 186: ADD TEMP[12].x, TEMP[13].xxxx, IMM[0].xxxx 187: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].yyyy 188: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[12].xxxx 189: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[10].xxxx 190: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx 191: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx 192: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx 193: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[9].xyzz 194: ADD TEMP[10].xyz, IMM[0].xxxx, -TEMP[3].xyzz 195: MUL TEMP[12].x, TEMP[11].xxxx, TEMP[11].xxxx 196: MUL TEMP[13].x, TEMP[11].xxxx, TEMP[11].xxxx 197: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[11].xxxx 198: MUL TEMP[11].x, TEMP[12].xxxx, TEMP[11].xxxx 199: MAD TEMP[3].xyz, TEMP[10].xyzz, TEMP[11].xxxx, TEMP[3].xyzz 200: ADD TEMP[10].x, TEMP[5].xxxx, IMM[2].wwww 201: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx 202: MUL TEMP[12].x, TEMP[14].xxxx, TEMP[14].xxxx 203: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[14].xxxx 204: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx 205: MAD TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx, IMM[0].xxxx 206: ADD TEMP[5].x, TEMP[5].xxxx, IMM[2].wwww 207: MUL TEMP[11].x, TEMP[15].xxxx, TEMP[15].xxxx 208: MUL TEMP[12].x, TEMP[15].xxxx, TEMP[15].xxxx 209: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[15].xxxx 210: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx 211: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[11].xxxx, IMM[0].xxxx 212: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx 213: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 214: MAD TEMP[5].xyz, TEMP[9].xyzz, TEMP[5].xxxx, TEMP[8].xyzz 215: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz 216: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz 217: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz 218: MOV TEMP[0].xyz, TEMP[0].xyzx 219: MAD TEMP[1].x, IN[4].xxxx, CONST[5].zzzz, CONST[5].wwww 220: MOV_SAT TEMP[1].x, TEMP[1].xxxx 221: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz 222: MOV TEMP[0].xyz, TEMP[0].xyzx 223: MOV TEMP[0].w, IMM[0].xxxx 224: MOV OUT[0], TEMP[0] 225: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300) %76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312) %79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336) %80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384) %82 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %83 = load <32 x i8>, <32 x i8> addrspace(2)* %82, align 32, !tbaa !0 %84 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %85 = load <16 x i8>, <16 x i8> addrspace(2)* %84, align 16, !tbaa !0 %86 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %87 = bitcast <8 x i32> addrspace(2)* %86 to <32 x i8> addrspace(2)* %88 = load <32 x i8>, <32 x i8> addrspace(2)* %87, align 32, !tbaa !0 %89 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %90 = bitcast <4 x i32> addrspace(2)* %89 to <16 x i8> addrspace(2)* %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 %92 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %93 = bitcast <8 x i32> addrspace(2)* %92 to <32 x i8> addrspace(2)* %94 = load <32 x i8>, <32 x i8> addrspace(2)* %93, align 32, !tbaa !0 %95 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %96 = bitcast <4 x i32> addrspace(2)* %95 to <16 x i8> addrspace(2)* %97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0 %98 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %99 = bitcast <8 x i32> addrspace(2)* %98 to <32 x i8> addrspace(2)* %100 = load <32 x i8>, <32 x i8> addrspace(2)* %99, align 32, !tbaa !0 %101 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %102 = bitcast <4 x i32> addrspace(2)* %101 to <16 x i8> addrspace(2)* %103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !tbaa !0 %104 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %105 = bitcast <8 x i32> addrspace(2)* %104 to <32 x i8> addrspace(2)* %106 = load <32 x i8>, <32 x i8> addrspace(2)* %105, align 32, !tbaa !0 %107 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %108 = bitcast <4 x i32> addrspace(2)* %107 to <16 x i8> addrspace(2)* %109 = load <16 x i8>, <16 x i8> addrspace(2)* %108, align 16, !tbaa !0 %110 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %111 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %113 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %114 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %115 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %116 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %117 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %119 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %123 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %124 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %125 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %126 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %127 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %128 = fmul float %112, %112 %129 = fmul float %113, %113 %130 = fadd float %129, %128 %131 = fmul float %114, %114 %132 = fadd float %130, %131 %133 = call float @llvm.AMDGPU.rsq.clamped.f32(float %132) %134 = fmul float %112, %133 %135 = fmul float %113, %133 %136 = fmul float %114, %133 %137 = fmul float %122, %122 %138 = fmul float %123, %123 %139 = fadd float %138, %137 %140 = fmul float %124, %124 %141 = fadd float %139, %140 %142 = call float @llvm.AMDGPU.rsq.clamped.f32(float %141) %143 = fmul float %122, %142 %144 = fmul float %123, %142 %145 = fmul float %124, %142 %146 = bitcast float %110 to i32 %147 = bitcast float %111 to i32 %148 = insertelement <2 x i32> undef, i32 %146, i32 0 %149 = insertelement <2 x i32> %148, i32 %147, i32 1 %150 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %149, <32 x i8> %94, <16 x i8> %97, i32 2) %151 = extractelement <4 x float> %150, i32 0 %152 = extractelement <4 x float> %150, i32 1 %153 = extractelement <4 x float> %150, i32 2 %154 = fmul float %76, %151 %155 = fmul float %77, %152 %156 = fmul float %78, %153 %157 = call float @llvm.AMDGPU.lrp(float %79, float %154, float %66) %158 = call float @llvm.AMDGPU.lrp(float %79, float %155, float %67) %159 = call float @llvm.AMDGPU.lrp(float %79, float %156, float %68) %160 = fmul float %79, %69 %161 = fsub float %69, %160 %162 = fmul float %154, %161 %163 = fmul float %155, %161 %164 = fmul float %156, %161 %165 = bitcast float %110 to i32 %166 = bitcast float %111 to i32 %167 = insertelement <2 x i32> undef, i32 %165, i32 0 %168 = insertelement <2 x i32> %167, i32 %166, i32 1 %169 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %168, <32 x i8> %100, <16 x i8> %103, i32 2) %170 = extractelement <4 x float> %169, i32 1 %171 = fsub float 1.000000e+00, %81 %172 = fmul float %170, %81 %173 = fadd float %172, %171 %174 = fmul float %134, %24 %175 = fmul float %135, %25 %176 = fadd float %175, %174 %177 = fmul float %136, %26 %178 = fadd float %176, %177 %179 = call float @llvm.maxnum.f32(float %178, float 0.000000e+00) %180 = fmul float %27, %134 %181 = fmul float %28, %135 %182 = fadd float %180, %181 %183 = fmul float %29, %136 %184 = fadd float %182, %183 %185 = fadd float %184, %30 %186 = fmul float %31, %134 %187 = fmul float %32, %135 %188 = fadd float %186, %187 %189 = fmul float %33, %136 %190 = fadd float %188, %189 %191 = fadd float %190, %34 %192 = fmul float %35, %134 %193 = fmul float %36, %135 %194 = fadd float %192, %193 %195 = fmul float %37, %136 %196 = fadd float %194, %195 %197 = fadd float %196, %38 %198 = fadd float %115, %185 %199 = fadd float %116, %191 %200 = fadd float %117, %197 %201 = fdiv float %118, %120 %202 = fdiv float %119, %120 %203 = bitcast float %201 to i32 %204 = bitcast float %202 to i32 %205 = insertelement <2 x i32> undef, i32 %203, i32 0 %206 = insertelement <2 x i32> %205, i32 %204, i32 1 %207 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %206, <32 x i8> %106, <16 x i8> %109, i32 2) %208 = extractelement <4 x float> %207, i32 0 %209 = fmul float %70, %208 %210 = fmul float %71, %208 %211 = fmul float %72, %208 %212 = fmul float %198, %173 %213 = fmul float %199, %173 %214 = fmul float %200, %173 %215 = fmul float %134, %143 %216 = fmul float %135, %144 %217 = fadd float %216, %215 %218 = fmul float %136, %145 %219 = fadd float %217, %218 %220 = fmul float %219, %134 %221 = fmul float %219, %135 %222 = fmul float %219, %136 %223 = fmul float %220, 2.000000e+00 %224 = fmul float %221, 2.000000e+00 %225 = fmul float %222, 2.000000e+00 %226 = fsub float %143, %223 %227 = fsub float %144, %224 %228 = fsub float %145, %225 %229 = fcmp ogt float %51, 0.000000e+00 br i1 %229, label %IF, label %ENDIF IF: ; preds = %main_body %230 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %231 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %232 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %233 = fmul float %226, %226 %234 = fmul float %227, %227 %235 = fadd float %234, %233 %236 = fmul float %228, %228 %237 = fadd float %235, %236 %238 = call float @llvm.AMDGPU.rsq.clamped.f32(float %237) %239 = fmul float %226, %238 %240 = fmul float %227, %238 %241 = fmul float %228, %238 %242 = fsub float %44, %125 %243 = fsub float %45, %126 %244 = fsub float %46, %127 %245 = fdiv float 1.000000e+00, %239 %246 = fdiv float 1.000000e+00, %240 %247 = fdiv float 1.000000e+00, %241 %248 = fmul float %242, %245 %249 = fmul float %243, %246 %250 = fmul float %244, %247 %251 = fsub float %47, %125 %252 = fsub float %48, %126 %253 = fsub float %49, %127 %254 = fdiv float 1.000000e+00, %239 %255 = fdiv float 1.000000e+00, %240 %256 = fdiv float 1.000000e+00, %241 %257 = fmul float %251, %254 %258 = fmul float %252, %255 %259 = fmul float %253, %256 %260 = fcmp ogt float %239, 0.000000e+00 %261 = fcmp ogt float %240, 0.000000e+00 %262 = fcmp ogt float %241, 0.000000e+00 %. = select i1 %260, float %248, float %257 %temp68.0 = select i1 %261, float %249, float %258 %.100 = select i1 %262, float %250, float %259 %263 = fadd float %44, %47 %264 = fadd float %45, %48 %265 = fadd float %46, %49 %266 = fmul float %263, 5.000000e-01 %267 = fmul float %264, 5.000000e-01 %268 = fmul float %265, 5.000000e-01 %269 = call float @llvm.minnum.f32(float %., float %temp68.0) %270 = call float @llvm.minnum.f32(float %269, float %.100) %271 = fsub float %266, %232 %272 = fsub float %267, %231 %273 = fsub float %268, %230 %274 = fadd float %271, %125 %275 = fadd float %272, %126 %276 = fadd float %273, %127 %277 = fmul float %239, %270 %278 = fadd float %277, %274 %279 = fmul float %240, %270 %280 = fadd float %279, %275 %281 = fmul float %241, %270 %282 = fadd float %281, %276 %283 = fsub float %278, %266 %284 = fsub float %280, %267 %285 = fsub float %282, %268 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp44.0 = phi float [ %283, %IF ], [ %226, %main_body ] %temp45.0 = phi float [ %284, %IF ], [ %227, %main_body ] %temp46.0 = phi float [ %285, %IF ], [ %228, %main_body ] %286 = fsub float 1.000000e+00, %80 %287 = call float @llvm.pow.f32(float %286, float 7.500000e-01) %288 = fmul float %287, 7.000000e+00 %289 = insertelement <4 x float> undef, float %temp44.0, i32 0 %290 = insertelement <4 x float> %289, float %temp45.0, i32 1 %291 = insertelement <4 x float> %290, float %temp46.0, i32 2 %292 = insertelement <4 x float> %291, float %288, i32 3 %293 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %292) %294 = extractelement <4 x float> %293, i32 0 %295 = extractelement <4 x float> %293, i32 1 %296 = extractelement <4 x float> %293, i32 2 %297 = extractelement <4 x float> %293, i32 3 %298 = call float @llvm.fabs.f32(float %296) %299 = fdiv float 1.000000e+00, %298 %300 = fmul float %294, %299 %301 = fadd float %300, 1.500000e+00 %302 = fmul float %295, %299 %303 = fadd float %302, 1.500000e+00 %304 = bitcast float %303 to i32 %305 = bitcast float %301 to i32 %306 = bitcast float %297 to i32 %307 = bitcast float %288 to i32 %308 = insertelement <4 x i32> undef, i32 %304, i32 0 %309 = insertelement <4 x i32> %308, i32 %305, i32 1 %310 = insertelement <4 x i32> %309, i32 %306, i32 2 %311 = insertelement <4 x i32> %310, i32 %307, i32 3 %312 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %311, <32 x i8> %83, <16 x i8> %85, i32 4) %313 = extractelement <4 x float> %312, i32 0 %314 = extractelement <4 x float> %312, i32 1 %315 = extractelement <4 x float> %312, i32 2 %316 = extractelement <4 x float> %312, i32 3 %317 = call float @llvm.pow.f32(float %316, float %53) %318 = fmul float %52, %317 %319 = fmul float %318, %313 %320 = fmul float %318, %314 %321 = fmul float %318, %315 %322 = fcmp olt float %50, 0x3FEFFFEB00000000 br i1 %322, label %IF86, label %ENDIF85 IF86: ; preds = %ENDIF %323 = fcmp ogt float %63, 0.000000e+00 br i1 %323, label %IF89, label %ENDIF88 ENDIF85: ; preds = %ENDIF, %ENDIF88 %temp28.0 = phi float [ %551, %ENDIF88 ], [ %319, %ENDIF ] %temp29.0 = phi float [ %552, %ENDIF88 ], [ %320, %ENDIF ] %temp30.0 = phi float [ %553, %ENDIF88 ], [ %321, %ENDIF ] %324 = fmul float %temp28.0, %173 %325 = fmul float %temp29.0, %173 %326 = fmul float %temp30.0, %173 %327 = fsub float 1.000000e+00, %80 %328 = fsub float %24, %143 %329 = fsub float %25, %144 %330 = fsub float %26, %145 %331 = fmul float %328, %328 %332 = fmul float %329, %329 %333 = fadd float %332, %331 %334 = fmul float %330, %330 %335 = fadd float %333, %334 %336 = call float @llvm.AMDGPU.rsq.clamped.f32(float %335) %337 = fmul float %328, %336 %338 = fmul float %329, %336 %339 = fmul float %330, %336 %340 = fmul float %143, %134 %341 = fsub float -0.000000e+00, %340 %342 = fmul float %144, %135 %343 = fsub float %341, %342 %344 = fmul float %145, %136 %345 = fsub float %343, %344 %346 = call float @llvm.maxnum.f32(float %345, float 0.000000e+00) %347 = fmul float %24, %337 %348 = fmul float %25, %338 %349 = fadd float %348, %347 %350 = fmul float %26, %339 %351 = fadd float %349, %350 %352 = call float @llvm.maxnum.f32(float %351, float 0.000000e+00) %353 = fmul float %327, %327 %354 = fmul float %353, %75 %355 = fsub float 1.000000e+00, %327 %356 = fmul float %355, 0x3FEEF9DB20000000 %357 = fadd float %356, 0x3F9EB851E0000000 %358 = call float @llvm.log2.f32(float %357) %359 = fdiv float 1.000000e+00, %358 %360 = fmul float %359, 1.000000e+01 %361 = fmul float %360, %360 %362 = fsub float 1.000000e+00, %179 %363 = fsub float 1.000000e+00, %346 %364 = fmul float %352, 2.000000e+00 %365 = fmul float %352, %327 %366 = fmul float %364, %365 %367 = fadd float %366, 5.000000e-01 %368 = fsub float 1.000000e+00, %352 %369 = fsub float 1.000000e+00, %346 %370 = fsub float 1.000000e+00, %161 %371 = fadd float %80, %370 %372 = call float @llvm.AMDIL.clamp.(float %371, float 0.000000e+00, float 1.000000e+00) %373 = fmul float %369, %369 %374 = fmul float %369, %369 %375 = fmul float %374, %369 %376 = fmul float %373, %375 %377 = call float @llvm.AMDGPU.lrp(float %376, float %372, float %157) %378 = call float @llvm.AMDGPU.lrp(float %376, float %372, float %158) %379 = call float @llvm.AMDGPU.lrp(float %376, float %372, float %159) %380 = call float @llvm.AMDGPU.lrp(float %179, float 1.000000e+00, float %354) %381 = call float @llvm.AMDGPU.lrp(float %346, float 1.000000e+00, float %354) %382 = fmul float %380, %381 %383 = fadd float %382, 0x3F1A36E2E0000000 %384 = fdiv float 1.000000e+00, %383 %385 = fmul float %134, %337 %386 = fmul float %135, %338 %387 = fadd float %386, %385 %388 = fmul float %136, %339 %389 = fadd float %387, %388 %390 = call float @llvm.maxnum.f32(float %389, float 0.000000e+00) %391 = call float @llvm.pow.f32(float %390, float %361) %392 = fadd float %361, 1.000000e+00 %393 = fmul float %392, %74 %394 = fmul float %391, %393 %395 = fmul float %384, %394 %396 = fmul float %395, %179 %397 = fmul float %396, %73 %398 = call float @llvm.maxnum.f32(float %397, float 0.000000e+00) %399 = fmul float %398, %209 %400 = fmul float %398, %210 %401 = fmul float %398, %211 %402 = fsub float 1.000000e+00, %157 %403 = fsub float 1.000000e+00, %158 %404 = fsub float 1.000000e+00, %159 %405 = fmul float %368, %368 %406 = fmul float %368, %368 %407 = fmul float %406, %368 %408 = fmul float %405, %407 %409 = fmul float %402, %408 %410 = fadd float %409, %157 %411 = fmul float %403, %408 %412 = fadd float %411, %158 %413 = fmul float %404, %408 %414 = fadd float %413, %159 %415 = fadd float %367, -1.000000e+00 %416 = fmul float %362, %362 %417 = fmul float %362, %362 %418 = fmul float %417, %362 %419 = fmul float %416, %418 %420 = fmul float %415, %419 %421 = fadd float %420, 1.000000e+00 %422 = fadd float %367, -1.000000e+00 %423 = fmul float %363, %363 %424 = fmul float %363, %363 %425 = fmul float %424, %363 %426 = fmul float %423, %425 %427 = fmul float %422, %426 %428 = fadd float %427, 1.000000e+00 %429 = fmul float %421, %428 %430 = fmul float %429, %179 %431 = fmul float %209, %430 %432 = fadd float %431, %212 %433 = fmul float %210, %430 %434 = fadd float %433, %213 %435 = fmul float %211, %430 %436 = fadd float %435, %214 %437 = fmul float %162, %432 %438 = fmul float %163, %434 %439 = fmul float %164, %436 %440 = fmul float %399, %410 %441 = fadd float %440, %437 %442 = fmul float %400, %412 %443 = fadd float %442, %438 %444 = fmul float %401, %414 %445 = fadd float %444, %439 %446 = fmul float %324, %377 %447 = fadd float %446, %441 %448 = fmul float %325, %378 %449 = fadd float %448, %443 %450 = fmul float %326, %379 %451 = fadd float %450, %445 %452 = fmul float %121, %42 %453 = fadd float %452, %43 %454 = call float @llvm.AMDIL.clamp.(float %453, float 0.000000e+00, float 1.000000e+00) %455 = call float @llvm.AMDGPU.lrp(float %454, float %447, float %39) %456 = call float @llvm.AMDGPU.lrp(float %454, float %449, float %40) %457 = call float @llvm.AMDGPU.lrp(float %454, float %451, float %41) %458 = call i32 @llvm.SI.packf16(float %455, float %456) %459 = bitcast i32 %458 to float %460 = call i32 @llvm.SI.packf16(float %457, float 1.000000e+00) %461 = bitcast i32 %460 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %459, float %461, float %459, float %461) ret void IF89: ; preds = %IF86 %462 = fmul float %226, %226 %463 = fmul float %227, %227 %464 = fadd float %463, %462 %465 = fmul float %228, %228 %466 = fadd float %464, %465 %467 = call float @llvm.AMDGPU.rsq.clamped.f32(float %466) %468 = fmul float %226, %467 %469 = fmul float %227, %467 %470 = fmul float %228, %467 %471 = fsub float %54, %125 %472 = fsub float %55, %126 %473 = fsub float %56, %127 %474 = fdiv float 1.000000e+00, %468 %475 = fdiv float 1.000000e+00, %469 %476 = fdiv float 1.000000e+00, %470 %477 = fmul float %471, %474 %478 = fmul float %472, %475 %479 = fmul float %473, %476 %480 = fsub float %57, %125 %481 = fsub float %58, %126 %482 = fsub float %59, %127 %483 = fdiv float 1.000000e+00, %468 %484 = fdiv float 1.000000e+00, %469 %485 = fdiv float 1.000000e+00, %470 %486 = fmul float %480, %483 %487 = fmul float %481, %484 %488 = fmul float %482, %485 %489 = fcmp ogt float %468, 0.000000e+00 %490 = fcmp ogt float %469, 0.000000e+00 %491 = fcmp ogt float %470, 0.000000e+00 %.101 = select i1 %489, float %477, float %486 %temp68.1 = select i1 %490, float %478, float %487 %.102 = select i1 %491, float %479, float %488 %492 = fadd float %54, %57 %493 = fadd float %55, %58 %494 = fadd float %56, %59 %495 = fmul float %492, 5.000000e-01 %496 = fmul float %493, 5.000000e-01 %497 = fmul float %494, 5.000000e-01 %498 = call float @llvm.minnum.f32(float %.101, float %temp68.1) %499 = call float @llvm.minnum.f32(float %498, float %.102) %500 = fsub float %495, %60 %501 = fsub float %496, %61 %502 = fsub float %497, %62 %503 = fadd float %500, %125 %504 = fadd float %501, %126 %505 = fadd float %502, %127 %506 = fmul float %468, %499 %507 = fadd float %506, %503 %508 = fmul float %469, %499 %509 = fadd float %508, %504 %510 = fmul float %470, %499 %511 = fadd float %510, %505 %512 = fsub float %507, %495 %513 = fsub float %509, %496 %514 = fsub float %511, %497 br label %ENDIF88 ENDIF88: ; preds = %IF86, %IF89 %temp48.0 = phi float [ %512, %IF89 ], [ %226, %IF86 ] %temp49.0 = phi float [ %513, %IF89 ], [ %227, %IF86 ] %temp50.0 = phi float [ %514, %IF89 ], [ %228, %IF86 ] %515 = fsub float 1.000000e+00, %80 %516 = call float @llvm.pow.f32(float %515, float 7.500000e-01) %517 = fmul float %516, 7.000000e+00 %518 = insertelement <4 x float> undef, float %temp48.0, i32 0 %519 = insertelement <4 x float> %518, float %temp49.0, i32 1 %520 = insertelement <4 x float> %519, float %temp50.0, i32 2 %521 = insertelement <4 x float> %520, float %517, i32 3 %522 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %521) %523 = extractelement <4 x float> %522, i32 0 %524 = extractelement <4 x float> %522, i32 1 %525 = extractelement <4 x float> %522, i32 2 %526 = extractelement <4 x float> %522, i32 3 %527 = call float @llvm.fabs.f32(float %525) %528 = fdiv float 1.000000e+00, %527 %529 = fmul float %523, %528 %530 = fadd float %529, 1.500000e+00 %531 = fmul float %524, %528 %532 = fadd float %531, 1.500000e+00 %533 = bitcast float %532 to i32 %534 = bitcast float %530 to i32 %535 = bitcast float %526 to i32 %536 = bitcast float %517 to i32 %537 = insertelement <4 x i32> undef, i32 %533, i32 0 %538 = insertelement <4 x i32> %537, i32 %534, i32 1 %539 = insertelement <4 x i32> %538, i32 %535, i32 2 %540 = insertelement <4 x i32> %539, i32 %536, i32 3 %541 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %540, <32 x i8> %88, <16 x i8> %91, i32 4) %542 = extractelement <4 x float> %541, i32 0 %543 = extractelement <4 x float> %541, i32 1 %544 = extractelement <4 x float> %541, i32 2 %545 = extractelement <4 x float> %541, i32 3 %546 = call float @llvm.pow.f32(float %545, float %65) %547 = fmul float %64, %546 %548 = fmul float %547, %542 %549 = fmul float %547, %543 %550 = fmul float %547, %544 %551 = call float @llvm.AMDGPU.lrp(float %50, float %319, float %548) %552 = call float @llvm.AMDGPU.lrp(float %50, float %320, float %549) %553 = call float @llvm.AMDGPU.lrp(float %50, float %321, float %550) br label %ENDIF85 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v13, v0, 0, 0, [m0] ; C8340000 v_interp_p2_f32 v13, [v13], v1, 0, 0, [m0] ; C8350001 v_interp_p1_f32 v14, v0, 1, 0, [m0] ; C8380100 v_interp_p2_f32 v14, [v14], v1, 1, 0, [m0] ; C8390101 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v15, v0, 0, 3, [m0] ; C83C0C00 v_interp_p2_f32 v15, [v15], v1, 0, 3, [m0] ; C83D0C01 v_interp_p1_f32 v16, v0, 1, 3, [m0] ; C8400D00 v_interp_p2_f32 v16, [v16], v1, 1, 3, [m0] ; C8410D01 v_interp_p1_f32 v17, v0, 3, 3, [m0] ; C8440F00 v_interp_p2_f32 v17, [v17], v1, 3, 3, [m0] ; C8450F01 v_interp_p1_f32 v9, v0, 0, 4, [m0] ; C8241000 v_interp_p2_f32 v9, [v9], v1, 0, 4, [m0] ; C8251001 v_interp_p1_f32 v18, v0, 1, 4, [m0] ; C8481100 v_interp_p2_f32 v18, [v18], v1, 1, 4, [m0] ; C8491101 v_interp_p1_f32 v19, v0, 2, 4, [m0] ; C84C1200 v_interp_p2_f32 v19, [v19], v1, 2, 4, [m0] ; C84D1201 v_interp_p1_f32 v20, v0, 3, 4, [m0] ; C8501300 v_interp_p2_f32 v20, [v20], v1, 3, 4, [m0] ; C8511301 v_mul_f32_e32 v4, v2, v2 ; 10080502 v_mac_f32_e32 v4, v3, v3 ; 3E080703 v_mac_f32_e32 v4, v8, v8 ; 3E081108 v_rsq_clamp_f32_e32 v10, v4 ; 7E145904 v_mul_f32_e32 v4, v18, v18 ; 10082512 v_mac_f32_e32 v4, v19, v19 ; 3E082713 v_mac_f32_e32 v4, v20, v20 ; 3E082914 v_rsq_clamp_f32_e32 v21, v4 ; 7E2A5904 v_mul_f32_e32 v4, v10, v2 ; 1008050A v_mul_f32_e32 v3, v10, v3 ; 1006070A v_mul_f32_e32 v2, v10, v8 ; 1004110A v_mul_f32_e32 v11, v21, v18 ; 10162515 v_mul_f32_e32 v10, v21, v19 ; 10142715 v_mul_f32_e32 v8, v11, v4 ; 1010090B v_mac_f32_e32 v8, v10, v3 ; 3E10070A v_mul_f32_e32 v12, v21, v20 ; 10182915 v_mac_f32_e32 v8, v12, v2 ; 3E10050C v_mul_f32_e32 v22, v4, v8 ; 102C1104 v_mac_f32_e32 v22, v4, v8 ; 3E2C1104 v_mul_f32_e32 v23, v3, v8 ; 102E1103 v_mac_f32_e32 v23, v3, v8 ; 3E2E1103 v_mad_f32 v25, v18, v21, -v22 ; D2820019 845A2B12 v_mad_f32 v26, v19, v21, -v23 ; D282001A 845E2B13 v_interp_p1_f32 v28, v0, 0, 5, [m0] ; C8701400 v_interp_p2_f32 v28, [v28], v1, 0, 5, [m0] ; C8711401 v_interp_p1_f32 v24, v0, 1, 5, [m0] ; C8601500 v_interp_p2_f32 v24, [v24], v1, 1, 5, [m0] ; C8611501 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx8 s[16:23], s[6:7], 0x10 ; C0C80710 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p1_f32 v29, v0, 2, 5, [m0] ; C8741600 v_interp_p2_f32 v29, [v29], v1, 2, 5, [m0] ; C8751601 v_mul_f32_e32 v0, v2, v8 ; 10001102 v_mac_f32_e32 v0, v2, v8 ; 3E001102 s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C s_load_dwordx4 s[36:39], s[4:5], 0x10 ; C0920510 s_load_dwordx8 s[40:47], s[6:7], 0x18 ; C0D40718 s_load_dwordx8 s[48:55], s[6:7], 0x20 ; C0D80720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[30:32], 7, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[16:23], s[12:15] ; F0800700 00641E0D s_buffer_load_dword s0, s[8:11], 0x4c ; C200094C s_buffer_load_dword s1, s[8:11], 0x4d ; C200894D s_buffer_load_dword s2, s[8:11], 0x4e ; C201094E v_mad_f32 v27, v20, v21, -v0 ; D282001B 84022B14 v_mov_b32_e32 v0, 0x6f800000 ; 7E0002FF 6F800000 v_cmp_gt_f32_e64 vcc, |v17|, v0 ; D008016A 00020111 v_mov_b32_e32 v0, 0x2f800000 ; 7E0002FF 2F800000 v_cndmask_b32_e32 v0, 1.0, v0 ; 000000F2 v_mul_f32_e32 v1, v0, v17 ; 10022300 v_rcp_f32_e32 v1, v1 ; 7E025501 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v21, s0, v30 ; 102A3C00 v_mul_f32_e32 v22, s1, v31 ; 102C3E01 v_mul_f32_e32 v23, s2, v32 ; 102E4002 v_mul_f32_e32 v8, v1, v15 ; 10101F01 v_mul_f32_e32 v1, v1, v16 ; 10022101 s_buffer_load_dword s1, s[8:11], 0x40 ; C2008940 s_buffer_load_dword s31, s[8:11], 0x54 ; C20F8954 s_buffer_load_dword s2, s[8:11], 0x41 ; C2010941 s_buffer_load_dword s3, s[8:11], 0x42 ; C2018942 v_mul_f32_e32 v15, v8, v0 ; 101E0108 v_mul_f32_e32 v16, v1, v0 ; 10200101 s_buffer_load_dword s12, s[8:11], 0x27 ; C2060927 s_buffer_load_dword s13, s[8:11], 0x2b ; C206892B s_buffer_load_dword s32, s[8:11], 0x2c ; C210092C s_buffer_load_dword s33, s[8:11], 0x2d ; C210892D s_buffer_load_dword s0, s[8:11], 0x58 ; C2000958 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v0, 1.0, s31 ; D2080000 00003EF2 v_mul_f32_e32 v8, s1, v0 ; 10100001 v_mul_f32_e32 v1, s2, v0 ; 10020002 v_mul_f32_e32 v0, s3, v0 ; 10000003 v_mac_f32_e32 v8, s31, v21 ; 3E102A1F v_mov_b32_e32 v30, v25 ; 7E3C0319 v_mac_f32_e32 v1, s31, v22 ; 3E022C1F v_mov_b32_e32 v31, v26 ; 7E3E031A v_mac_f32_e32 v0, s31, v23 ; 3E002E1F v_mov_b32_e32 v32, v27 ; 7E40031B v_cmp_lt_f32_e64 s[2:3], 0, s13 ; D0020002 00001A80 image_sample v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[40:47], s[24:27] ; F0800F00 00CA110D image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[48:55], s[36:39] ; F0800F00 012C0D0F s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[14:15], s[2:3] ; BE8E2402 s_xor_b64 s[14:15], exec, s[14:15] ; 898E0E7E s_cbranch_execz BB0_2 ; BF880000 s_buffer_load_dword s1, s[8:11], 0x20 ; C2008920 s_buffer_load_dword s2, s[8:11], 0x21 ; C2010921 s_buffer_load_dword s3, s[8:11], 0x22 ; C2018922 s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924 s_buffer_load_dword s16, s[8:11], 0x25 ; C2080925 v_mul_f32_e32 v14, v25, v25 ; 101C3319 v_mac_f32_e32 v14, v26, v26 ; 3E1C351A v_mac_f32_e32 v14, v27, v27 ; 3E1C371B v_rsq_clamp_f32_e32 v14, v14 ; 7E1C590E s_buffer_load_dword s17, s[8:11], 0x26 ; C2088926 s_buffer_load_dword s18, s[8:11], 0x28 ; C2090928 s_buffer_load_dword s19, s[8:11], 0x29 ; C2098929 s_buffer_load_dword s20, s[8:11], 0x2a ; C20A092A v_mul_f32_e32 v15, v14, v25 ; 101E330E v_mul_f32_e32 v16, v14, v26 ; 1020350E v_mul_f32_e32 v14, v14, v27 ; 101C370E v_rcp_f32_e32 v17, v15 ; 7E22550F s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v19, s1, v28 ; 08263801 v_sub_f32_e32 v20, s2, v24 ; 08283002 v_rcp_f32_e32 v30, v16 ; 7E3C5510 v_mul_f32_e32 v19, v17, v19 ; 10262711 v_sub_f32_e32 v31, s13, v28 ; 083E380D v_mul_f32_e32 v17, v17, v31 ; 10223F11 v_cmp_lt_f32_e32 vcc, 0, v15 ; 7C021E80 v_cndmask_b32_e32 v17, v17, v19 ; 00222711 v_rcp_f32_e32 v19, v14 ; 7E26550E v_mul_f32_e32 v20, v30, v20 ; 1028291E v_sub_f32_e32 v31, s16, v24 ; 083E3010 v_mul_f32_e32 v30, v30, v31 ; 103C3F1E v_cmp_lt_f32_e32 vcc, 0, v16 ; 7C022080 v_cndmask_b32_e32 v20, v30, v20 ; 0028291E v_sub_f32_e32 v30, s3, v29 ; 083C3A03 v_mul_f32_e32 v30, v19, v30 ; 103C3D13 v_sub_f32_e32 v31, s17, v29 ; 083E3A11 v_mul_f32_e32 v19, v19, v31 ; 10263F13 v_cmp_lt_f32_e32 vcc, 0, v14 ; 7C021C80 v_cndmask_b32_e32 v19, v19, v30 ; 00263D13 v_min3_f32 v17, v17, v20, v19 ; D2A20011 044E2911 v_mov_b32_e32 v19, s13 ; 7E26020D v_add_f32_e32 v19, s1, v19 ; 06262601 v_mov_b32_e32 v20, s16 ; 7E280210 v_add_f32_e32 v20, s2, v20 ; 06282802 v_mov_b32_e32 v30, s17 ; 7E3C0211 v_add_f32_e32 v32, s3, v30 ; 06403C03 v_mad_f32 v30, 0.5, v19, -s18 ; D282001E 804A26F0 v_add_f32_e32 v30, v28, v30 ; 063C3D1C v_mac_f32_e32 v30, v17, v15 ; 3E3C1F11 v_mad_f32 v15, 0.5, v20, -s19 ; D282000F 804E28F0 v_add_f32_e32 v15, v24, v15 ; 061E1F18 v_mac_f32_e32 v15, v17, v16 ; 3E1E2111 v_mad_f32 v16, 0.5, v32, -s20 ; D2820010 805240F0 v_add_f32_e32 v16, v29, v16 ; 0620211D v_mac_f32_e32 v16, v17, v14 ; 3E201D11 v_mad_f32 v30, 0.5, -v19, v30 ; D282001E 447A26F0 v_mad_f32 v31, 0.5, -v20, v15 ; D282001F 443E28F0 v_mad_f32 v32, 0.5, -v32, v16 ; D2820020 444240F0 s_or_b64 exec, exec, s[14:15] ; 88FE0E7E s_buffer_load_dword s27, s[8:11], 0x17 ; C20D8917 s_buffer_load_dword s28, s[8:11], 0x43 ; C20E0943 s_buffer_load_dword s26, s[8:11], 0x44 ; C20D0944 s_buffer_load_dword s19, s[8:11], 0x45 ; C2098945 s_buffer_load_dword s18, s[8:11], 0x46 ; C2090946 s_buffer_load_dword s2, s[8:11], 0x0 ; C2010900 s_buffer_load_dword s3, s[8:11], 0x1 ; C2018901 s_buffer_load_dword s1, s[8:11], 0x2 ; C2008902 s_buffer_load_dword s13, s[8:11], 0x4 ; C2068904 s_buffer_load_dword s14, s[8:11], 0x5 ; C2070905 s_buffer_load_dword s15, s[8:11], 0x6 ; C2078906 s_buffer_load_dword s16, s[8:11], 0x7 ; C2080907 s_buffer_load_dword s17, s[8:11], 0x8 ; C2088908 s_buffer_load_dword s20, s[8:11], 0x9 ; C20A0909 s_buffer_load_dword s21, s[8:11], 0xa ; C20A890A s_buffer_load_dword s22, s[8:11], 0xb ; C20B090B s_buffer_load_dword s23, s[8:11], 0xc ; C20B890C s_buffer_load_dword s24, s[8:11], 0xd ; C20C090D s_buffer_load_dword s25, s[8:11], 0xe ; C20C890E v_sub_f32_e64 v14, 1.0, s0 ; D208000E 000000F2 v_log_f32_e32 v14, v14 ; 7E1C4F0E v_mul_legacy_f32_e32 v14, 0x3f400000, v14 ; 0E1C1CFF 3F400000 v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mul_f32_e32 v33, 0x40e00000, v14 ; 10421CFF 40E00000 v_cubeid_f32 v17, v30, v31, v32 ; D2880011 04823F1E v_cubema_f32 v16, v30, v31, v32 ; D28E0010 04823F1E s_load_dwordx4 s[36:39], s[4:5], 0x0 ; C0920500 s_load_dwordx8 s[40:47], s[6:7], 0x0 ; C0D40700 v_cubesc_f32 v15, v30, v31, v32 ; D28A000F 04823F1E v_cubetc_f32 v14, v30, v31, v32 ; D28C000E 04823F1E v_rcp_f32_e64 v16, |v16| ; D3540110 00000110 v_mov_b32_e32 v30, 0x3fc00000 ; 7E3C02FF 3FC00000 v_mad_f32 v31, v16, v14, v30 ; D282001F 047A1D10 v_mac_f32_e32 v30, v16, v15 ; 3E3C1F10 v_mov_b32_e32 v32, v17 ; 7E400311 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[30:33], 15, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[40:47], s[36:39] ; F0900F00 012A1E1E s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v14, v33 ; 7E1C4F21 s_buffer_load_dword s29, s[8:11], 0xf ; C20E890F s_buffer_load_dword s30, s[8:11], 0x60 ; C20F0960 v_mul_legacy_f32_e32 v14, s33, v14 ; 0E1C1C21 v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mul_f32_e32 v14, s32, v14 ; 101C1C20 v_mul_f32_e32 v16, v30, v14 ; 10201D1E v_mul_f32_e32 v15, v31, v14 ; 101E1D1F v_mul_f32_e32 v14, v32, v14 ; 101C1D20 v_mov_b32_e32 v17, s31 ; 7E22021F v_mov_b32_e32 v19, 0x3f7fff58 ; 7E2602FF 3F7FFF58 v_cmp_lt_f32_e32 vcc, s12, v19 ; 7C02260C s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[32:33], vcc ; BEA0246A s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E s_cbranch_execz BB0_6 ; BF880000 s_buffer_load_dword s35, s[8:11], 0x3b ; C211893B s_buffer_load_dword s31, s[8:11], 0x3c ; C20F893C s_buffer_load_dword s34, s[8:11], 0x3d ; C211093D s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[36:37], 0, s35 ; D0020024 00004680 s_and_saveexec_b64 s[36:37], s[36:37] ; BEA42424 s_xor_b64 s[36:37], exec, s[36:37] ; 89A4247E s_cbranch_execz BB0_7 ; BF880000 s_buffer_load_dword s35, s[8:11], 0x36 ; C2118936 s_buffer_load_dword s38, s[8:11], 0x38 ; C2130938 s_buffer_load_dword s39, s[8:11], 0x39 ; C2138939 s_buffer_load_dword s40, s[8:11], 0x3a ; C214093A s_buffer_load_dword s41, s[8:11], 0x30 ; C2148930 s_buffer_load_dword s42, s[8:11], 0x31 ; C2150931 s_buffer_load_dword s43, s[8:11], 0x32 ; C2158932 s_buffer_load_dword s44, s[8:11], 0x34 ; C2160934 s_buffer_load_dword s45, s[8:11], 0x35 ; C2168935 v_mul_f32_e32 v19, v25, v25 ; 10263319 v_mac_f32_e32 v19, v26, v26 ; 3E26351A v_mac_f32_e32 v19, v27, v27 ; 3E26371B v_rsq_clamp_f32_e32 v19, v19 ; 7E265913 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v20, s35, v29 ; 08283A23 v_mov_b32_e32 v30, s35 ; 7E3C0223 v_sub_f32_e32 v31, s41, v28 ; 083E3829 v_sub_f32_e32 v32, s42, v24 ; 0840302A v_add_f32_e32 v30, s43, v30 ; 063C3C2B v_sub_f32_e32 v33, s43, v29 ; 08423A2B v_mad_f32 v34, 0.5, v30, -s40 ; D2820022 80A23CF0 v_add_f32_e32 v29, v29, v34 ; 063A451D v_mul_f32_e32 v25, v19, v25 ; 10323313 v_mul_f32_e32 v26, v19, v26 ; 10343513 v_mul_f32_e32 v19, v19, v27 ; 10263713 v_rcp_f32_e32 v27, v25 ; 7E365519 v_rcp_f32_e32 v34, v26 ; 7E44551A v_rcp_f32_e32 v35, v19 ; 7E465513 v_sub_f32_e32 v36, s44, v28 ; 0848382C v_mov_b32_e32 v37, s44 ; 7E4A022C v_add_f32_e32 v37, s41, v37 ; 064A4A29 v_mul_f32_e32 v31, v27, v31 ; 103E3F1B v_mul_f32_e32 v27, v27, v36 ; 1036491B v_mul_f32_e32 v32, v34, v32 ; 10404122 v_mul_f32_e32 v33, v35, v33 ; 10424323 v_mul_f32_e32 v20, v35, v20 ; 10282923 v_mad_f32 v35, 0.5, v37, -s38 ; D2820023 809A4AF0 v_add_f32_e32 v28, v28, v35 ; 0638471C v_sub_f32_e32 v35, s45, v24 ; 0846302D v_mov_b32_e32 v36, s45 ; 7E48022D v_mul_f32_e32 v34, v34, v35 ; 10444722 v_add_f32_e32 v35, s42, v36 ; 0646482A v_cmp_lt_f32_e32 vcc, 0, v25 ; 7C023280 v_cndmask_b32_e32 v27, v27, v31 ; 00363F1B v_cmp_lt_f32_e32 vcc, 0, v26 ; 7C023480 v_cndmask_b32_e32 v31, v34, v32 ; 003E4122 v_cmp_lt_f32_e32 vcc, 0, v19 ; 7C022680 v_cndmask_b32_e32 v20, v20, v33 ; 00284314 v_min3_f32 v20, v27, v31, v20 ; D2A20014 04523F1B v_mad_f32 v27, 0.5, v35, -s39 ; D282001B 809E46F0 v_add_f32_e32 v24, v24, v27 ; 06303718 v_mac_f32_e32 v28, v20, v25 ; 3E383314 v_mac_f32_e32 v24, v20, v26 ; 3E303514 v_mac_f32_e32 v29, v20, v19 ; 3E3A2714 v_mad_f32 v25, 0.5, -v37, v28 ; D2820019 44724AF0 v_mad_f32 v26, 0.5, -v35, v24 ; D282001A 446246F0 v_mad_f32 v27, 0.5, -v30, v29 ; D282001B 44763CF0 s_or_b64 exec, exec, s[36:37] ; 88FE247E v_sub_f32_e64 v19, 1.0, s0 ; D2080013 000000F2 v_log_f32_e32 v19, v19 ; 7E264F13 s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 v_mul_legacy_f32_e32 v19, 0x3f400000, v19 ; 0E2626FF 3F400000 v_exp_f32_e32 v19, v19 ; 7E264B13 v_mul_f32_e32 v28, 0x40e00000, v19 ; 103826FF 40E00000 v_cubeid_f32 v32, v25, v26, v27 ; D2880020 046E3519 v_cubema_f32 v31, v25, v26, v27 ; D28E001F 046E3519 s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 v_cubesc_f32 v30, v25, v26, v27 ; D28A001E 046E3519 v_cubetc_f32 v29, v25, v26, v27 ; D28C001D 046E3519 v_rcp_f32_e64 v19, |v31| ; D3540113 0000011F v_mov_b32_e32 v25, 0x3fc00000 ; 7E3202FF 3FC00000 v_mad_f32 v26, v19, v29, v25 ; D282001A 04663B13 v_mac_f32_e32 v25, v19, v30 ; 3E323D13 v_mov_b32_e32 v27, v32 ; 7E360320 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[24:27], 15, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[40:47], s[36:39] ; F0900F00 012A1819 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v19, v27 ; 7E264F1B v_sub_f32_e64 v20, 1.0, s12 ; D2080014 000018F2 v_mul_legacy_f32_e32 v19, s34, v19 ; 0E262622 v_exp_f32_e32 v19, v19 ; 7E264B13 v_mul_f32_e32 v19, s31, v19 ; 1026261F v_mul_f32_e32 v24, v24, v19 ; 10302718 v_mul_f32_e32 v25, v25, v19 ; 10322719 v_mul_f32_e32 v19, v26, v19 ; 1026271A v_mul_f32_e32 v24, v24, v20 ; 10302918 v_mul_f32_e32 v25, v25, v20 ; 10322919 v_mul_f32_e32 v19, v19, v20 ; 10262913 v_mac_f32_e32 v24, s12, v16 ; 3E30200C v_mac_f32_e32 v25, s12, v15 ; 3E321E0C v_mac_f32_e32 v19, s12, v14 ; 3E261C0C v_mov_b32_e32 v14, v19 ; 7E1C0313 v_mov_b32_e32 v15, v25 ; 7E1E0319 v_mov_b32_e32 v16, v24 ; 7E200318 s_or_b64 exec, exec, s[32:33] ; 88FE207E v_mad_f32 v24, -v17, s28, s28 ; D2820018 20703911 v_mov_b32_e32 v17, s27 ; 7E22021B v_mul_f32_e32 v21, v24, v21 ; 102A2B18 v_mul_f32_e32 v20, v24, v22 ; 10282D18 v_mul_f32_e32 v19, v24, v23 ; 10262F18 v_mul_f32_e32 v22, s26, v13 ; 102C1A1A v_sub_f32_e64 v25, 1.0, s30 ; D2080019 00003CF2 v_mac_f32_e32 v25, s30, v18 ; 3E32241E v_mul_f32_e32 v18, s19, v13 ; 10241A13 v_mul_f32_e32 v13, s18, v13 ; 101A1A12 s_buffer_load_dword s6, s[8:11], 0x10 ; C2030910 s_buffer_load_dword s5, s[8:11], 0x11 ; C2028911 s_buffer_load_dword s4, s[8:11], 0x12 ; C2020912 s_buffer_load_dword s18, s[8:11], 0x16 ; C2090916 s_buffer_load_dword s7, s[8:11], 0x48 ; C2038948 s_buffer_load_dword s12, s[8:11], 0x49 ; C2060949 s_buffer_load_dword s8, s[8:11], 0x4b ; C204094B v_mul_f32_e32 v23, s14, v3 ; 102E060E v_mac_f32_e32 v23, s13, v4 ; 3E2E080D v_mac_f32_e32 v23, s15, v2 ; 3E2E040F v_add_f32_e32 v23, s16, v23 ; 062E2E10 v_mul_f32_e32 v26, s20, v3 ; 10340614 v_mac_f32_e32 v26, s17, v4 ; 3E340811 v_mac_f32_e32 v26, s21, v2 ; 3E340415 v_add_f32_e32 v26, s22, v26 ; 06343416 v_mul_f32_e32 v27, s24, v3 ; 10360618 v_mac_f32_e32 v27, s23, v4 ; 3E360817 v_mac_f32_e32 v27, s25, v2 ; 3E360419 v_add_f32_e32 v27, s29, v27 ; 0636361D v_add_f32_e32 v5, v23, v5 ; 060A0B17 v_add_f32_e32 v6, v26, v6 ; 060C0D1A v_add_f32_e32 v26, v27, v7 ; 06340F1B v_mul_f32_e32 v7, s2, v4 ; 100E0802 v_mac_f32_e32 v7, s3, v3 ; 3E0E0603 v_mac_f32_e32 v7, s1, v2 ; 3E0E0401 v_max_f32_e32 v23, 0, v7 ; 202E0E80 v_mul_f32_e32 v7, v25, v5 ; 100E0B19 v_mul_f32_e32 v5, v25, v6 ; 100A0D19 v_mul_f32_e32 v6, v25, v26 ; 100C3519 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v17, s18, v9 ; 3E221212 v_mul_f32_e32 v9, v25, v16 ; 10122119 v_mul_f32_e32 v15, v25, v15 ; 101E1F19 v_mul_f32_e32 v14, v25, v14 ; 101C1D19 v_sub_f32_e32 v16, 1.0, v24 ; 082030F2 v_add_f32_e32 v16, s0, v16 ; 06202000 v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080 v_sub_f32_e32 v24, s2, v11 ; 08301602 v_sub_f32_e32 v25, s3, v10 ; 08321403 v_mul_f32_e32 v26, v24, v24 ; 10343118 v_mac_f32_e32 v26, v25, v25 ; 3E343319 v_sub_f32_e32 v27, s1, v12 ; 08361801 v_mac_f32_e32 v26, v27, v27 ; 3E34371B v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A v_mul_f32_e32 v24, v26, v24 ; 1030311A v_mul_f32_e32 v25, v26, v25 ; 1032331A v_mul_f32_e32 v26, v26, v27 ; 1034371A v_mul_f32_e32 v11, v11, v4 ; 1016090B v_mad_f32 v10, -v10, v3, -v11 ; D282000A A42E070A v_mad_f32 v10, -v12, v2, v10 ; D282000A 242A050C v_mul_f32_e32 v4, v24, v4 ; 10080918 v_mac_f32_e32 v4, v25, v3 ; 3E080719 v_mul_f32_e32 v3, s2, v24 ; 10063002 v_mac_f32_e32 v3, s3, v25 ; 3E063203 v_mac_f32_e32 v4, v26, v2 ; 3E08051A v_mac_f32_e32 v3, s1, v26 ; 3E063401 v_max_f32_e32 v2, 0, v3 ; 20040680 v_sub_f32_e32 v3, 1.0, v2 ; 080604F2 v_mul_f32_e32 v11, v3, v3 ; 10160703 v_mul_f32_e32 v3, v3, v11 ; 10061703 v_mul_f32_e32 v3, v3, v11 ; 10061703 v_max_f32_e32 v10, 0, v10 ; 20141480 v_sub_f32_e32 v11, 1.0, v10 ; 081614F2 v_mul_f32_e32 v12, v11, v11 ; 1018170B v_mul_f32_e32 v24, v11, v12 ; 1030190B v_mad_f32 v25, -v12, v24, 1.0 ; D2820019 23CA310C v_mul_f32_e32 v26, v8, v25 ; 10343308 v_sub_f32_e32 v27, 1.0, v8 ; 083610F2 v_mac_f32_e32 v8, v3, v27 ; 3E103703 v_mul_f32_e32 v27, v1, v25 ; 10363301 v_sub_f32_e32 v28, 1.0, v1 ; 083802F2 v_mac_f32_e32 v1, v3, v28 ; 3E023903 v_mul_f32_e32 v25, v0, v25 ; 10323300 v_sub_f32_e32 v28, 1.0, v0 ; 083800F2 v_mac_f32_e32 v0, v3, v28 ; 3E003903 v_sub_f32_e64 v3, 1.0, s0 ; D2080003 000000F2 v_sub_f32_e32 v28, 1.0, v3 ; 083806F2 v_mov_b32_e32 v29, 0x3cf5c28f ; 7E3A02FF 3CF5C28F v_madmk_f32_e32 v28, v28, v29, 0x3f77ced9 ; 40383B1C 3F77CED9 v_add_f32_e32 v29, v2, v2 ; 063A0502 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mad_f32 v2, v29, v2, 0.5 ; D2820002 03C2051D v_mul_f32_e32 v12, v24, v12 ; 10181918 v_mac_f32_e32 v26, v16, v12 ; 3E341910 v_mac_f32_e32 v27, v16, v12 ; 3E361910 v_mac_f32_e32 v25, v16, v12 ; 3E321910 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_log_f32_e32 v16, v28 ; 7E204F1C v_mul_f32_e32 v3, s8, v3 ; 10060608 v_mul_f32_e32 v11, v3, v11 ; 10161703 v_mac_f32_e32 v11, 1.0, v10 ; 3E1614F2 v_rcp_f32_e32 v10, v16 ; 7E145510 v_sub_f32_e32 v16, 1.0, v23 ; 08202EF2 v_mul_f32_e32 v3, v3, v16 ; 10062103 v_mac_f32_e32 v3, 1.0, v23 ; 3E062EF2 v_max_f32_e32 v4, 0, v4 ; 20080880 v_log_f32_e32 v4, v4 ; 7E084F04 v_madak_f32_e32 v3, v3, v11, 0x38d1b717 ; 42061703 38D1B717 v_mul_f32_e32 v10, 0x41200000, v10 ; 101414FF 41200000 v_mul_f32_e32 v11, v10, v10 ; 1016150A v_mul_legacy_f32_e32 v4, v11, v4 ; 0E08090B v_rcp_f32_e32 v3, v3 ; 7E065503 v_mad_f32 v10, v10, v10, 1.0 ; D282000A 03CA150A v_mul_f32_e32 v10, s12, v10 ; 1014140C v_exp_f32_e32 v4, v4 ; 7E084B04 v_mul_f32_e32 v4, v10, v4 ; 1008090A v_mul_f32_e32 v3, v4, v3 ; 10060704 v_mul_f32_e32 v4, v16, v16 ; 10082110 v_mul_f32_e32 v10, v16, v4 ; 10140910 v_mul_f32_e32 v4, v10, v4 ; 1008090A v_add_f32_e32 v2, -1.0, v2 ; 060404F3 v_mad_f32 v4, v2, v4, 1.0 ; D2820004 03CA0902 v_mad_f32 v2, v2, v12, 1.0 ; D2820002 03CA1902 v_mul_f32_e32 v2, v2, v4 ; 10040902 v_mul_f32_e32 v3, v23, v3 ; 10060717 v_mul_f32_e32 v3, s7, v3 ; 10060607 v_mul_f32_e32 v2, v23, v2 ; 10040517 v_mac_f32_e32 v7, v2, v22 ; 3E0E2D02 v_mul_f32_e32 v4, v7, v21 ; 10082B07 v_max_f32_e32 v3, 0, v3 ; 20060680 v_mul_f32_e32 v7, v22, v3 ; 100E0716 v_mac_f32_e32 v4, v8, v7 ; 3E080F08 v_mac_f32_e32 v5, v2, v18 ; 3E0A2502 v_mac_f32_e32 v6, v2, v13 ; 3E0C1B02 v_mul_f32_e32 v2, v18, v3 ; 10040712 v_mul_f32_e32 v3, v13, v3 ; 1006070D v_mul_f32_e32 v5, v5, v20 ; 100A2905 v_mul_f32_e32 v6, v6, v19 ; 100C2706 v_mac_f32_e32 v5, v1, v2 ; 3E0A0501 v_mac_f32_e32 v6, v0, v3 ; 3E0C0700 v_mac_f32_e32 v4, v26, v9 ; 3E08131A v_mac_f32_e32 v5, v27, v15 ; 3E0A1F1B v_mac_f32_e32 v6, v25, v14 ; 3E0C1D19 v_add_f32_e64 v0, 0, v17 clamp ; D2060800 00022280 v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 v_mul_f32_e32 v2, s6, v1 ; 10040206 v_mac_f32_e32 v2, v4, v0 ; 3E040104 v_mul_f32_e32 v3, s5, v1 ; 10060205 v_mac_f32_e32 v3, v5, v0 ; 3E060105 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mac_f32_e32 v1, v6, v0 ; 3E020106 v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702 v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 40 Code Size: 2220 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL OUT[6], GENERIC[5] DCL OUT[7], GENERIC[6] DCL OUT[8], GENERIC[7] DCL CONST[0..20] DCL TEMP[0..10], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[6], IN[0].xxxx 1: MAD TEMP[0], CONST[7], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0].xyz, CONST[9], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[17], IN[0].xxxx 5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1] 8: MAD TEMP[2].xy, IN[2].xyyy, CONST[14].xyyy, CONST[14].zwww 9: FSEQ TEMP[3].x, CONST[16].xxxx, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].xy, IN[2].xyxx 12: ELSE :0 13: MOV TEMP[3].xy, IN[3].xyxx 14: ENDIF 15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[15].xyyy, CONST[15].zwww 16: MOV TEMP[2].zw, TEMP[3].yyxy 17: MOV TEMP[3].x, CONST[10].xxxx 18: MOV TEMP[3].y, CONST[11].xxxx 19: MOV TEMP[3].z, CONST[12].xxxx 20: MOV TEMP[4].x, CONST[10].yyyy 21: MOV TEMP[4].y, CONST[11].yyyy 22: MOV TEMP[4].z, CONST[12].yyyy 23: MOV TEMP[5].x, CONST[10].zzzz 24: MOV TEMP[5].y, CONST[11].zzzz 25: MOV TEMP[5].z, CONST[12].zzzz 26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz 29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 30: RSQ TEMP[4].x, TEMP[4].xxxx 31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 32: MUL TEMP[4].xyz, CONST[6].xyzz, IN[4].xxxx 33: MAD TEMP[4].xyz, CONST[7].xyzz, IN[4].yyyy, TEMP[4].xyzz 34: MAD TEMP[4].xyz, CONST[8].xyzz, IN[4].zzzz, TEMP[4].xyzz 35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 36: RSQ TEMP[5].x, TEMP[5].xxxx 37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx 39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz 40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww 41: MOV TEMP[4].xyz, TEMP[4].xyzx 42: MOV TEMP[5].xyz, TEMP[5].xyzx 43: MOV TEMP[6].xyz, TEMP[3].xyzx 44: MUL TEMP[7].xyw, TEMP[1], IMM[0].yyyy 45: MOV TEMP[8].x, TEMP[7].xxxx 46: MUL TEMP[9].x, TEMP[7].yyyy, CONST[1].xxxx 47: MOV TEMP[8].y, TEMP[9].xxxx 48: ADD TEMP[7].xy, TEMP[8].xyyy, TEMP[7].wwww 49: MOV TEMP[7].zw, TEMP[1].wwzw 50: MUL TEMP[8], TEMP[3].xyzz, TEMP[3].yzzx 51: DP4 TEMP[9].x, CONST[2], TEMP[8] 52: DP4 TEMP[10].x, CONST[3], TEMP[8] 53: MOV TEMP[9].y, TEMP[10].xxxx 54: DP4 TEMP[8].x, CONST[4], TEMP[8] 55: MOV TEMP[9].z, TEMP[8].xxxx 56: MUL TEMP[8].x, TEMP[3].yyyy, TEMP[3].yyyy 57: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[8].xxxx 58: MAD TEMP[3].xyz, CONST[5].xyzz, TEMP[3].xxxx, TEMP[9].xyzz 59: ADD TEMP[8].xyz, TEMP[0].xyzz, -CONST[0].xyzz 60: MOV TEMP[8].yzw, TEMP[8].yxyz 61: MOV TEMP[8].x, TEMP[1].zzzz 62: MOV TEMP[0].xyz, TEMP[0].xyzx 63: MOV OUT[8], TEMP[0] 64: MOV OUT[1], TEMP[2] 65: MOV OUT[3], TEMP[5] 66: MOV OUT[2], TEMP[4] 67: MOV OUT[4], TEMP[6] 68: MOV OUT[5], TEMP[3] 69: MOV OUT[6], TEMP[7] 70: MOV OUT[0], TEMP[1] 71: MOV OUT[7], TEMP[8] 72: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332) %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 %83 = add i32 %5, %7 %84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83) %85 = extractelement <4 x float> %84, i32 0 %86 = extractelement <4 x float> %84, i32 1 %87 = extractelement <4 x float> %84, i32 2 %88 = extractelement <4 x float> %84, i32 3 %89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0 %91 = add i32 %5, %7 %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91) %93 = extractelement <4 x float> %92, i32 0 %94 = extractelement <4 x float> %92, i32 1 %95 = extractelement <4 x float> %92, i32 2 %96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0 %98 = add i32 %5, %7 %99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !tbaa !0 %104 = add i32 %5, %7 %105 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %103, i32 0, i32 %104) %106 = extractelement <4 x float> %105, i32 0 %107 = extractelement <4 x float> %105, i32 1 %108 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %109 = load <16 x i8>, <16 x i8> addrspace(2)* %108, align 16, !tbaa !0 %110 = add i32 %5, %7 %111 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %109, i32 0, i32 %110) %112 = extractelement <4 x float> %111, i32 0 %113 = extractelement <4 x float> %111, i32 1 %114 = extractelement <4 x float> %111, i32 2 %115 = extractelement <4 x float> %111, i32 3 %116 = fmul float %32, %85 %117 = fmul float %33, %85 %118 = fmul float %34, %85 %119 = fmul float %35, %85 %120 = fmul float %36, %86 %121 = fadd float %120, %116 %122 = fmul float %37, %86 %123 = fadd float %122, %117 %124 = fmul float %38, %86 %125 = fadd float %124, %118 %126 = fmul float %39, %86 %127 = fadd float %126, %119 %128 = fmul float %40, %87 %129 = fadd float %128, %121 %130 = fmul float %41, %87 %131 = fadd float %130, %123 %132 = fmul float %42, %87 %133 = fadd float %132, %125 %134 = fmul float %43, %87 %135 = fadd float %134, %127 %136 = fmul float %44, %88 %137 = fadd float %136, %129 %138 = fmul float %45, %88 %139 = fadd float %138, %131 %140 = fmul float %46, %88 %141 = fadd float %140, %133 %142 = fmul float %65, %85 %143 = fmul float %66, %85 %144 = fmul float %67, %85 %145 = fmul float %68, %85 %146 = fmul float %69, %86 %147 = fadd float %146, %142 %148 = fmul float %70, %86 %149 = fadd float %148, %143 %150 = fmul float %71, %86 %151 = fadd float %150, %144 %152 = fmul float %72, %86 %153 = fadd float %152, %145 %154 = fmul float %73, %87 %155 = fadd float %154, %147 %156 = fmul float %74, %87 %157 = fadd float %156, %149 %158 = fmul float %75, %87 %159 = fadd float %158, %151 %160 = fmul float %76, %87 %161 = fadd float %160, %153 %162 = fmul float %77, %88 %163 = fadd float %162, %155 %164 = fmul float %78, %88 %165 = fadd float %164, %157 %166 = fmul float %79, %88 %167 = fadd float %166, %159 %168 = fmul float %80, %88 %169 = fadd float %168, %161 %170 = fmul float %100, %56 %171 = fadd float %170, %58 %172 = fmul float %101, %57 %173 = fadd float %172, %59 %174 = fcmp oeq float %64, 0.000000e+00 %. = select i1 %174, float %100, float %106 %.44 = select i1 %174, float %101, float %107 %175 = fmul float %., %60 %176 = fadd float %175, %62 %177 = fmul float %.44, %61 %178 = fadd float %177, %63 %179 = fmul float %47, %93 %180 = fmul float %50, %93 %181 = fmul float %53, %93 %182 = fmul float %48, %94 %183 = fadd float %182, %179 %184 = fmul float %51, %94 %185 = fadd float %184, %180 %186 = fmul float %54, %94 %187 = fadd float %186, %181 %188 = fmul float %49, %95 %189 = fadd float %188, %183 %190 = fmul float %52, %95 %191 = fadd float %190, %185 %192 = fmul float %55, %95 %193 = fadd float %192, %187 %194 = fmul float %189, %189 %195 = fmul float %191, %191 %196 = fadd float %195, %194 %197 = fmul float %193, %193 %198 = fadd float %196, %197 %199 = call float @llvm.AMDGPU.rsq.clamped.f32(float %198) %200 = fmul float %189, %199 %201 = fmul float %191, %199 %202 = fmul float %193, %199 %203 = fmul float %32, %112 %204 = fmul float %33, %112 %205 = fmul float %34, %112 %206 = fmul float %36, %113 %207 = fadd float %206, %203 %208 = fmul float %37, %113 %209 = fadd float %208, %204 %210 = fmul float %38, %113 %211 = fadd float %210, %205 %212 = fmul float %40, %114 %213 = fadd float %212, %207 %214 = fmul float %41, %114 %215 = fadd float %214, %209 %216 = fmul float %42, %114 %217 = fadd float %216, %211 %218 = fmul float %213, %213 %219 = fmul float %215, %215 %220 = fadd float %219, %218 %221 = fmul float %217, %217 %222 = fadd float %220, %221 %223 = call float @llvm.AMDGPU.rsq.clamped.f32(float %222) %224 = fmul float %213, %223 %225 = fmul float %215, %223 %226 = fmul float %217, %223 %227 = fmul float %202, %225 %228 = fmul float %200, %226 %229 = fmul float %201, %224 %230 = fmul float %201, %226 %231 = fsub float %230, %227 %232 = fmul float %202, %224 %233 = fsub float %232, %228 %234 = fmul float %200, %225 %235 = fsub float %234, %229 %236 = fmul float %231, %115 %237 = fmul float %233, %115 %238 = fmul float %235, %115 %239 = fmul float %163, 5.000000e-01 %240 = fmul float %165, 5.000000e-01 %241 = fmul float %169, 5.000000e-01 %242 = fmul float %240, %16 %243 = fadd float %239, %241 %244 = fadd float %242, %241 %245 = fmul float %200, %201 %246 = fmul float %201, %202 %247 = fmul float %202, %202 %248 = fmul float %202, %200 %249 = fmul float %17, %245 %250 = fmul float %18, %246 %251 = fadd float %249, %250 %252 = fmul float %19, %247 %253 = fadd float %251, %252 %254 = fmul float %20, %248 %255 = fadd float %253, %254 %256 = fmul float %21, %245 %257 = fmul float %22, %246 %258 = fadd float %256, %257 %259 = fmul float %23, %247 %260 = fadd float %258, %259 %261 = fmul float %24, %248 %262 = fadd float %260, %261 %263 = fmul float %25, %245 %264 = fmul float %26, %246 %265 = fadd float %263, %264 %266 = fmul float %27, %247 %267 = fadd float %265, %266 %268 = fmul float %28, %248 %269 = fadd float %267, %268 %270 = fmul float %201, %201 %271 = fmul float %200, %200 %272 = fsub float %271, %270 %273 = fmul float %29, %272 %274 = fadd float %273, %255 %275 = fmul float %30, %272 %276 = fadd float %275, %262 %277 = fmul float %31, %272 %278 = fadd float %277, %269 %279 = fsub float %137, %13 %280 = fsub float %139, %14 %281 = fsub float %141, %15 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %171, float %173, float %176, float %178) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %224, float %225, float %226, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %236, float %237, float %238, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %200, float %201, float %202, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %274, float %276, float %278, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %243, float %244, float %167, float %169) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %167, float %279, float %280, float %281) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %137, float %139, float %141, float %135) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %163, float %165, float %167, float %169) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908 s_load_dwordx4 s[16:19], s[8:9], 0xc ; C088090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s32, s[28:31], 0x23 ; C2101D23 buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[11:14], v0, s[16:19], 0 idxen ; E00C2000 80040B00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[13:16], v0, s[8:11], 0 idxen ; E00C2000 80020D00 s_buffer_load_dword s33, s[28:31], 0x24 ; C2109D24 s_buffer_load_dword s34, s[28:31], 0x25 ; C2111D25 s_buffer_load_dword s35, s[28:31], 0x26 ; C2119D26 s_buffer_load_dword s36, s[28:31], 0x28 ; C2121D28 s_buffer_load_dword s3, s[28:31], 0x13 ; C2019D13 s_buffer_load_dword s2, s[28:31], 0x14 ; C2011D14 s_buffer_load_dword s0, s[28:31], 0x15 ; C2001D15 s_buffer_load_dword s1, s[28:31], 0x16 ; C2009D16 s_buffer_load_dword s18, s[28:31], 0x18 ; C2091D18 s_buffer_load_dword s37, s[28:31], 0x29 ; C2129D29 s_buffer_load_dword s38, s[28:31], 0x2a ; C2131D2A s_buffer_load_dword s39, s[28:31], 0x2c ; C2139D2C s_buffer_load_dword s40, s[28:31], 0x2d ; C2141D2D s_buffer_load_dword s41, s[28:31], 0x2e ; C2149D2E s_buffer_load_dword s20, s[28:31], 0x19 ; C20A1D19 s_buffer_load_dword s19, s[28:31], 0x1a ; C2099D1A s_buffer_load_dword s42, s[28:31], 0x1b ; C2151D1B s_buffer_load_dword s23, s[28:31], 0x1c ; C20B9D1C s_buffer_load_dword s22, s[28:31], 0x1d ; C20B1D1D s_buffer_load_dword s43, s[28:31], 0x30 ; C2159D30 s_buffer_load_dword s44, s[28:31], 0x31 ; C2161D31 s_buffer_load_dword s45, s[28:31], 0x32 ; C2169D32 s_buffer_load_dword s46, s[28:31], 0x38 ; C2171D38 s_buffer_load_dword s47, s[28:31], 0x39 ; C2179D39 s_buffer_load_dword s24, s[28:31], 0x1e ; C20C1D1E s_buffer_load_dword s48, s[28:31], 0x1f ; C2181D1F s_buffer_load_dword s26, s[28:31], 0x20 ; C20D1D20 s_buffer_load_dword s27, s[28:31], 0x21 ; C20D9D21 s_buffer_load_dword s25, s[28:31], 0x22 ; C20C9D22 s_buffer_load_dword s4, s[28:31], 0x3f ; C2021D3F s_buffer_load_dword s5, s[28:31], 0x40 ; C2029D40 s_buffer_load_dword s49, s[28:31], 0x44 ; C2189D44 s_buffer_load_dword s50, s[28:31], 0x45 ; C2191D45 s_buffer_load_dword s51, s[28:31], 0x46 ; C2199D46 s_buffer_load_dword s6, s[28:31], 0x3a ; C2031D3A s_buffer_load_dword s8, s[28:31], 0x3b ; C2041D3B s_buffer_load_dword s52, s[28:31], 0x3c ; C21A1D3C s_buffer_load_dword s53, s[28:31], 0x3d ; C21A9D3D s_buffer_load_dword s14, s[28:31], 0x3e ; C2071D3E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s4 ; 7E000204 s_buffer_load_dword s13, s[28:31], 0x9 ; C2069D09 v_cmp_eq_f32_e64 vcc, 0, s5 ; D004006A 00000A80 s_buffer_load_dword s9, s[28:31], 0xa ; C2049D0A s_buffer_load_dword s7, s[28:31], 0xb ; C2039D0B s_buffer_load_dword s12, s[28:31], 0xc ; C2061D0C v_mov_b32_e32 v17, s6 ; 7E220206 s_buffer_load_dword s16, s[28:31], 0xd ; C2081D0D v_mov_b32_e32 v18, s8 ; 7E240208 s_buffer_load_dword s11, s[28:31], 0xe ; C2059D0E s_buffer_load_dword s10, s[28:31], 0xf ; C2051D0F s_buffer_load_dword s15, s[28:31], 0x10 ; C2079D10 v_mov_b32_e32 v19, s14 ; 7E26020E s_buffer_load_dword s17, s[28:31], 0x11 ; C2089D11 s_buffer_load_dword s14, s[28:31], 0x12 ; C2071D12 s_buffer_load_dword s54, s[28:31], 0x47 ; C21B1D47 s_buffer_load_dword s55, s[28:31], 0x48 ; C21B9D48 s_buffer_load_dword s56, s[28:31], 0x49 ; C21C1D49 s_buffer_load_dword s57, s[28:31], 0x4a ; C21C9D4A s_buffer_load_dword s58, s[28:31], 0x4b ; C21D1D4B s_buffer_load_dword s4, s[28:31], 0x0 ; C2021D00 s_buffer_load_dword s5, s[28:31], 0x1 ; C2029D01 s_buffer_load_dword s6, s[28:31], 0x2 ; C2031D02 s_buffer_load_dword s8, s[28:31], 0x4 ; C2041D04 s_buffer_load_dword s21, s[28:31], 0x8 ; C20A9D08 s_buffer_load_dword s59, s[28:31], 0x4c ; C21D9D4C s_buffer_load_dword s60, s[28:31], 0x4d ; C21E1D4D s_buffer_load_dword s61, s[28:31], 0x4e ; C21E9D4E s_buffer_load_dword s62, s[28:31], 0x4f ; C21F1D4F s_buffer_load_dword s63, s[28:31], 0x50 ; C21F9D50 s_buffer_load_dword s64, s[28:31], 0x51 ; C2201D51 s_buffer_load_dword s65, s[28:31], 0x52 ; C2209D52 s_buffer_load_dword s28, s[28:31], 0x53 ; C20E1D53 v_mul_f32_e32 v20, s42, v2 ; 1028042A v_mac_f32_e32 v20, s48, v3 ; 3E280630 v_mac_f32_e32 v20, s32, v4 ; 3E280820 v_mac_f32_e32 v17, s46, v9 ; 3E22122E v_mac_f32_e32 v18, s47, v10 ; 3E24142F v_mul_f32_e32 v21, s49, v2 ; 102A0431 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v21, s55, v3 ; 3E2A0637 v_mac_f32_e32 v21, s59, v4 ; 3E2A083B v_mac_f32_e32 v21, s63, v5 ; 3E2A0A3F v_mul_f32_e32 v22, s50, v2 ; 102C0432 v_mac_f32_e32 v22, s56, v3 ; 3E2C0638 v_mac_f32_e32 v22, s60, v4 ; 3E2C083C v_mac_f32_e32 v22, s64, v5 ; 3E2C0A40 v_mul_f32_e32 v23, s51, v2 ; 102E0433 v_mac_f32_e32 v23, s57, v3 ; 3E2E0639 v_mac_f32_e32 v23, s61, v4 ; 3E2E083D v_mac_f32_e32 v23, s65, v5 ; 3E2E0A41 v_mul_f32_e32 v24, s54, v2 ; 10300436 v_mac_f32_e32 v24, s58, v3 ; 3E30063A v_mac_f32_e32 v24, s62, v4 ; 3E30083E v_mac_f32_e32 v24, s28, v5 ; 3E300A1C v_cndmask_b32_e32 v9, v11, v9 ; 0012130B v_cndmask_b32_e32 v10, v12, v10 ; 0014150C v_mul_f32_e32 v11, s36, v6 ; 10160C24 v_mac_f32_e32 v11, s37, v7 ; 3E160E25 v_mul_f32_e32 v12, s39, v6 ; 10180C27 v_mac_f32_e32 v12, s40, v7 ; 3E180E28 v_mul_f32_e32 v6, s43, v6 ; 100C0C2B v_mac_f32_e32 v6, s44, v7 ; 3E0C0E2C v_mac_f32_e32 v11, s38, v8 ; 3E161026 v_mac_f32_e32 v12, s41, v8 ; 3E181029 v_mac_f32_e32 v6, s45, v8 ; 3E0C102D v_mul_f32_e32 v7, s18, v2 ; 100E0412 v_mac_f32_e32 v7, s23, v3 ; 3E0E0617 v_mac_f32_e32 v7, s26, v4 ; 3E0E081A v_mac_f32_e32 v7, s33, v5 ; 3E0E0A21 v_mul_f32_e32 v8, s20, v2 ; 10100414 v_mac_f32_e32 v8, s22, v3 ; 3E100616 v_mac_f32_e32 v8, s27, v4 ; 3E10081B v_mac_f32_e32 v8, s34, v5 ; 3E100A22 v_mul_f32_e32 v2, s19, v2 ; 10040413 v_mac_f32_e32 v2, s24, v3 ; 3E040618 v_mac_f32_e32 v2, s25, v4 ; 3E040819 v_mac_f32_e32 v2, s35, v5 ; 3E040A23 v_mac_f32_e32 v19, s52, v9 ; 3E261234 v_mac_f32_e32 v0, s53, v10 ; 3E001435 exp 15, 32, 0, 0, 0, v17, v18, v19, v0 ; F800020F 00131211 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s18, v13 ; 10001A12 v_mac_f32_e32 v0, s23, v14 ; 3E001C17 v_mul_f32_e32 v3, s20, v13 ; 10061A14 v_mac_f32_e32 v3, s22, v14 ; 3E061C16 v_mul_f32_e32 v4, s19, v13 ; 10081A13 v_mac_f32_e32 v4, s24, v14 ; 3E081C18 v_mac_f32_e32 v0, s26, v15 ; 3E001E1A v_mac_f32_e32 v3, s27, v15 ; 3E061E1B v_mac_f32_e32 v4, s25, v15 ; 3E081E19 v_mul_f32_e32 v5, v11, v11 ; 100A170B v_mac_f32_e32 v5, v12, v12 ; 3E0A190C v_mac_f32_e32 v5, v6, v6 ; 3E0A0D06 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mul_f32_e32 v9, v0, v0 ; 10120100 v_mac_f32_e32 v9, v3, v3 ; 3E120703 v_mac_f32_e32 v9, v4, v4 ; 3E120904 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mul_f32_e32 v10, v5, v11 ; 10141705 v_mul_f32_e32 v11, v5, v12 ; 10161905 v_mul_f32_e32 v5, v5, v6 ; 100A0D05 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v3, v9, v3 ; 10060709 v_mul_f32_e32 v4, v9, v4 ; 10080909 v_mul_f32_e32 v6, v3, v5 ; 100C0B03 v_mad_f32 v6, v11, v4, -v6 ; D2820006 841A090B v_mul_f32_e32 v9, v4, v10 ; 10121504 v_mad_f32 v9, v5, v0, -v9 ; D2820009 84260105 v_mul_f32_e32 v12, v0, v11 ; 10181700 v_mad_f32 v12, v10, v3, -v12 ; D282000C 8432070A v_mul_f32_e32 v6, v16, v6 ; 100C0D10 v_mul_f32_e32 v9, v16, v9 ; 10121310 v_mul_f32_e32 v12, v16, v12 ; 10181910 exp 15, 33, 0, 0, 0, v0, v3, v4, v1 ; F800021F 01040300 exp 15, 34, 0, 0, 0, v6, v9, v12, v1 ; F800022F 010C0906 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v5, v11 ; 10001705 v_mul_f32_e32 v3, s13, v0 ; 1006000D v_mul_f32_e32 v4, s16, v0 ; 10080010 v_mul_f32_e32 v0, s17, v0 ; 10000011 v_mul_f32_e32 v6, v11, v10 ; 100C150B v_mac_f32_e32 v3, s21, v6 ; 3E060C15 v_mac_f32_e32 v4, s12, v6 ; 3E080C0C v_mac_f32_e32 v0, s15, v6 ; 3E000C0F v_mul_f32_e32 v6, v5, v5 ; 100C0B05 v_mac_f32_e32 v3, s9, v6 ; 3E060C09 v_mac_f32_e32 v4, s11, v6 ; 3E080C0B v_mac_f32_e32 v0, s14, v6 ; 3E000C0E v_mul_f32_e32 v6, v10, v5 ; 100C0B0A v_mac_f32_e32 v3, s7, v6 ; 3E060C07 v_mac_f32_e32 v4, s10, v6 ; 3E080C0A v_mac_f32_e32 v0, s3, v6 ; 3E000C03 exp 15, 35, 0, 0, 0, v10, v11, v5, v1 ; F800023F 01050B0A s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v5, v11, v11 ; 100A170B v_mad_f32 v5, v10, v10, -v5 ; D2820005 8416150A v_mac_f32_e32 v3, s2, v5 ; 3E060A02 v_mac_f32_e32 v4, s0, v5 ; 3E080A00 v_mac_f32_e32 v0, s1, v5 ; 3E000A01 v_mul_f32_e32 v5, 0.5, v22 ; 100A2CF0 v_mul_f32_e32 v6, 0.5, v24 ; 100C30F0 exp 15, 36, 0, 0, 0, v3, v4, v0, v1 ; F800024F 01000403 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, 0.5, v21, v6 ; D2820000 041A2AF0 v_mac_f32_e32 v6, s8, v5 ; 3E0C0A08 exp 15, 37, 0, 0, 0, v0, v6, v23, v24 ; F800025F 18170600 s_waitcnt expcnt(0) ; BF8C070F v_subrev_f32_e32 v0, s4, v7 ; 0A000E04 v_subrev_f32_e32 v1, s5, v8 ; 0A021005 v_subrev_f32_e32 v3, s6, v2 ; 0A060406 exp 15, 38, 0, 0, 0, v23, v0, v1, v3 ; F800026F 03010017 exp 15, 39, 0, 0, 0, v7, v8, v2, v20 ; F800027F 14020807 exp 15, 12, 0, 1, 0, v21, v22, v23, v24 ; F80008CF 18171615 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 28 Code Size: 932 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL IN[6], GENERIC[6], PERSPECTIVE DCL IN[7], GENERIC[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SVIEW[0], CUBE, FLOAT DCL SVIEW[1], CUBE, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL CONST[0..5] DCL CONST[8..19] DCL CONST[22..24] DCL CONST[26] DCL TEMP[0..18], LOCAL IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[1] FLT32 { 0.5000, 0.7500, 7.0000, 1.0000} IMM[2] FLT32 { 10.0000, 0.9680, 0.0300, 0.0001} 0: MOV TEMP[0].x, IN[1].xxxx 1: MOV TEMP[0].y, IN[2].xxxx 2: MOV TEMP[0].z, IN[3].xxxx 3: MOV TEMP[1].x, IN[1].yyyy 4: MOV TEMP[1].y, IN[2].yyyy 5: MOV TEMP[1].z, IN[3].yyyy 6: MOV TEMP[2].x, IN[1].zzzz 7: MOV TEMP[2].y, IN[2].zzzz 8: MOV TEMP[2].z, IN[3].zzzz 9: MOV TEMP[3].xy, IN[0].xyyy 10: TEX TEMP[3].yw, TEMP[3], SAMP[3], 2D 11: MAD TEMP[3].xy, TEMP[3].wyyy, IMM[0].xxxx, IMM[0].yyyy 12: MUL TEMP[3].xy, TEMP[3].xyyy, CONST[22].xxxx 13: DP2 TEMP[4].x, TEMP[3].xyyy, TEMP[3].xyyy 14: MOV_SAT TEMP[4].x, TEMP[4].xxxx 15: ADD TEMP[4].x, IMM[0].zzzz, -TEMP[4].xxxx 16: SQRT TEMP[4].x, TEMP[4].xxxx 17: MOV TEMP[3].z, TEMP[4].xxxx 18: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[0].xyzz 19: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[1].xyzz 20: MOV TEMP[0].y, TEMP[1].xxxx 21: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[2].xyzz 22: MOV TEMP[0].z, TEMP[1].xxxx 23: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 24: RSQ TEMP[1].x, TEMP[1].xxxx 25: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 26: DP3 TEMP[1].x, IN[6].yzww, IN[6].yzww 27: RSQ TEMP[1].x, TEMP[1].xxxx 28: MUL TEMP[1].xyz, IN[6].yzww, TEMP[1].xxxx 29: MOV TEMP[2].xy, IN[0].xyyy 30: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D 31: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[2].xyzz 32: LRP TEMP[3].xyz, CONST[23].xxxx, TEMP[2].xyzz, CONST[16].xyzz 33: MUL TEMP[4].x, CONST[23].xxxx, CONST[16].wwww 34: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx 35: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx 36: MOV TEMP[5].xy, IN[0].xyyy 37: TEX TEMP[5].y, TEMP[5], SAMP[4], 2D 38: ADD TEMP[6].x, IMM[0].zzzz, -CONST[26].xxxx 39: MAD TEMP[5].x, TEMP[5].yyyy, CONST[26].xxxx, TEMP[6].xxxx 40: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz 41: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx 42: MOV TEMP[7].xyz, IMM[0].wwww 43: MOV TEMP[8].w, IMM[0].zzzz 44: MOV TEMP[8].xyz, TEMP[0].xyzx 45: DP4 TEMP[9].x, CONST[1], TEMP[8] 46: DP4 TEMP[10].x, CONST[2], TEMP[8] 47: MOV TEMP[9].y, TEMP[10].xxxx 48: DP4 TEMP[8].x, CONST[3], TEMP[8] 49: MOV TEMP[9].z, TEMP[8].xxxx 50: ADD TEMP[8].xyz, IN[4].xyzz, TEMP[9].xyzz 51: MOV TEMP[9].xy, IN[5].xyyy 52: MOV TEMP[9].w, IN[5].wwww 53: TXP TEMP[9].x, TEMP[9], SAMP[5], 2D 54: MUL TEMP[9].xyz, CONST[17].xyzz, TEMP[9].xxxx 55: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx 56: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[1].xyzz 57: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[0].xyzz 58: MUL TEMP[10].xyz, IMM[0].xxxx, TEMP[10].xyzz 59: ADD TEMP[10].xyz, TEMP[1].xyzz, -TEMP[10].xyzz 60: MOV TEMP[11].xyz, TEMP[10].xyzx 61: FSLT TEMP[12].x, IMM[0].wwww, CONST[10].wwww 62: UIF TEMP[12].xxxx :0 63: DP3 TEMP[12].x, TEMP[10].xyzz, TEMP[10].xyzz 64: RSQ TEMP[12].x, TEMP[12].xxxx 65: MUL TEMP[12].xyz, TEMP[10].xyzz, TEMP[12].xxxx 66: MOV TEMP[13].xyz, -IN[7].xyzx 67: ADD TEMP[14].xyz, CONST[8].xyzz, TEMP[13].xyzz 68: RCP TEMP[15].x, TEMP[12].xxxx 69: RCP TEMP[15].y, TEMP[12].yyyy 70: RCP TEMP[15].z, TEMP[12].zzzz 71: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz 72: ADD TEMP[13].xyz, CONST[9].xyzz, TEMP[13].xyzz 73: RCP TEMP[15].x, TEMP[12].xxxx 74: RCP TEMP[15].y, TEMP[12].yyyy 75: RCP TEMP[15].z, TEMP[12].zzzz 76: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz 77: FSLT TEMP[15].xyz, IMM[0].wwww, TEMP[12].xyzz 78: UIF TEMP[15].xxxx :0 79: MOV TEMP[16].x, TEMP[14].xxxx 80: ELSE :0 81: MOV TEMP[16].x, TEMP[13].xxxx 82: ENDIF 83: UIF TEMP[15].yyyy :0 84: MOV TEMP[17].x, TEMP[14].yyyy 85: ELSE :0 86: MOV TEMP[17].x, TEMP[13].yyyy 87: ENDIF 88: UIF TEMP[15].zzzz :0 89: MOV TEMP[14].x, TEMP[14].zzzz 90: ELSE :0 91: MOV TEMP[14].x, TEMP[13].zzzz 92: ENDIF 93: ADD TEMP[13].xyz, CONST[8].xyzz, CONST[9].xyzz 94: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[1].xxxx 95: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx 96: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx 97: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[10].xyzz 98: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[7].xyzz 99: MAD TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xxxx, TEMP[15].xyzz 100: ADD TEMP[11].xyz, TEMP[12].xyzz, -TEMP[13].xyzz 101: ENDIF 102: ADD TEMP[12].x, IMM[0].zzzz, -CONST[24].xxxx 103: POW TEMP[12].x, TEMP[12].xxxx, IMM[1].yyyy 104: MUL TEMP[12].x, TEMP[12].xxxx, IMM[1].zzzz 105: MOV TEMP[11].xyz, TEMP[11].xyzz 106: MOV TEMP[11].w, TEMP[12].xxxx 107: TXL TEMP[11], TEMP[11], SAMP[0], CUBE 108: POW TEMP[12].x, TEMP[11].wwww, CONST[11].yyyy 109: MUL TEMP[12].x, CONST[11].xxxx, TEMP[12].xxxx 110: MUL TEMP[11].xyz, TEMP[12].xxxx, TEMP[11].xyzz 111: FSLT TEMP[12].x, CONST[9].wwww, IMM[1].wwww 112: UIF TEMP[12].xxxx :0 113: MOV TEMP[12].xyz, TEMP[10].xyzx 114: FSLT TEMP[13].x, IMM[0].wwww, CONST[14].wwww 115: UIF TEMP[13].xxxx :0 116: DP3 TEMP[13].x, TEMP[10].xyzz, TEMP[10].xyzz 117: RSQ TEMP[13].x, TEMP[13].xxxx 118: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[13].xxxx 119: MOV TEMP[13].xyz, -IN[7].xyzx 120: ADD TEMP[14].xyz, CONST[12].xyzz, TEMP[13].xyzz 121: RCP TEMP[15].x, TEMP[10].xxxx 122: RCP TEMP[15].y, TEMP[10].yyyy 123: RCP TEMP[15].z, TEMP[10].zzzz 124: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz 125: ADD TEMP[13].xyz, CONST[13].xyzz, TEMP[13].xyzz 126: RCP TEMP[15].x, TEMP[10].xxxx 127: RCP TEMP[15].y, TEMP[10].yyyy 128: RCP TEMP[15].z, TEMP[10].zzzz 129: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz 130: FSLT TEMP[15].xyz, IMM[0].wwww, TEMP[10].xyzz 131: UIF TEMP[15].xxxx :0 132: MOV TEMP[16].x, TEMP[14].xxxx 133: ELSE :0 134: MOV TEMP[16].x, TEMP[13].xxxx 135: ENDIF 136: UIF TEMP[15].yyyy :0 137: MOV TEMP[17].x, TEMP[14].yyyy 138: ELSE :0 139: MOV TEMP[17].x, TEMP[13].yyyy 140: ENDIF 141: UIF TEMP[15].zzzz :0 142: MOV TEMP[14].x, TEMP[14].zzzz 143: ELSE :0 144: MOV TEMP[14].x, TEMP[13].zzzz 145: ENDIF 146: ADD TEMP[13].xyz, CONST[12].xyzz, CONST[13].xyzz 147: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[1].xxxx 148: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx 149: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx 150: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[14].xyzz 151: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[7].xyzz 152: MAD TEMP[10].xyz, TEMP[10].xyzz, TEMP[14].xxxx, TEMP[15].xyzz 153: ADD TEMP[12].xyz, TEMP[10].xyzz, -TEMP[13].xyzz 154: ENDIF 155: ADD TEMP[10].x, IMM[0].zzzz, -CONST[24].xxxx 156: POW TEMP[10].x, TEMP[10].xxxx, IMM[1].yyyy 157: MUL TEMP[10].x, TEMP[10].xxxx, IMM[1].zzzz 158: MOV TEMP[12].xyz, TEMP[12].xyzz 159: MOV TEMP[12].w, TEMP[10].xxxx 160: TXL TEMP[10], TEMP[12], SAMP[1], CUBE 161: POW TEMP[12].x, TEMP[10].wwww, CONST[15].yyyy 162: MUL TEMP[12].x, CONST[15].xxxx, TEMP[12].xxxx 163: MUL TEMP[10].xyz, TEMP[12].xxxx, TEMP[10].xyzz 164: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[11].xyzz, TEMP[10].xyzz 165: ELSE :0 166: MOV TEMP[7].xyz, TEMP[11].xyzx 167: ENDIF 168: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx 169: MOV TEMP[1].xyz, -TEMP[1].xyzx 170: ADD TEMP[5].x, IMM[0].zzzz, -CONST[24].xxxx 171: ADD TEMP[10].xyz, CONST[0].xyzz, TEMP[1].xyzz 172: DP3 TEMP[11].x, TEMP[10].xyzz, TEMP[10].xyzz 173: RSQ TEMP[11].x, TEMP[11].xxxx 174: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[11].xxxx 175: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz 176: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx 177: DP3 TEMP[11].x, CONST[0].xyzz, TEMP[10].xyzz 178: MAX TEMP[11].x, IMM[0].wwww, TEMP[11].xxxx 179: MUL TEMP[12].x, TEMP[5].xxxx, TEMP[5].xxxx 180: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].wwww 181: ADD TEMP[13].x, IMM[0].zzzz, -TEMP[5].xxxx 182: MAD TEMP[13].x, TEMP[13].xxxx, IMM[2].yyyy, IMM[2].zzzz 183: LG2 TEMP[13].x, TEMP[13].xxxx 184: RCP TEMP[13].x, TEMP[13].xxxx 185: MUL TEMP[13].x, IMM[2].xxxx, TEMP[13].xxxx 186: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[13].xxxx 187: ADD TEMP[14].x, IMM[0].zzzz, -TEMP[6].xxxx 188: ADD TEMP[15].x, IMM[0].zzzz, -TEMP[1].xxxx 189: MUL TEMP[16].x, IMM[0].xxxx, TEMP[11].xxxx 190: MUL TEMP[5].x, TEMP[11].xxxx, TEMP[5].xxxx 191: MAD TEMP[5].x, TEMP[16].xxxx, TEMP[5].xxxx, IMM[1].xxxx 192: ADD TEMP[11].x, IMM[0].zzzz, -TEMP[11].xxxx 193: ADD TEMP[16].x, IMM[0].zzzz, -TEMP[1].xxxx 194: ADD TEMP[4].x, IMM[0].zzzz, -TEMP[4].xxxx 195: ADD TEMP[4].x, CONST[24].xxxx, TEMP[4].xxxx 196: MOV_SAT TEMP[4].x, TEMP[4].xxxx 197: MUL TEMP[17].x, TEMP[16].xxxx, TEMP[16].xxxx 198: MUL TEMP[18].x, TEMP[16].xxxx, TEMP[16].xxxx 199: MUL TEMP[16].x, TEMP[18].xxxx, TEMP[16].xxxx 200: MUL TEMP[16].x, TEMP[17].xxxx, TEMP[16].xxxx 201: LRP TEMP[4].xyz, TEMP[16].xxxx, TEMP[4].xxxx, TEMP[3].xyzz 202: LRP TEMP[16].x, TEMP[6].xxxx, IMM[0].zzzz, TEMP[12].xxxx 203: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, TEMP[12].xxxx 204: MAD TEMP[1].x, TEMP[16].xxxx, TEMP[1].xxxx, IMM[2].wwww 205: RCP TEMP[1].x, TEMP[1].xxxx 206: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[10].xyzz 207: MAX TEMP[10].x, IMM[0].wwww, TEMP[10].xxxx 208: POW TEMP[10].x, TEMP[10].xxxx, TEMP[13].xxxx 209: ADD TEMP[12].x, TEMP[13].xxxx, IMM[0].zzzz 210: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].yyyy 211: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[12].xxxx 212: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[10].xxxx 213: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx 214: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx 215: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx 216: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[9].xyzz 217: ADD TEMP[10].xyz, IMM[0].zzzz, -TEMP[3].xyzz 218: MUL TEMP[12].x, TEMP[11].xxxx, TEMP[11].xxxx 219: MUL TEMP[13].x, TEMP[11].xxxx, TEMP[11].xxxx 220: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[11].xxxx 221: MUL TEMP[11].x, TEMP[12].xxxx, TEMP[11].xxxx 222: MAD TEMP[3].xyz, TEMP[10].xyzz, TEMP[11].xxxx, TEMP[3].xyzz 223: ADD TEMP[10].x, TEMP[5].xxxx, IMM[0].yyyy 224: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx 225: MUL TEMP[12].x, TEMP[14].xxxx, TEMP[14].xxxx 226: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[14].xxxx 227: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx 228: MAD TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx, IMM[0].zzzz 229: ADD TEMP[5].x, TEMP[5].xxxx, IMM[0].yyyy 230: MUL TEMP[11].x, TEMP[15].xxxx, TEMP[15].xxxx 231: MUL TEMP[12].x, TEMP[15].xxxx, TEMP[15].xxxx 232: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[15].xxxx 233: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx 234: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[11].xxxx, IMM[0].zzzz 235: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx 236: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 237: MAD TEMP[5].xyz, TEMP[9].xyzz, TEMP[5].xxxx, TEMP[8].xyzz 238: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz 239: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz 240: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz 241: MOV TEMP[0].xyz, TEMP[0].xyzx 242: MAD TEMP[1].x, IN[6].xxxx, CONST[5].zzzz, CONST[5].wwww 243: MOV_SAT TEMP[1].x, TEMP[1].xxxx 244: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz 245: MOV TEMP[0].xyz, TEMP[0].xyzx 246: MOV TEMP[0].w, IMM[0].zzzz 247: MOV OUT[0], TEMP[0] 248: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300) %76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312) %79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368) %81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384) %82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 416) %83 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %84 = load <32 x i8>, <32 x i8> addrspace(2)* %83, align 32, !tbaa !0 %85 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0 %87 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %88 = bitcast <8 x i32> addrspace(2)* %87 to <32 x i8> addrspace(2)* %89 = load <32 x i8>, <32 x i8> addrspace(2)* %88, align 32, !tbaa !0 %90 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %91 = bitcast <4 x i32> addrspace(2)* %90 to <16 x i8> addrspace(2)* %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !tbaa !0 %93 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %94 = bitcast <8 x i32> addrspace(2)* %93 to <32 x i8> addrspace(2)* %95 = load <32 x i8>, <32 x i8> addrspace(2)* %94, align 32, !tbaa !0 %96 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %97 = bitcast <4 x i32> addrspace(2)* %96 to <16 x i8> addrspace(2)* %98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0 %99 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %100 = bitcast <8 x i32> addrspace(2)* %99 to <32 x i8> addrspace(2)* %101 = load <32 x i8>, <32 x i8> addrspace(2)* %100, align 32, !tbaa !0 %102 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %103 = bitcast <4 x i32> addrspace(2)* %102 to <16 x i8> addrspace(2)* %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0 %105 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %106 = bitcast <8 x i32> addrspace(2)* %105 to <32 x i8> addrspace(2)* %107 = load <32 x i8>, <32 x i8> addrspace(2)* %106, align 32, !tbaa !0 %108 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %109 = bitcast <4 x i32> addrspace(2)* %108 to <16 x i8> addrspace(2)* %110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0 %111 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %112 = bitcast <8 x i32> addrspace(2)* %111 to <32 x i8> addrspace(2)* %113 = load <32 x i8>, <32 x i8> addrspace(2)* %112, align 32, !tbaa !0 %114 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %115 = bitcast <4 x i32> addrspace(2)* %114 to <16 x i8> addrspace(2)* %116 = load <16 x i8>, <16 x i8> addrspace(2)* %115, align 16, !tbaa !0 %117 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %119 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %123 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %124 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %125 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %126 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %127 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %128 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %129 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %130 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %131 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %132 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %133 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7) %134 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %135 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %136 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %137 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %5, <2 x i32> %7) %138 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %5, <2 x i32> %7) %139 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %5, <2 x i32> %7) %140 = call float @llvm.SI.fs.interp(i32 2, i32 7, i32 %5, <2 x i32> %7) %141 = bitcast float %117 to i32 %142 = bitcast float %118 to i32 %143 = insertelement <2 x i32> undef, i32 %141, i32 0 %144 = insertelement <2 x i32> %143, i32 %142, i32 1 %145 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %144, <32 x i8> %101, <16 x i8> %104, i32 2) %146 = extractelement <4 x float> %145, i32 1 %147 = extractelement <4 x float> %145, i32 3 %148 = fmul float %147, 2.000000e+00 %149 = fadd float %148, -1.000000e+00 %150 = fmul float %146, 2.000000e+00 %151 = fadd float %150, -1.000000e+00 %152 = fmul float %149, %79 %153 = fmul float %151, %79 %154 = fmul float %152, %152 %155 = fmul float %153, %153 %156 = fadd float %154, %155 %157 = call float @llvm.AMDIL.clamp.(float %156, float 0.000000e+00, float 1.000000e+00) %158 = fsub float 1.000000e+00, %157 %159 = call float @llvm.sqrt.f32(float %158) %160 = fmul float %152, %119 %161 = fmul float %153, %122 %162 = fadd float %161, %160 %163 = fmul float %159, %125 %164 = fadd float %162, %163 %165 = fmul float %152, %120 %166 = fmul float %153, %123 %167 = fadd float %166, %165 %168 = fmul float %159, %126 %169 = fadd float %167, %168 %170 = fmul float %152, %121 %171 = fmul float %153, %124 %172 = fadd float %171, %170 %173 = fmul float %159, %127 %174 = fadd float %172, %173 %175 = fmul float %164, %164 %176 = fmul float %169, %169 %177 = fadd float %176, %175 %178 = fmul float %174, %174 %179 = fadd float %177, %178 %180 = call float @llvm.AMDGPU.rsq.clamped.f32(float %179) %181 = fmul float %164, %180 %182 = fmul float %169, %180 %183 = fmul float %174, %180 %184 = fmul float %135, %135 %185 = fmul float %136, %136 %186 = fadd float %185, %184 %187 = fmul float %137, %137 %188 = fadd float %186, %187 %189 = call float @llvm.AMDGPU.rsq.clamped.f32(float %188) %190 = fmul float %135, %189 %191 = fmul float %136, %189 %192 = fmul float %137, %189 %193 = bitcast float %117 to i32 %194 = bitcast float %118 to i32 %195 = insertelement <2 x i32> undef, i32 %193, i32 0 %196 = insertelement <2 x i32> %195, i32 %194, i32 1 %197 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %196, <32 x i8> %95, <16 x i8> %98, i32 2) %198 = extractelement <4 x float> %197, i32 0 %199 = extractelement <4 x float> %197, i32 1 %200 = extractelement <4 x float> %197, i32 2 %201 = fmul float %76, %198 %202 = fmul float %77, %199 %203 = fmul float %78, %200 %204 = call float @llvm.AMDGPU.lrp(float %80, float %201, float %66) %205 = call float @llvm.AMDGPU.lrp(float %80, float %202, float %67) %206 = call float @llvm.AMDGPU.lrp(float %80, float %203, float %68) %207 = fmul float %80, %69 %208 = fsub float %69, %207 %209 = fmul float %201, %208 %210 = fmul float %202, %208 %211 = fmul float %203, %208 %212 = bitcast float %117 to i32 %213 = bitcast float %118 to i32 %214 = insertelement <2 x i32> undef, i32 %212, i32 0 %215 = insertelement <2 x i32> %214, i32 %213, i32 1 %216 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %215, <32 x i8> %107, <16 x i8> %110, i32 2) %217 = extractelement <4 x float> %216, i32 1 %218 = fsub float 1.000000e+00, %82 %219 = fmul float %217, %82 %220 = fadd float %219, %218 %221 = fmul float %181, %24 %222 = fmul float %182, %25 %223 = fadd float %222, %221 %224 = fmul float %183, %26 %225 = fadd float %223, %224 %226 = call float @llvm.maxnum.f32(float %225, float 0.000000e+00) %227 = fmul float %27, %181 %228 = fmul float %28, %182 %229 = fadd float %227, %228 %230 = fmul float %29, %183 %231 = fadd float %229, %230 %232 = fadd float %231, %30 %233 = fmul float %31, %181 %234 = fmul float %32, %182 %235 = fadd float %233, %234 %236 = fmul float %33, %183 %237 = fadd float %235, %236 %238 = fadd float %237, %34 %239 = fmul float %35, %181 %240 = fmul float %36, %182 %241 = fadd float %239, %240 %242 = fmul float %37, %183 %243 = fadd float %241, %242 %244 = fadd float %243, %38 %245 = fadd float %128, %232 %246 = fadd float %129, %238 %247 = fadd float %130, %244 %248 = fdiv float %131, %133 %249 = fdiv float %132, %133 %250 = bitcast float %248 to i32 %251 = bitcast float %249 to i32 %252 = insertelement <2 x i32> undef, i32 %250, i32 0 %253 = insertelement <2 x i32> %252, i32 %251, i32 1 %254 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %253, <32 x i8> %113, <16 x i8> %116, i32 2) %255 = extractelement <4 x float> %254, i32 0 %256 = fmul float %70, %255 %257 = fmul float %71, %255 %258 = fmul float %72, %255 %259 = fmul float %245, %220 %260 = fmul float %246, %220 %261 = fmul float %247, %220 %262 = fmul float %181, %190 %263 = fmul float %182, %191 %264 = fadd float %263, %262 %265 = fmul float %183, %192 %266 = fadd float %264, %265 %267 = fmul float %266, %181 %268 = fmul float %266, %182 %269 = fmul float %266, %183 %270 = fmul float %267, 2.000000e+00 %271 = fmul float %268, 2.000000e+00 %272 = fmul float %269, 2.000000e+00 %273 = fsub float %190, %270 %274 = fsub float %191, %271 %275 = fsub float %192, %272 %276 = fcmp ogt float %51, 0.000000e+00 br i1 %276, label %IF, label %ENDIF IF: ; preds = %main_body %277 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %278 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %279 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %280 = fmul float %273, %273 %281 = fmul float %274, %274 %282 = fadd float %281, %280 %283 = fmul float %275, %275 %284 = fadd float %282, %283 %285 = call float @llvm.AMDGPU.rsq.clamped.f32(float %284) %286 = fmul float %273, %285 %287 = fmul float %274, %285 %288 = fmul float %275, %285 %289 = fsub float %44, %138 %290 = fsub float %45, %139 %291 = fsub float %46, %140 %292 = fdiv float 1.000000e+00, %286 %293 = fdiv float 1.000000e+00, %287 %294 = fdiv float 1.000000e+00, %288 %295 = fmul float %289, %292 %296 = fmul float %290, %293 %297 = fmul float %291, %294 %298 = fsub float %47, %138 %299 = fsub float %48, %139 %300 = fsub float %49, %140 %301 = fdiv float 1.000000e+00, %286 %302 = fdiv float 1.000000e+00, %287 %303 = fdiv float 1.000000e+00, %288 %304 = fmul float %298, %301 %305 = fmul float %299, %302 %306 = fmul float %300, %303 %307 = fcmp ogt float %286, 0.000000e+00 %308 = fcmp ogt float %287, 0.000000e+00 %309 = fcmp ogt float %288, 0.000000e+00 %. = select i1 %307, float %295, float %304 %temp68.0 = select i1 %308, float %296, float %305 %.100 = select i1 %309, float %297, float %306 %310 = fadd float %44, %47 %311 = fadd float %45, %48 %312 = fadd float %46, %49 %313 = fmul float %310, 5.000000e-01 %314 = fmul float %311, 5.000000e-01 %315 = fmul float %312, 5.000000e-01 %316 = call float @llvm.minnum.f32(float %., float %temp68.0) %317 = call float @llvm.minnum.f32(float %316, float %.100) %318 = fsub float %313, %279 %319 = fsub float %314, %278 %320 = fsub float %315, %277 %321 = fadd float %318, %138 %322 = fadd float %319, %139 %323 = fadd float %320, %140 %324 = fmul float %286, %317 %325 = fadd float %324, %321 %326 = fmul float %287, %317 %327 = fadd float %326, %322 %328 = fmul float %288, %317 %329 = fadd float %328, %323 %330 = fsub float %325, %313 %331 = fsub float %327, %314 %332 = fsub float %329, %315 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp44.0 = phi float [ %330, %IF ], [ %273, %main_body ] %temp45.0 = phi float [ %331, %IF ], [ %274, %main_body ] %temp46.0 = phi float [ %332, %IF ], [ %275, %main_body ] %333 = fsub float 1.000000e+00, %81 %334 = call float @llvm.pow.f32(float %333, float 7.500000e-01) %335 = fmul float %334, 7.000000e+00 %336 = insertelement <4 x float> undef, float %temp44.0, i32 0 %337 = insertelement <4 x float> %336, float %temp45.0, i32 1 %338 = insertelement <4 x float> %337, float %temp46.0, i32 2 %339 = insertelement <4 x float> %338, float %335, i32 3 %340 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %339) %341 = extractelement <4 x float> %340, i32 0 %342 = extractelement <4 x float> %340, i32 1 %343 = extractelement <4 x float> %340, i32 2 %344 = extractelement <4 x float> %340, i32 3 %345 = call float @llvm.fabs.f32(float %343) %346 = fdiv float 1.000000e+00, %345 %347 = fmul float %341, %346 %348 = fadd float %347, 1.500000e+00 %349 = fmul float %342, %346 %350 = fadd float %349, 1.500000e+00 %351 = bitcast float %350 to i32 %352 = bitcast float %348 to i32 %353 = bitcast float %344 to i32 %354 = bitcast float %335 to i32 %355 = insertelement <4 x i32> undef, i32 %351, i32 0 %356 = insertelement <4 x i32> %355, i32 %352, i32 1 %357 = insertelement <4 x i32> %356, i32 %353, i32 2 %358 = insertelement <4 x i32> %357, i32 %354, i32 3 %359 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %358, <32 x i8> %84, <16 x i8> %86, i32 4) %360 = extractelement <4 x float> %359, i32 0 %361 = extractelement <4 x float> %359, i32 1 %362 = extractelement <4 x float> %359, i32 2 %363 = extractelement <4 x float> %359, i32 3 %364 = call float @llvm.pow.f32(float %363, float %53) %365 = fmul float %52, %364 %366 = fmul float %365, %360 %367 = fmul float %365, %361 %368 = fmul float %365, %362 %369 = fcmp olt float %50, 0x3FEFFFEB00000000 br i1 %369, label %IF86, label %ENDIF85 IF86: ; preds = %ENDIF %370 = fcmp ogt float %63, 0.000000e+00 br i1 %370, label %IF89, label %ENDIF88 ENDIF85: ; preds = %ENDIF, %ENDIF88 %temp28.0 = phi float [ %598, %ENDIF88 ], [ %366, %ENDIF ] %temp29.0 = phi float [ %599, %ENDIF88 ], [ %367, %ENDIF ] %temp30.0 = phi float [ %600, %ENDIF88 ], [ %368, %ENDIF ] %371 = fmul float %temp28.0, %220 %372 = fmul float %temp29.0, %220 %373 = fmul float %temp30.0, %220 %374 = fsub float 1.000000e+00, %81 %375 = fsub float %24, %190 %376 = fsub float %25, %191 %377 = fsub float %26, %192 %378 = fmul float %375, %375 %379 = fmul float %376, %376 %380 = fadd float %379, %378 %381 = fmul float %377, %377 %382 = fadd float %380, %381 %383 = call float @llvm.AMDGPU.rsq.clamped.f32(float %382) %384 = fmul float %375, %383 %385 = fmul float %376, %383 %386 = fmul float %377, %383 %387 = fmul float %190, %181 %388 = fsub float -0.000000e+00, %387 %389 = fmul float %191, %182 %390 = fsub float %388, %389 %391 = fmul float %192, %183 %392 = fsub float %390, %391 %393 = call float @llvm.maxnum.f32(float %392, float 0.000000e+00) %394 = fmul float %24, %384 %395 = fmul float %25, %385 %396 = fadd float %395, %394 %397 = fmul float %26, %386 %398 = fadd float %396, %397 %399 = call float @llvm.maxnum.f32(float %398, float 0.000000e+00) %400 = fmul float %374, %374 %401 = fmul float %400, %75 %402 = fsub float 1.000000e+00, %374 %403 = fmul float %402, 0x3FEEF9DB20000000 %404 = fadd float %403, 0x3F9EB851E0000000 %405 = call float @llvm.log2.f32(float %404) %406 = fdiv float 1.000000e+00, %405 %407 = fmul float %406, 1.000000e+01 %408 = fmul float %407, %407 %409 = fsub float 1.000000e+00, %226 %410 = fsub float 1.000000e+00, %393 %411 = fmul float %399, 2.000000e+00 %412 = fmul float %399, %374 %413 = fmul float %411, %412 %414 = fadd float %413, 5.000000e-01 %415 = fsub float 1.000000e+00, %399 %416 = fsub float 1.000000e+00, %393 %417 = fsub float 1.000000e+00, %208 %418 = fadd float %81, %417 %419 = call float @llvm.AMDIL.clamp.(float %418, float 0.000000e+00, float 1.000000e+00) %420 = fmul float %416, %416 %421 = fmul float %416, %416 %422 = fmul float %421, %416 %423 = fmul float %420, %422 %424 = call float @llvm.AMDGPU.lrp(float %423, float %419, float %204) %425 = call float @llvm.AMDGPU.lrp(float %423, float %419, float %205) %426 = call float @llvm.AMDGPU.lrp(float %423, float %419, float %206) %427 = call float @llvm.AMDGPU.lrp(float %226, float 1.000000e+00, float %401) %428 = call float @llvm.AMDGPU.lrp(float %393, float 1.000000e+00, float %401) %429 = fmul float %427, %428 %430 = fadd float %429, 0x3F1A36E2E0000000 %431 = fdiv float 1.000000e+00, %430 %432 = fmul float %181, %384 %433 = fmul float %182, %385 %434 = fadd float %433, %432 %435 = fmul float %183, %386 %436 = fadd float %434, %435 %437 = call float @llvm.maxnum.f32(float %436, float 0.000000e+00) %438 = call float @llvm.pow.f32(float %437, float %408) %439 = fadd float %408, 1.000000e+00 %440 = fmul float %439, %74 %441 = fmul float %438, %440 %442 = fmul float %431, %441 %443 = fmul float %442, %226 %444 = fmul float %443, %73 %445 = call float @llvm.maxnum.f32(float %444, float 0.000000e+00) %446 = fmul float %445, %256 %447 = fmul float %445, %257 %448 = fmul float %445, %258 %449 = fsub float 1.000000e+00, %204 %450 = fsub float 1.000000e+00, %205 %451 = fsub float 1.000000e+00, %206 %452 = fmul float %415, %415 %453 = fmul float %415, %415 %454 = fmul float %453, %415 %455 = fmul float %452, %454 %456 = fmul float %449, %455 %457 = fadd float %456, %204 %458 = fmul float %450, %455 %459 = fadd float %458, %205 %460 = fmul float %451, %455 %461 = fadd float %460, %206 %462 = fadd float %414, -1.000000e+00 %463 = fmul float %409, %409 %464 = fmul float %409, %409 %465 = fmul float %464, %409 %466 = fmul float %463, %465 %467 = fmul float %462, %466 %468 = fadd float %467, 1.000000e+00 %469 = fadd float %414, -1.000000e+00 %470 = fmul float %410, %410 %471 = fmul float %410, %410 %472 = fmul float %471, %410 %473 = fmul float %470, %472 %474 = fmul float %469, %473 %475 = fadd float %474, 1.000000e+00 %476 = fmul float %468, %475 %477 = fmul float %476, %226 %478 = fmul float %256, %477 %479 = fadd float %478, %259 %480 = fmul float %257, %477 %481 = fadd float %480, %260 %482 = fmul float %258, %477 %483 = fadd float %482, %261 %484 = fmul float %209, %479 %485 = fmul float %210, %481 %486 = fmul float %211, %483 %487 = fmul float %446, %457 %488 = fadd float %487, %484 %489 = fmul float %447, %459 %490 = fadd float %489, %485 %491 = fmul float %448, %461 %492 = fadd float %491, %486 %493 = fmul float %371, %424 %494 = fadd float %493, %488 %495 = fmul float %372, %425 %496 = fadd float %495, %490 %497 = fmul float %373, %426 %498 = fadd float %497, %492 %499 = fmul float %134, %42 %500 = fadd float %499, %43 %501 = call float @llvm.AMDIL.clamp.(float %500, float 0.000000e+00, float 1.000000e+00) %502 = call float @llvm.AMDGPU.lrp(float %501, float %494, float %39) %503 = call float @llvm.AMDGPU.lrp(float %501, float %496, float %40) %504 = call float @llvm.AMDGPU.lrp(float %501, float %498, float %41) %505 = call i32 @llvm.SI.packf16(float %502, float %503) %506 = bitcast i32 %505 to float %507 = call i32 @llvm.SI.packf16(float %504, float 1.000000e+00) %508 = bitcast i32 %507 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %506, float %508, float %506, float %508) ret void IF89: ; preds = %IF86 %509 = fmul float %273, %273 %510 = fmul float %274, %274 %511 = fadd float %510, %509 %512 = fmul float %275, %275 %513 = fadd float %511, %512 %514 = call float @llvm.AMDGPU.rsq.clamped.f32(float %513) %515 = fmul float %273, %514 %516 = fmul float %274, %514 %517 = fmul float %275, %514 %518 = fsub float %54, %138 %519 = fsub float %55, %139 %520 = fsub float %56, %140 %521 = fdiv float 1.000000e+00, %515 %522 = fdiv float 1.000000e+00, %516 %523 = fdiv float 1.000000e+00, %517 %524 = fmul float %518, %521 %525 = fmul float %519, %522 %526 = fmul float %520, %523 %527 = fsub float %57, %138 %528 = fsub float %58, %139 %529 = fsub float %59, %140 %530 = fdiv float 1.000000e+00, %515 %531 = fdiv float 1.000000e+00, %516 %532 = fdiv float 1.000000e+00, %517 %533 = fmul float %527, %530 %534 = fmul float %528, %531 %535 = fmul float %529, %532 %536 = fcmp ogt float %515, 0.000000e+00 %537 = fcmp ogt float %516, 0.000000e+00 %538 = fcmp ogt float %517, 0.000000e+00 %.101 = select i1 %536, float %524, float %533 %temp68.1 = select i1 %537, float %525, float %534 %.102 = select i1 %538, float %526, float %535 %539 = fadd float %54, %57 %540 = fadd float %55, %58 %541 = fadd float %56, %59 %542 = fmul float %539, 5.000000e-01 %543 = fmul float %540, 5.000000e-01 %544 = fmul float %541, 5.000000e-01 %545 = call float @llvm.minnum.f32(float %.101, float %temp68.1) %546 = call float @llvm.minnum.f32(float %545, float %.102) %547 = fsub float %542, %60 %548 = fsub float %543, %61 %549 = fsub float %544, %62 %550 = fadd float %547, %138 %551 = fadd float %548, %139 %552 = fadd float %549, %140 %553 = fmul float %515, %546 %554 = fadd float %553, %550 %555 = fmul float %516, %546 %556 = fadd float %555, %551 %557 = fmul float %517, %546 %558 = fadd float %557, %552 %559 = fsub float %554, %542 %560 = fsub float %556, %543 %561 = fsub float %558, %544 br label %ENDIF88 ENDIF88: ; preds = %IF86, %IF89 %temp48.0 = phi float [ %559, %IF89 ], [ %273, %IF86 ] %temp49.0 = phi float [ %560, %IF89 ], [ %274, %IF86 ] %temp50.0 = phi float [ %561, %IF89 ], [ %275, %IF86 ] %562 = fsub float 1.000000e+00, %81 %563 = call float @llvm.pow.f32(float %562, float 7.500000e-01) %564 = fmul float %563, 7.000000e+00 %565 = insertelement <4 x float> undef, float %temp48.0, i32 0 %566 = insertelement <4 x float> %565, float %temp49.0, i32 1 %567 = insertelement <4 x float> %566, float %temp50.0, i32 2 %568 = insertelement <4 x float> %567, float %564, i32 3 %569 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %568) %570 = extractelement <4 x float> %569, i32 0 %571 = extractelement <4 x float> %569, i32 1 %572 = extractelement <4 x float> %569, i32 2 %573 = extractelement <4 x float> %569, i32 3 %574 = call float @llvm.fabs.f32(float %572) %575 = fdiv float 1.000000e+00, %574 %576 = fmul float %570, %575 %577 = fadd float %576, 1.500000e+00 %578 = fmul float %571, %575 %579 = fadd float %578, 1.500000e+00 %580 = bitcast float %579 to i32 %581 = bitcast float %577 to i32 %582 = bitcast float %573 to i32 %583 = bitcast float %564 to i32 %584 = insertelement <4 x i32> undef, i32 %580, i32 0 %585 = insertelement <4 x i32> %584, i32 %581, i32 1 %586 = insertelement <4 x i32> %585, i32 %582, i32 2 %587 = insertelement <4 x i32> %586, i32 %583, i32 3 %588 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %587, <32 x i8> %89, <16 x i8> %92, i32 4) %589 = extractelement <4 x float> %588, i32 0 %590 = extractelement <4 x float> %588, i32 1 %591 = extractelement <4 x float> %588, i32 2 %592 = extractelement <4 x float> %588, i32 3 %593 = call float @llvm.pow.f32(float %592, float %65) %594 = fmul float %64, %593 %595 = fmul float %594, %589 %596 = fmul float %594, %590 %597 = fmul float %594, %591 %598 = call float @llvm.AMDGPU.lrp(float %50, float %366, float %595) %599 = call float @llvm.AMDGPU.lrp(float %50, float %367, float %596) %600 = call float @llvm.AMDGPU.lrp(float %50, float %368, float %597) br label %ENDIF85 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_load_dwordx4 s[40:43], s[4:5], 0x8 ; C0940508 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v16, v0, 0, 0, [m0] ; C8400000 v_interp_p2_f32 v16, [v16], v1, 0, 0, [m0] ; C8410001 v_interp_p1_f32 v17, v0, 1, 0, [m0] ; C8440100 v_interp_p2_f32 v17, [v17], v1, 1, 0, [m0] ; C8450101 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s2, s[12:15], 0x58 ; C2010D58 s_buffer_load_dword s1, s[12:15], 0x5c ; C2008D5C s_buffer_load_dword s0, s[12:15], 0x60 ; C2000D60 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800 v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801 v_interp_p1_f32 v10, v0, 1, 2, [m0] ; C8280900 v_interp_p2_f32 v10, [v10], v1, 1, 2, [m0] ; C8290901 v_interp_p1_f32 v11, v0, 2, 2, [m0] ; C82C0A00 v_interp_p2_f32 v11, [v11], v1, 2, 2, [m0] ; C82D0A01 v_interp_p1_f32 v13, v0, 0, 3, [m0] ; C8340C00 v_interp_p2_f32 v13, [v13], v1, 0, 3, [m0] ; C8350C01 v_interp_p1_f32 v18, v0, 1, 3, [m0] ; C8480D00 v_interp_p2_f32 v18, [v18], v1, 1, 3, [m0] ; C8490D01 v_interp_p1_f32 v19, v0, 2, 3, [m0] ; C84C0E00 v_interp_p2_f32 v19, [v19], v1, 2, 3, [m0] ; C84D0E01 v_interp_p1_f32 v2, v0, 0, 4, [m0] ; C8081000 v_interp_p2_f32 v2, [v2], v1, 0, 4, [m0] ; C8091001 v_interp_p1_f32 v3, v0, 1, 4, [m0] ; C80C1100 v_interp_p2_f32 v3, [v3], v1, 1, 4, [m0] ; C80D1101 v_interp_p1_f32 v5, v0, 2, 4, [m0] ; C8141200 v_interp_p2_f32 v5, [v5], v1, 2, 4, [m0] ; C8151201 v_interp_p1_f32 v20, v0, 0, 5, [m0] ; C8501400 v_interp_p2_f32 v20, [v20], v1, 0, 5, [m0] ; C8511401 v_interp_p1_f32 v21, v0, 1, 5, [m0] ; C8541500 v_interp_p2_f32 v21, [v21], v1, 1, 5, [m0] ; C8551501 v_interp_p1_f32 v22, v0, 3, 5, [m0] ; C8581700 v_interp_p2_f32 v22, [v22], v1, 3, 5, [m0] ; C8591701 v_interp_p1_f32 v4, v0, 0, 6, [m0] ; C8101800 v_interp_p2_f32 v4, [v4], v1, 0, 6, [m0] ; C8111801 v_interp_p1_f32 v23, v0, 1, 6, [m0] ; C85C1900 v_interp_p2_f32 v23, [v23], v1, 1, 6, [m0] ; C85D1901 v_interp_p1_f32 v24, v0, 2, 6, [m0] ; C8601A00 v_interp_p2_f32 v24, [v24], v1, 2, 6, [m0] ; C8611A01 s_load_dwordx4 s[8:11], s[4:5], 0xc ; C084050C s_load_dwordx4 s[16:19], s[4:5], 0x10 ; C0880510 s_load_dwordx4 s[20:23], s[4:5], 0x14 ; C08A0514 s_load_dwordx8 s[44:51], s[6:7], 0x10 ; C0D60710 v_interp_p1_f32 v25, v0, 3, 6, [m0] ; C8641B00 v_interp_p2_f32 v25, [v25], v1, 3, 6, [m0] ; C8651B01 v_interp_p1_f32 v14, v0, 0, 7, [m0] ; C8381C00 v_interp_p2_f32 v14, [v14], v1, 0, 7, [m0] ; C8391C01 v_interp_p1_f32 v12, v0, 1, 7, [m0] ; C8301D00 v_interp_p2_f32 v12, [v12], v1, 1, 7, [m0] ; C8311D01 v_interp_p1_f32 v15, v0, 2, 7, [m0] ; C83C1E00 v_interp_p2_f32 v15, [v15], v1, 2, 7, [m0] ; C83D1E01 s_load_dwordx8 s[52:59], s[6:7], 0x18 ; C0DA0718 s_load_dwordx8 s[24:31], s[6:7], 0x20 ; C0CC0720 s_load_dwordx8 s[32:39], s[6:7], 0x28 ; C0D00728 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[52:59], s[8:11] ; F0800A00 004D0010 image_sample v[30:32], 7, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[44:51], s[40:43] ; F0800700 014B1E10 s_waitcnt vmcnt(1) ; BF8C0771 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4 v_mul_f32_e32 v1, s2, v1 ; 10020202 v_mul_f32_e32 v0, s2, v0 ; 10000002 v_mul_f32_e32 v6, v6, v1 ; 100C0306 v_mac_f32_e32 v6, v9, v0 ; 3E0C0109 v_mul_f32_e32 v7, v7, v1 ; 100E0307 v_mac_f32_e32 v7, v10, v0 ; 3E0E010A v_mul_f32_e32 v10, v8, v1 ; 10140308 v_mac_f32_e32 v10, v11, v0 ; 3E14010B v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mac_f32_e32 v0, v1, v1 ; 3E000301 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v0, 1.0, v0 ; 080000F2 v_sqrt_f32_e32 v0, v0 ; 7E006700 v_mac_f32_e32 v6, v13, v0 ; 3E0C010D v_mac_f32_e32 v7, v18, v0 ; 3E0E0112 v_mac_f32_e32 v10, v19, v0 ; 3E140113 v_mul_f32_e32 v0, v6, v6 ; 10000D06 v_mac_f32_e32 v0, v7, v7 ; 3E000F07 v_mac_f32_e32 v0, v10, v10 ; 3E00150A v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_mul_f32_e32 v1, v23, v23 ; 10022F17 v_mac_f32_e32 v1, v24, v24 ; 3E023118 v_mac_f32_e32 v1, v25, v25 ; 3E023319 v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 v_mul_f32_e32 v9, v0, v6 ; 10120D00 v_mul_f32_e32 v8, v0, v7 ; 10100F00 v_mul_f32_e32 v7, v0, v10 ; 100E1500 v_mul_f32_e32 v11, v1, v23 ; 10162F01 v_mul_f32_e32 v10, v1, v24 ; 10143101 v_mul_f32_e32 v0, v11, v9 ; 1000130B v_mac_f32_e32 v0, v10, v8 ; 3E00110A v_mul_f32_e32 v13, v1, v25 ; 101A3301 v_mac_f32_e32 v0, v13, v7 ; 3E000F0D v_mul_f32_e32 v6, v9, v0 ; 100C0109 v_mac_f32_e32 v6, v9, v0 ; 3E0C0109 v_mul_f32_e32 v18, v8, v0 ; 10240108 v_mac_f32_e32 v18, v8, v0 ; 3E240108 v_mad_f32 v27, v23, v1, -v6 ; D282001B 841A0317 v_mad_f32 v28, v24, v1, -v18 ; D282001C 844A0318 v_mul_f32_e32 v6, v7, v0 ; 100C0107 v_mac_f32_e32 v6, v7, v0 ; 3E0C0107 s_buffer_load_dword s2, s[12:15], 0x4c ; C2010D4C s_buffer_load_dword s3, s[12:15], 0x4d ; C2018D4D s_buffer_load_dword s8, s[12:15], 0x4e ; C2040D4E v_mad_f32 v29, v25, v1, -v6 ; D282001D 841A0319 v_mov_b32_e32 v0, 0x6f800000 ; 7E0002FF 6F800000 v_cmp_gt_f32_e64 vcc, |v22|, v0 ; D008016A 00020116 v_mov_b32_e32 v0, 0x2f800000 ; 7E0002FF 2F800000 v_cndmask_b32_e32 v0, 1.0, v0 ; 000000F2 v_mul_f32_e32 v1, v0, v22 ; 10022D00 v_rcp_f32_e32 v1, v1 ; 7E025501 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v24, s2, v30 ; 10303C02 v_mul_f32_e32 v25, s3, v31 ; 10323E03 v_mul_f32_e32 v26, s8, v32 ; 10344008 v_mul_f32_e32 v6, v1, v20 ; 100C2901 v_mul_f32_e32 v1, v1, v21 ; 10022B01 s_buffer_load_dword s2, s[12:15], 0x40 ; C2010D40 s_buffer_load_dword s3, s[12:15], 0x41 ; C2018D41 s_buffer_load_dword s8, s[12:15], 0x42 ; C2040D42 v_mul_f32_e32 v18, v6, v0 ; 10240106 v_mul_f32_e32 v19, v1, v0 ; 10260101 s_buffer_load_dword s9, s[12:15], 0x27 ; C2048D27 s_buffer_load_dword s10, s[12:15], 0x2b ; C2050D2B s_buffer_load_dword s40, s[12:15], 0x2c ; C2140D2C s_buffer_load_dword s41, s[12:15], 0x2d ; C2148D2D v_sub_f32_e64 v0, 1.0, s1 ; D2080000 000002F2 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s2, v0 ; 100C0002 v_mul_f32_e32 v1, s3, v0 ; 10020003 v_mul_f32_e32 v0, s8, v0 ; 10000008 v_mac_f32_e32 v6, s1, v24 ; 3E0C3001 v_mov_b32_e32 v30, v27 ; 7E3C031B v_mac_f32_e32 v1, s1, v25 ; 3E023201 v_mov_b32_e32 v31, v28 ; 7E3E031C v_mac_f32_e32 v0, s1, v26 ; 3E003401 v_mov_b32_e32 v32, v29 ; 7E40031D v_cmp_lt_f32_e64 s[2:3], 0, s10 ; D0020002 00001480 image_sample v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[24:31], s[16:19] ; F0800F00 00861410 image_sample v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[32:39], s[20:23] ; F0800F00 00A81012 s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[18:19], s[2:3] ; BE922402 s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E s_cbranch_execz BB0_2 ; BF880000 s_buffer_load_dword s2, s[12:15], 0x20 ; C2010D20 s_buffer_load_dword s3, s[12:15], 0x21 ; C2018D21 s_buffer_load_dword s8, s[12:15], 0x22 ; C2040D22 s_buffer_load_dword s10, s[12:15], 0x24 ; C2050D24 s_buffer_load_dword s11, s[12:15], 0x25 ; C2058D25 v_mul_f32_e32 v17, v27, v27 ; 1022371B v_mac_f32_e32 v17, v28, v28 ; 3E22391C v_mac_f32_e32 v17, v29, v29 ; 3E223B1D v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 s_buffer_load_dword s16, s[12:15], 0x26 ; C2080D26 s_buffer_load_dword s17, s[12:15], 0x28 ; C2088D28 s_buffer_load_dword s20, s[12:15], 0x29 ; C20A0D29 s_buffer_load_dword s21, s[12:15], 0x2a ; C20A8D2A v_mul_f32_e32 v18, v17, v27 ; 10243711 v_mul_f32_e32 v19, v17, v28 ; 10263911 v_mul_f32_e32 v17, v17, v29 ; 10223B11 v_rcp_f32_e32 v20, v18 ; 7E285512 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v22, s2, v14 ; 082C1C02 v_sub_f32_e32 v23, s3, v12 ; 082E1803 v_rcp_f32_e32 v30, v19 ; 7E3C5513 v_mul_f32_e32 v22, v20, v22 ; 102C2D14 v_sub_f32_e32 v31, s10, v14 ; 083E1C0A v_mul_f32_e32 v20, v20, v31 ; 10283F14 v_cmp_lt_f32_e32 vcc, 0, v18 ; 7C022480 v_cndmask_b32_e32 v20, v20, v22 ; 00282D14 v_rcp_f32_e32 v22, v17 ; 7E2C5511 v_mul_f32_e32 v23, v30, v23 ; 102E2F1E v_sub_f32_e32 v31, s11, v12 ; 083E180B v_mul_f32_e32 v30, v30, v31 ; 103C3F1E v_cmp_lt_f32_e32 vcc, 0, v19 ; 7C022680 v_cndmask_b32_e32 v23, v30, v23 ; 002E2F1E v_sub_f32_e32 v30, s8, v15 ; 083C1E08 v_mul_f32_e32 v30, v22, v30 ; 103C3D16 v_sub_f32_e32 v31, s16, v15 ; 083E1E10 v_mul_f32_e32 v22, v22, v31 ; 102C3F16 v_cmp_lt_f32_e32 vcc, 0, v17 ; 7C022280 v_cndmask_b32_e32 v22, v22, v30 ; 002C3D16 v_min3_f32 v20, v20, v23, v22 ; D2A20014 045A2F14 v_mov_b32_e32 v22, s10 ; 7E2C020A v_add_f32_e32 v22, s2, v22 ; 062C2C02 v_mov_b32_e32 v23, s11 ; 7E2E020B v_add_f32_e32 v23, s3, v23 ; 062E2E03 v_mov_b32_e32 v30, s16 ; 7E3C0210 v_add_f32_e32 v32, s8, v30 ; 06403C08 v_mad_f32 v30, 0.5, v22, -s17 ; D282001E 80462CF0 v_add_f32_e32 v30, v14, v30 ; 063C3D0E v_mac_f32_e32 v30, v20, v18 ; 3E3C2514 v_mad_f32 v18, 0.5, v23, -s20 ; D2820012 80522EF0 v_add_f32_e32 v18, v12, v18 ; 0624250C v_mac_f32_e32 v18, v20, v19 ; 3E242714 v_mad_f32 v19, 0.5, v32, -s21 ; D2820013 805640F0 v_add_f32_e32 v19, v15, v19 ; 0626270F v_mac_f32_e32 v19, v20, v17 ; 3E262314 v_mad_f32 v30, 0.5, -v22, v30 ; D282001E 447A2CF0 v_mad_f32 v31, 0.5, -v23, v18 ; D282001F 444A2EF0 v_mad_f32 v32, 0.5, -v32, v19 ; D2820020 444E40F0 s_or_b64 exec, exec, s[18:19] ; 88FE127E s_buffer_load_dword s28, s[12:15], 0x17 ; C20E0D17 s_buffer_load_dword s29, s[12:15], 0x43 ; C20E8D43 s_buffer_load_dword s27, s[12:15], 0x44 ; C20D8D44 s_buffer_load_dword s20, s[12:15], 0x45 ; C20A0D45 s_buffer_load_dword s17, s[12:15], 0x46 ; C2088D46 s_buffer_load_dword s3, s[12:15], 0x0 ; C2018D00 s_buffer_load_dword s8, s[12:15], 0x1 ; C2040D01 s_buffer_load_dword s2, s[12:15], 0x2 ; C2010D02 s_buffer_load_dword s10, s[12:15], 0x4 ; C2050D04 s_buffer_load_dword s11, s[12:15], 0x5 ; C2058D05 s_buffer_load_dword s16, s[12:15], 0x6 ; C2080D06 s_buffer_load_dword s18, s[12:15], 0x7 ; C2090D07 s_buffer_load_dword s19, s[12:15], 0x8 ; C2098D08 s_buffer_load_dword s21, s[12:15], 0x9 ; C20A8D09 s_buffer_load_dword s22, s[12:15], 0xa ; C20B0D0A s_buffer_load_dword s23, s[12:15], 0xb ; C20B8D0B s_buffer_load_dword s24, s[12:15], 0xc ; C20C0D0C s_buffer_load_dword s25, s[12:15], 0xd ; C20C8D0D s_buffer_load_dword s26, s[12:15], 0xe ; C20D0D0E v_sub_f32_e64 v17, 1.0, s0 ; D2080011 000000F2 v_log_f32_e32 v17, v17 ; 7E224F11 v_mul_legacy_f32_e32 v17, 0x3f400000, v17 ; 0E2222FF 3F400000 v_exp_f32_e32 v17, v17 ; 7E224B11 v_mul_f32_e32 v33, 0x40e00000, v17 ; 104222FF 40E00000 v_cubeid_f32 v20, v30, v31, v32 ; D2880014 04823F1E v_cubema_f32 v19, v30, v31, v32 ; D28E0013 04823F1E s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx8 s[44:51], s[6:7], 0x0 ; C0D60700 v_cubesc_f32 v18, v30, v31, v32 ; D28A0012 04823F1E v_cubetc_f32 v17, v30, v31, v32 ; D28C0011 04823F1E v_rcp_f32_e64 v19, |v19| ; D3540113 00000113 v_mov_b32_e32 v30, 0x3fc00000 ; 7E3C02FF 3FC00000 v_mad_f32 v31, v19, v17, v30 ; D282001F 047A2313 v_mac_f32_e32 v30, v19, v18 ; 3E3C2513 v_mov_b32_e32 v32, v20 ; 7E400314 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[30:33], 15, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[44:51], s[32:35] ; F0900F00 010B1E1E s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v17, v33 ; 7E224F21 s_buffer_load_dword s30, s[12:15], 0xf ; C20F0D0F s_buffer_load_dword s31, s[12:15], 0x68 ; C20F8D68 v_mul_legacy_f32_e32 v17, s41, v17 ; 0E222229 v_exp_f32_e32 v17, v17 ; 7E224B11 v_mul_f32_e32 v17, s40, v17 ; 10222228 v_mul_f32_e32 v19, v30, v17 ; 1026231E v_mul_f32_e32 v18, v31, v17 ; 1024231F v_mul_f32_e32 v17, v32, v17 ; 10222320 v_mov_b32_e32 v20, s1 ; 7E280201 v_mov_b32_e32 v22, 0x3f7fff58 ; 7E2C02FF 3F7FFF58 v_cmp_lt_f32_e32 vcc, s9, v22 ; 7C022C09 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[32:33], vcc ; BEA0246A s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E s_cbranch_execz BB0_6 ; BF880000 s_buffer_load_dword s35, s[12:15], 0x3b ; C2118D3B s_buffer_load_dword s1, s[12:15], 0x3c ; C2008D3C s_buffer_load_dword s34, s[12:15], 0x3d ; C2110D3D s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[36:37], 0, s35 ; D0020024 00004680 s_and_saveexec_b64 s[36:37], s[36:37] ; BEA42424 s_xor_b64 s[36:37], exec, s[36:37] ; 89A4247E s_cbranch_execz BB0_7 ; BF880000 s_buffer_load_dword s35, s[12:15], 0x36 ; C2118D36 s_buffer_load_dword s38, s[12:15], 0x38 ; C2130D38 s_buffer_load_dword s39, s[12:15], 0x39 ; C2138D39 s_buffer_load_dword s40, s[12:15], 0x3a ; C2140D3A s_buffer_load_dword s41, s[12:15], 0x30 ; C2148D30 s_buffer_load_dword s42, s[12:15], 0x31 ; C2150D31 s_buffer_load_dword s43, s[12:15], 0x32 ; C2158D32 s_buffer_load_dword s44, s[12:15], 0x34 ; C2160D34 s_buffer_load_dword s45, s[12:15], 0x35 ; C2168D35 v_mul_f32_e32 v22, v27, v27 ; 102C371B v_mac_f32_e32 v22, v28, v28 ; 3E2C391C v_mac_f32_e32 v22, v29, v29 ; 3E2C3B1D v_rsq_clamp_f32_e32 v22, v22 ; 7E2C5916 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v23, s35, v15 ; 082E1E23 v_mov_b32_e32 v30, s35 ; 7E3C0223 v_sub_f32_e32 v31, s41, v14 ; 083E1C29 v_sub_f32_e32 v32, s42, v12 ; 0840182A v_add_f32_e32 v30, s43, v30 ; 063C3C2B v_sub_f32_e32 v33, s43, v15 ; 08421E2B v_mad_f32 v34, 0.5, v30, -s40 ; D2820022 80A23CF0 v_add_f32_e32 v15, v15, v34 ; 061E450F v_mul_f32_e32 v27, v22, v27 ; 10363716 v_mul_f32_e32 v28, v22, v28 ; 10383916 v_mul_f32_e32 v22, v22, v29 ; 102C3B16 v_rcp_f32_e32 v29, v27 ; 7E3A551B v_rcp_f32_e32 v34, v28 ; 7E44551C v_rcp_f32_e32 v35, v22 ; 7E465516 v_sub_f32_e32 v36, s44, v14 ; 08481C2C v_mov_b32_e32 v37, s44 ; 7E4A022C v_add_f32_e32 v37, s41, v37 ; 064A4A29 v_mul_f32_e32 v31, v29, v31 ; 103E3F1D v_mul_f32_e32 v29, v29, v36 ; 103A491D v_mul_f32_e32 v32, v34, v32 ; 10404122 v_mul_f32_e32 v33, v35, v33 ; 10424323 v_mul_f32_e32 v23, v35, v23 ; 102E2F23 v_mad_f32 v35, 0.5, v37, -s38 ; D2820023 809A4AF0 v_add_f32_e32 v14, v14, v35 ; 061C470E v_sub_f32_e32 v35, s45, v12 ; 0846182D v_mov_b32_e32 v36, s45 ; 7E48022D v_mul_f32_e32 v34, v34, v35 ; 10444722 v_add_f32_e32 v35, s42, v36 ; 0646482A v_cmp_lt_f32_e32 vcc, 0, v27 ; 7C023680 v_cndmask_b32_e32 v29, v29, v31 ; 003A3F1D v_cmp_lt_f32_e32 vcc, 0, v28 ; 7C023880 v_cndmask_b32_e32 v31, v34, v32 ; 003E4122 v_cmp_lt_f32_e32 vcc, 0, v22 ; 7C022C80 v_cndmask_b32_e32 v23, v23, v33 ; 002E4317 v_min3_f32 v23, v29, v31, v23 ; D2A20017 045E3F1D v_mad_f32 v29, 0.5, v35, -s39 ; D282001D 809E46F0 v_add_f32_e32 v12, v12, v29 ; 06183B0C v_mac_f32_e32 v14, v23, v27 ; 3E1C3717 v_mac_f32_e32 v12, v23, v28 ; 3E183917 v_mac_f32_e32 v15, v23, v22 ; 3E1E2D17 v_mad_f32 v27, 0.5, -v37, v14 ; D282001B 443A4AF0 v_mad_f32 v28, 0.5, -v35, v12 ; D282001C 443246F0 v_mad_f32 v29, 0.5, -v30, v15 ; D282001D 443E3CF0 s_or_b64 exec, exec, s[36:37] ; 88FE247E v_sub_f32_e64 v12, 1.0, s0 ; D208000C 000000F2 v_log_f32_e32 v12, v12 ; 7E184F0C s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 v_mul_legacy_f32_e32 v12, 0x3f400000, v12 ; 0E1818FF 3F400000 v_exp_f32_e32 v12, v12 ; 7E184B0C v_mul_f32_e32 v30, 0x40e00000, v12 ; 103C18FF 40E00000 v_cubeid_f32 v34, v27, v28, v29 ; D2880022 0476391B v_cubema_f32 v33, v27, v28, v29 ; D28E0021 0476391B s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 v_cubesc_f32 v32, v27, v28, v29 ; D28A0020 0476391B v_cubetc_f32 v31, v27, v28, v29 ; D28C001F 0476391B v_rcp_f32_e64 v12, |v33| ; D354010C 00000121 v_mov_b32_e32 v27, 0x3fc00000 ; 7E3602FF 3FC00000 v_mad_f32 v28, v12, v31, v27 ; D282001C 046E3F0C v_mac_f32_e32 v27, v12, v32 ; 3E36410C v_mov_b32_e32 v29, v34 ; 7E3A0322 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[40:47], s[36:39] ; F0900F00 012A1B1B s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v12, v30 ; 7E184F1E v_sub_f32_e64 v14, 1.0, s9 ; D208000E 000012F2 v_mul_legacy_f32_e32 v12, s34, v12 ; 0E181822 v_exp_f32_e32 v12, v12 ; 7E184B0C v_mul_f32_e32 v12, s1, v12 ; 10181801 v_mul_f32_e32 v15, v27, v12 ; 101E191B v_mul_f32_e32 v22, v28, v12 ; 102C191C v_mul_f32_e32 v12, v29, v12 ; 1018191D v_mul_f32_e32 v15, v15, v14 ; 101E1D0F v_mul_f32_e32 v22, v22, v14 ; 102C1D16 v_mul_f32_e32 v12, v12, v14 ; 10181D0C v_mac_f32_e32 v15, s9, v19 ; 3E1E2609 v_mac_f32_e32 v22, s9, v18 ; 3E2C2409 v_mac_f32_e32 v12, s9, v17 ; 3E182209 v_mov_b32_e32 v17, v12 ; 7E22030C v_mov_b32_e32 v18, v22 ; 7E240316 v_mov_b32_e32 v19, v15 ; 7E26030F s_or_b64 exec, exec, s[32:33] ; 88FE207E v_mad_f32 v27, -v20, s29, s29 ; D282001B 20743B14 v_mov_b32_e32 v12, s28 ; 7E18021C v_mul_f32_e32 v20, v27, v24 ; 1028311B v_mul_f32_e32 v15, v27, v25 ; 101E331B v_mul_f32_e32 v14, v27, v26 ; 101C351B v_mul_f32_e32 v22, s27, v16 ; 102C201B v_sub_f32_e64 v24, 1.0, s31 ; D2080018 00003EF2 v_mac_f32_e32 v24, s31, v21 ; 3E302A1F v_mul_f32_e32 v21, s20, v16 ; 102A2014 v_mul_f32_e32 v16, s17, v16 ; 10202011 s_buffer_load_dword s5, s[12:15], 0x10 ; C2028D10 s_buffer_load_dword s4, s[12:15], 0x11 ; C2020D11 s_buffer_load_dword s1, s[12:15], 0x12 ; C2008D12 s_buffer_load_dword s17, s[12:15], 0x16 ; C2088D16 s_buffer_load_dword s6, s[12:15], 0x48 ; C2030D48 s_buffer_load_dword s7, s[12:15], 0x49 ; C2038D49 s_buffer_load_dword s9, s[12:15], 0x4b ; C2048D4B v_mul_f32_e32 v23, s11, v8 ; 102E100B v_mac_f32_e32 v23, s10, v9 ; 3E2E120A v_mac_f32_e32 v23, s16, v7 ; 3E2E0E10 v_add_f32_e32 v23, s18, v23 ; 062E2E12 v_mul_f32_e32 v25, s21, v8 ; 10321015 v_mac_f32_e32 v25, s19, v9 ; 3E321213 v_mac_f32_e32 v25, s22, v7 ; 3E320E16 v_add_f32_e32 v25, s23, v25 ; 06323217 v_mul_f32_e32 v26, s25, v8 ; 10341019 v_mac_f32_e32 v26, s24, v9 ; 3E341218 v_mac_f32_e32 v26, s26, v7 ; 3E340E1A v_add_f32_e32 v26, s30, v26 ; 0634341E v_add_f32_e32 v2, v23, v2 ; 06040517 v_add_f32_e32 v3, v25, v3 ; 06060719 v_add_f32_e32 v25, v26, v5 ; 06320B1A v_mul_f32_e32 v5, s3, v9 ; 100A1203 v_mac_f32_e32 v5, s8, v8 ; 3E0A1008 v_mac_f32_e32 v5, s2, v7 ; 3E0A0E02 v_max_f32_e32 v23, 0, v5 ; 202E0A80 v_mul_f32_e32 v5, v24, v2 ; 100A0518 v_mul_f32_e32 v2, v24, v3 ; 10040718 v_mul_f32_e32 v3, v24, v25 ; 10063318 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v12, s17, v4 ; 3E180811 v_mul_f32_e32 v4, v24, v19 ; 10082718 v_mul_f32_e32 v18, v24, v18 ; 10242518 v_mul_f32_e32 v17, v24, v17 ; 10222318 v_sub_f32_e32 v19, 1.0, v27 ; 082636F2 v_add_f32_e32 v19, s0, v19 ; 06262600 v_add_f32_e64 v19, 0, v19 clamp ; D2060813 00022680 v_sub_f32_e32 v24, s3, v11 ; 08301603 v_sub_f32_e32 v25, s8, v10 ; 08321408 v_mul_f32_e32 v26, v24, v24 ; 10343118 v_mac_f32_e32 v26, v25, v25 ; 3E343319 v_sub_f32_e32 v27, s2, v13 ; 08361A02 v_mac_f32_e32 v26, v27, v27 ; 3E34371B v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A v_mul_f32_e32 v24, v26, v24 ; 1030311A v_mul_f32_e32 v25, v26, v25 ; 1032331A v_mul_f32_e32 v26, v26, v27 ; 1034371A v_mul_f32_e32 v11, v11, v9 ; 1016130B v_mad_f32 v10, -v10, v8, -v11 ; D282000A A42E110A v_mad_f32 v10, -v13, v7, v10 ; D282000A 242A0F0D v_mul_f32_e32 v9, v24, v9 ; 10121318 v_mac_f32_e32 v9, v25, v8 ; 3E121119 v_mul_f32_e32 v8, s3, v24 ; 10103003 v_mac_f32_e32 v8, s8, v25 ; 3E103208 v_mac_f32_e32 v9, v26, v7 ; 3E120F1A v_mac_f32_e32 v8, s2, v26 ; 3E103402 v_max_f32_e32 v7, 0, v8 ; 200E1080 v_sub_f32_e32 v8, 1.0, v7 ; 08100EF2 v_mul_f32_e32 v11, v8, v8 ; 10161108 v_mul_f32_e32 v8, v8, v11 ; 10101708 v_mul_f32_e32 v8, v8, v11 ; 10101708 v_max_f32_e32 v10, 0, v10 ; 20141480 v_sub_f32_e32 v11, 1.0, v10 ; 081614F2 v_mul_f32_e32 v13, v11, v11 ; 101A170B v_mul_f32_e32 v24, v11, v13 ; 10301B0B v_mad_f32 v25, -v13, v24, 1.0 ; D2820019 23CA310D v_mul_f32_e32 v26, v6, v25 ; 10343306 v_sub_f32_e32 v27, 1.0, v6 ; 08360CF2 v_mac_f32_e32 v6, v8, v27 ; 3E0C3708 v_mul_f32_e32 v27, v1, v25 ; 10363301 v_sub_f32_e32 v28, 1.0, v1 ; 083802F2 v_mac_f32_e32 v1, v8, v28 ; 3E023908 v_mul_f32_e32 v25, v0, v25 ; 10323300 v_sub_f32_e32 v28, 1.0, v0 ; 083800F2 v_mac_f32_e32 v0, v8, v28 ; 3E003908 v_sub_f32_e64 v8, 1.0, s0 ; D2080008 000000F2 v_sub_f32_e32 v28, 1.0, v8 ; 083810F2 v_mov_b32_e32 v29, 0x3cf5c28f ; 7E3A02FF 3CF5C28F v_madmk_f32_e32 v28, v28, v29, 0x3f77ced9 ; 40383B1C 3F77CED9 v_add_f32_e32 v29, v7, v7 ; 063A0F07 v_mul_f32_e32 v7, v8, v7 ; 100E0F08 v_mad_f32 v7, v29, v7, 0.5 ; D2820007 03C20F1D v_mul_f32_e32 v13, v24, v13 ; 101A1B18 v_mac_f32_e32 v26, v19, v13 ; 3E341B13 v_mac_f32_e32 v27, v19, v13 ; 3E361B13 v_mac_f32_e32 v25, v19, v13 ; 3E321B13 v_mul_f32_e32 v8, v8, v8 ; 10101108 v_log_f32_e32 v19, v28 ; 7E264F1C v_mul_f32_e32 v8, s9, v8 ; 10101009 v_mul_f32_e32 v11, v8, v11 ; 10161708 v_mac_f32_e32 v11, 1.0, v10 ; 3E1614F2 v_rcp_f32_e32 v10, v19 ; 7E145513 v_sub_f32_e32 v19, 1.0, v23 ; 08262EF2 v_mul_f32_e32 v8, v8, v19 ; 10102708 v_mac_f32_e32 v8, 1.0, v23 ; 3E102EF2 v_max_f32_e32 v9, 0, v9 ; 20121280 v_log_f32_e32 v9, v9 ; 7E124F09 v_madak_f32_e32 v8, v8, v11, 0x38d1b717 ; 42101708 38D1B717 v_mul_f32_e32 v10, 0x41200000, v10 ; 101414FF 41200000 v_mul_f32_e32 v11, v10, v10 ; 1016150A v_mul_legacy_f32_e32 v9, v11, v9 ; 0E12130B v_rcp_f32_e32 v8, v8 ; 7E105508 v_mad_f32 v10, v10, v10, 1.0 ; D282000A 03CA150A v_mul_f32_e32 v10, s7, v10 ; 10141407 v_exp_f32_e32 v9, v9 ; 7E124B09 v_mul_f32_e32 v9, v10, v9 ; 1012130A v_mul_f32_e32 v8, v9, v8 ; 10101109 v_mul_f32_e32 v9, v19, v19 ; 10122713 v_mul_f32_e32 v10, v19, v9 ; 10141313 v_mul_f32_e32 v9, v10, v9 ; 1012130A v_add_f32_e32 v7, -1.0, v7 ; 060E0EF3 v_mad_f32 v9, v7, v9, 1.0 ; D2820009 03CA1307 v_mad_f32 v7, v7, v13, 1.0 ; D2820007 03CA1B07 v_mul_f32_e32 v7, v7, v9 ; 100E1307 v_mul_f32_e32 v8, v23, v8 ; 10101117 v_mul_f32_e32 v8, s6, v8 ; 10101006 v_mul_f32_e32 v7, v23, v7 ; 100E0F17 v_mac_f32_e32 v5, v7, v22 ; 3E0A2D07 v_mul_f32_e32 v5, v5, v20 ; 100A2905 v_max_f32_e32 v8, 0, v8 ; 20101080 v_mul_f32_e32 v9, v22, v8 ; 10121116 v_mac_f32_e32 v5, v6, v9 ; 3E0A1306 v_mac_f32_e32 v2, v7, v21 ; 3E042B07 v_mac_f32_e32 v3, v7, v16 ; 3E062107 v_mul_f32_e32 v6, v21, v8 ; 100C1115 v_mul_f32_e32 v7, v16, v8 ; 100E1110 v_mul_f32_e32 v2, v2, v15 ; 10041F02 v_mul_f32_e32 v3, v3, v14 ; 10061D03 v_mac_f32_e32 v2, v1, v6 ; 3E040D01 v_mac_f32_e32 v3, v0, v7 ; 3E060F00 v_mac_f32_e32 v5, v26, v4 ; 3E0A091A v_mac_f32_e32 v2, v27, v18 ; 3E04251B v_mac_f32_e32 v3, v25, v17 ; 3E062319 v_add_f32_e64 v0, 0, v12 clamp ; D2060800 00021880 v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 v_mul_f32_e32 v4, s5, v1 ; 10080205 v_mac_f32_e32 v4, v5, v0 ; 3E080105 v_mul_f32_e32 v5, s4, v1 ; 100A0204 v_mac_f32_e32 v5, v2, v0 ; 3E0A0102 v_mul_f32_e32 v1, s1, v1 ; 10020201 v_mac_f32_e32 v1, v3, v0 ; 3E020103 v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04 v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 40 Code Size: 2380 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL OUT[6], GENERIC[5] DCL OUT[7], GENERIC[6] DCL CONST[0..19] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[5], IN[0].xxxx 1: MAD TEMP[0], CONST[6], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[7], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0].xyz, CONST[8], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[16], IN[0].xxxx 5: MAD TEMP[1], CONST[17], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[18], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1], CONST[19], IN[0].wwww, TEMP[1] 8: MAD TEMP[2].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww 9: FSEQ TEMP[3].x, CONST[15].xxxx, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].xy, IN[2].xyxx 12: ELSE :0 13: MOV TEMP[3].xy, IN[3].xyxx 14: ENDIF 15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[14].xyyy, CONST[14].zwww 16: MOV TEMP[2].zw, TEMP[3].yyxy 17: MOV TEMP[3].x, CONST[9].xxxx 18: MOV TEMP[3].y, CONST[10].xxxx 19: MOV TEMP[3].z, CONST[11].xxxx 20: MOV TEMP[4].x, CONST[9].yyyy 21: MOV TEMP[4].y, CONST[10].yyyy 22: MOV TEMP[4].z, CONST[11].yyyy 23: MOV TEMP[5].x, CONST[9].zzzz 24: MOV TEMP[5].y, CONST[10].zzzz 25: MOV TEMP[5].z, CONST[11].zzzz 26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz 29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 30: RSQ TEMP[4].x, TEMP[4].xxxx 31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 32: MUL TEMP[4].xyz, CONST[5].xyzz, IN[4].xxxx 33: MAD TEMP[4].xyz, CONST[6].xyzz, IN[4].yyyy, TEMP[4].xyzz 34: MAD TEMP[4].xyz, CONST[7].xyzz, IN[4].zzzz, TEMP[4].xyzz 35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 36: RSQ TEMP[5].x, TEMP[5].xxxx 37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx 39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz 40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww 41: MOV TEMP[4].xyz, TEMP[4].xyzx 42: MOV TEMP[5].xyz, TEMP[5].xyzx 43: MOV TEMP[6].xyz, TEMP[3].xyzx 44: MUL TEMP[7], TEMP[3].xyzz, TEMP[3].yzzx 45: DP4 TEMP[8].x, CONST[1], TEMP[7] 46: DP4 TEMP[9].x, CONST[2], TEMP[7] 47: MOV TEMP[8].y, TEMP[9].xxxx 48: DP4 TEMP[7].x, CONST[3], TEMP[7] 49: MOV TEMP[8].z, TEMP[7].xxxx 50: MUL TEMP[7].x, TEMP[3].yyyy, TEMP[3].yyyy 51: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[7].xxxx 52: MAD TEMP[3].xyz, CONST[4].xyzz, TEMP[3].xxxx, TEMP[8].xyzz 53: ADD TEMP[7].xyz, TEMP[0].xyzz, -CONST[0].xyzz 54: MOV TEMP[7].yzw, TEMP[7].yxyz 55: MOV TEMP[7].x, TEMP[1].zzzz 56: MOV TEMP[0].xyz, TEMP[0].xyzx 57: MOV OUT[7], TEMP[0] 58: MOV OUT[1], TEMP[2] 59: MOV OUT[3], TEMP[5] 60: MOV OUT[2], TEMP[4] 61: MOV OUT[4], TEMP[6] 62: MOV OUT[5], TEMP[3] 63: MOV OUT[0], TEMP[1] 64: MOV OUT[6], TEMP[7] 65: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %5, %7 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = extractelement <4 x float> %83, i32 3 %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 %90 = add i32 %5, %7 %91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %90) %92 = extractelement <4 x float> %91, i32 0 %93 = extractelement <4 x float> %91, i32 1 %94 = extractelement <4 x float> %91, i32 2 %95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0 %97 = add i32 %5, %7 %98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97) %99 = extractelement <4 x float> %98, i32 0 %100 = extractelement <4 x float> %98, i32 1 %101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0 %103 = add i32 %5, %7 %104 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %103) %105 = extractelement <4 x float> %104, i32 0 %106 = extractelement <4 x float> %104, i32 1 %107 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !tbaa !0 %109 = add i32 %5, %7 %110 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %108, i32 0, i32 %109) %111 = extractelement <4 x float> %110, i32 0 %112 = extractelement <4 x float> %110, i32 1 %113 = extractelement <4 x float> %110, i32 2 %114 = extractelement <4 x float> %110, i32 3 %115 = fmul float %31, %84 %116 = fmul float %32, %84 %117 = fmul float %33, %84 %118 = fmul float %34, %84 %119 = fmul float %35, %85 %120 = fadd float %119, %115 %121 = fmul float %36, %85 %122 = fadd float %121, %116 %123 = fmul float %37, %85 %124 = fadd float %123, %117 %125 = fmul float %38, %85 %126 = fadd float %125, %118 %127 = fmul float %39, %86 %128 = fadd float %127, %120 %129 = fmul float %40, %86 %130 = fadd float %129, %122 %131 = fmul float %41, %86 %132 = fadd float %131, %124 %133 = fmul float %42, %86 %134 = fadd float %133, %126 %135 = fmul float %43, %87 %136 = fadd float %135, %128 %137 = fmul float %44, %87 %138 = fadd float %137, %130 %139 = fmul float %45, %87 %140 = fadd float %139, %132 %141 = fmul float %64, %84 %142 = fmul float %65, %84 %143 = fmul float %66, %84 %144 = fmul float %67, %84 %145 = fmul float %68, %85 %146 = fadd float %145, %141 %147 = fmul float %69, %85 %148 = fadd float %147, %142 %149 = fmul float %70, %85 %150 = fadd float %149, %143 %151 = fmul float %71, %85 %152 = fadd float %151, %144 %153 = fmul float %72, %86 %154 = fadd float %153, %146 %155 = fmul float %73, %86 %156 = fadd float %155, %148 %157 = fmul float %74, %86 %158 = fadd float %157, %150 %159 = fmul float %75, %86 %160 = fadd float %159, %152 %161 = fmul float %76, %87 %162 = fadd float %161, %154 %163 = fmul float %77, %87 %164 = fadd float %163, %156 %165 = fmul float %78, %87 %166 = fadd float %165, %158 %167 = fmul float %79, %87 %168 = fadd float %167, %160 %169 = fmul float %99, %55 %170 = fadd float %169, %57 %171 = fmul float %100, %56 %172 = fadd float %171, %58 %173 = fcmp oeq float %63, 0.000000e+00 %. = select i1 %173, float %99, float %105 %.40 = select i1 %173, float %100, float %106 %174 = fmul float %., %59 %175 = fadd float %174, %61 %176 = fmul float %.40, %60 %177 = fadd float %176, %62 %178 = fmul float %46, %92 %179 = fmul float %49, %92 %180 = fmul float %52, %92 %181 = fmul float %47, %93 %182 = fadd float %181, %178 %183 = fmul float %50, %93 %184 = fadd float %183, %179 %185 = fmul float %53, %93 %186 = fadd float %185, %180 %187 = fmul float %48, %94 %188 = fadd float %187, %182 %189 = fmul float %51, %94 %190 = fadd float %189, %184 %191 = fmul float %54, %94 %192 = fadd float %191, %186 %193 = fmul float %188, %188 %194 = fmul float %190, %190 %195 = fadd float %194, %193 %196 = fmul float %192, %192 %197 = fadd float %195, %196 %198 = call float @llvm.AMDGPU.rsq.clamped.f32(float %197) %199 = fmul float %188, %198 %200 = fmul float %190, %198 %201 = fmul float %192, %198 %202 = fmul float %31, %111 %203 = fmul float %32, %111 %204 = fmul float %33, %111 %205 = fmul float %35, %112 %206 = fadd float %205, %202 %207 = fmul float %36, %112 %208 = fadd float %207, %203 %209 = fmul float %37, %112 %210 = fadd float %209, %204 %211 = fmul float %39, %113 %212 = fadd float %211, %206 %213 = fmul float %40, %113 %214 = fadd float %213, %208 %215 = fmul float %41, %113 %216 = fadd float %215, %210 %217 = fmul float %212, %212 %218 = fmul float %214, %214 %219 = fadd float %218, %217 %220 = fmul float %216, %216 %221 = fadd float %219, %220 %222 = call float @llvm.AMDGPU.rsq.clamped.f32(float %221) %223 = fmul float %212, %222 %224 = fmul float %214, %222 %225 = fmul float %216, %222 %226 = fmul float %201, %224 %227 = fmul float %199, %225 %228 = fmul float %200, %223 %229 = fmul float %200, %225 %230 = fsub float %229, %226 %231 = fmul float %201, %223 %232 = fsub float %231, %227 %233 = fmul float %199, %224 %234 = fsub float %233, %228 %235 = fmul float %230, %114 %236 = fmul float %232, %114 %237 = fmul float %234, %114 %238 = fmul float %199, %200 %239 = fmul float %200, %201 %240 = fmul float %201, %201 %241 = fmul float %201, %199 %242 = fmul float %16, %238 %243 = fmul float %17, %239 %244 = fadd float %242, %243 %245 = fmul float %18, %240 %246 = fadd float %244, %245 %247 = fmul float %19, %241 %248 = fadd float %246, %247 %249 = fmul float %20, %238 %250 = fmul float %21, %239 %251 = fadd float %249, %250 %252 = fmul float %22, %240 %253 = fadd float %251, %252 %254 = fmul float %23, %241 %255 = fadd float %253, %254 %256 = fmul float %24, %238 %257 = fmul float %25, %239 %258 = fadd float %256, %257 %259 = fmul float %26, %240 %260 = fadd float %258, %259 %261 = fmul float %27, %241 %262 = fadd float %260, %261 %263 = fmul float %200, %200 %264 = fmul float %199, %199 %265 = fsub float %264, %263 %266 = fmul float %28, %265 %267 = fadd float %266, %248 %268 = fmul float %29, %265 %269 = fadd float %268, %255 %270 = fmul float %30, %265 %271 = fadd float %270, %262 %272 = fsub float %136, %13 %273 = fsub float %138, %14 %274 = fsub float %140, %15 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %170, float %172, float %175, float %177) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %223, float %224, float %225, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %235, float %236, float %237, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %199, float %200, float %201, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %267, float %269, float %271, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %166, float %272, float %273, float %274) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %136, float %138, float %140, float %134) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %162, float %164, float %166, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908 s_load_dwordx4 s[16:19], s[8:9], 0xc ; C088090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s27, s[28:31], 0x20 ; C20D9D20 buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[11:14], v0, s[16:19], 0 idxen ; E00C2000 80040B00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[13:16], v0, s[8:11], 0 idxen ; E00C2000 80020D00 s_buffer_load_dword s32, s[28:31], 0x21 ; C2101D21 s_buffer_load_dword s33, s[28:31], 0x22 ; C2109D22 s_buffer_load_dword s34, s[28:31], 0x24 ; C2111D24 s_buffer_load_dword s35, s[28:31], 0x25 ; C2119D25 s_buffer_load_dword s1, s[28:31], 0x10 ; C2009D10 s_buffer_load_dword s2, s[28:31], 0x11 ; C2011D11 s_buffer_load_dword s0, s[28:31], 0x12 ; C2001D12 s_buffer_load_dword s18, s[28:31], 0x14 ; C2091D14 s_buffer_load_dword s17, s[28:31], 0x15 ; C2089D15 s_buffer_load_dword s36, s[28:31], 0x26 ; C2121D26 s_buffer_load_dword s37, s[28:31], 0x28 ; C2129D28 s_buffer_load_dword s38, s[28:31], 0x29 ; C2131D29 s_buffer_load_dword s39, s[28:31], 0x2a ; C2139D2A s_buffer_load_dword s40, s[28:31], 0x2c ; C2141D2C s_buffer_load_dword s20, s[28:31], 0x16 ; C20A1D16 s_buffer_load_dword s41, s[28:31], 0x17 ; C2149D17 s_buffer_load_dword s26, s[28:31], 0x18 ; C20D1D18 s_buffer_load_dword s25, s[28:31], 0x19 ; C20C9D19 s_buffer_load_dword s21, s[28:31], 0x1a ; C20A9D1A s_buffer_load_dword s42, s[28:31], 0x1b ; C2151D1B s_buffer_load_dword s23, s[28:31], 0x1c ; C20B9D1C s_buffer_load_dword s24, s[28:31], 0x1d ; C20C1D1D s_buffer_load_dword s22, s[28:31], 0x1e ; C20B1D1E s_buffer_load_dword s43, s[28:31], 0x1f ; C2159D1F s_buffer_load_dword s44, s[28:31], 0x2d ; C2161D2D s_buffer_load_dword s45, s[28:31], 0x2e ; C2169D2E s_buffer_load_dword s46, s[28:31], 0x34 ; C2171D34 s_buffer_load_dword s47, s[28:31], 0x35 ; C2179D35 s_buffer_load_dword s3, s[28:31], 0x36 ; C2019D36 s_buffer_load_dword s4, s[28:31], 0x3c ; C2021D3C s_buffer_load_dword s48, s[28:31], 0x40 ; C2181D40 s_buffer_load_dword s49, s[28:31], 0x41 ; C2189D41 s_buffer_load_dword s50, s[28:31], 0x42 ; C2191D42 s_buffer_load_dword s51, s[28:31], 0x43 ; C2199D43 s_buffer_load_dword s6, s[28:31], 0x37 ; C2031D37 s_buffer_load_dword s52, s[28:31], 0x38 ; C21A1D38 s_buffer_load_dword s53, s[28:31], 0x39 ; C21A9D39 s_buffer_load_dword s8, s[28:31], 0x3a ; C2041D3A s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s3 ; 7E000203 s_buffer_load_dword s10, s[28:31], 0x3b ; C2051D3B v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880 s_buffer_load_dword s3, s[28:31], 0x0 ; C2019D00 s_buffer_load_dword s4, s[28:31], 0x1 ; C2021D01 s_buffer_load_dword s5, s[28:31], 0x2 ; C2029D02 s_buffer_load_dword s9, s[28:31], 0x4 ; C2049D04 v_mov_b32_e32 v17, s6 ; 7E220206 s_buffer_load_dword s14, s[28:31], 0x5 ; C2071D05 s_buffer_load_dword s7, s[28:31], 0x6 ; C2039D06 s_buffer_load_dword s6, s[28:31], 0x7 ; C2031D07 v_mov_b32_e32 v18, s8 ; 7E240208 s_buffer_load_dword s12, s[28:31], 0x8 ; C2061D08 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v19, s10 ; 7E26020A s_buffer_load_dword s16, s[28:31], 0x9 ; C2081D09 s_buffer_load_dword s10, s[28:31], 0xa ; C2051D0A s_buffer_load_dword s8, s[28:31], 0xb ; C2041D0B s_buffer_load_dword s15, s[28:31], 0xc ; C2079D0C s_buffer_load_dword s19, s[28:31], 0xd ; C2099D0D s_buffer_load_dword s13, s[28:31], 0xe ; C2069D0E s_buffer_load_dword s11, s[28:31], 0xf ; C2059D0F s_buffer_load_dword s54, s[28:31], 0x44 ; C21B1D44 s_buffer_load_dword s55, s[28:31], 0x45 ; C21B9D45 s_buffer_load_dword s56, s[28:31], 0x46 ; C21C1D46 s_buffer_load_dword s57, s[28:31], 0x47 ; C21C9D47 s_buffer_load_dword s58, s[28:31], 0x48 ; C21D1D48 s_buffer_load_dword s59, s[28:31], 0x49 ; C21D9D49 s_buffer_load_dword s60, s[28:31], 0x4a ; C21E1D4A s_buffer_load_dword s61, s[28:31], 0x4b ; C21E9D4B s_buffer_load_dword s62, s[28:31], 0x4c ; C21F1D4C s_buffer_load_dword s63, s[28:31], 0x4d ; C21F9D4D s_buffer_load_dword s64, s[28:31], 0x4e ; C2201D4E s_buffer_load_dword s28, s[28:31], 0x4f ; C20E1D4F v_mul_f32_e32 v20, s41, v2 ; 10280429 v_mac_f32_e32 v20, s42, v3 ; 3E28062A v_mac_f32_e32 v20, s43, v4 ; 3E28082B v_mac_f32_e32 v0, s46, v9 ; 3E00122E v_mac_f32_e32 v17, s47, v10 ; 3E22142F v_mul_f32_e32 v21, s48, v2 ; 102A0430 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v21, s54, v3 ; 3E2A0636 v_mac_f32_e32 v21, s58, v4 ; 3E2A083A v_mac_f32_e32 v21, s62, v5 ; 3E2A0A3E v_mul_f32_e32 v22, s49, v2 ; 102C0431 v_mac_f32_e32 v22, s55, v3 ; 3E2C0637 v_mac_f32_e32 v22, s59, v4 ; 3E2C083B v_mac_f32_e32 v22, s63, v5 ; 3E2C0A3F v_mul_f32_e32 v23, s50, v2 ; 102E0432 v_mac_f32_e32 v23, s56, v3 ; 3E2E0638 v_mac_f32_e32 v23, s60, v4 ; 3E2E083C v_mac_f32_e32 v23, s64, v5 ; 3E2E0A40 v_mul_f32_e32 v24, s51, v2 ; 10300433 v_mac_f32_e32 v24, s57, v3 ; 3E300639 v_mac_f32_e32 v24, s61, v4 ; 3E30083D v_mac_f32_e32 v24, s28, v5 ; 3E300A1C v_cndmask_b32_e32 v9, v11, v9 ; 0012130B v_cndmask_b32_e32 v10, v12, v10 ; 0014150C v_mul_f32_e32 v11, s34, v6 ; 10160C22 v_mac_f32_e32 v11, s35, v7 ; 3E160E23 v_mul_f32_e32 v12, s37, v6 ; 10180C25 v_mac_f32_e32 v12, s38, v7 ; 3E180E26 v_mul_f32_e32 v6, s40, v6 ; 100C0C28 v_mac_f32_e32 v6, s44, v7 ; 3E0C0E2C v_mac_f32_e32 v11, s36, v8 ; 3E161024 v_mac_f32_e32 v12, s39, v8 ; 3E181027 v_mac_f32_e32 v6, s45, v8 ; 3E0C102D v_mul_f32_e32 v7, s18, v2 ; 100E0412 v_mac_f32_e32 v7, s26, v3 ; 3E0E061A v_mac_f32_e32 v7, s23, v4 ; 3E0E0817 v_mac_f32_e32 v7, s27, v5 ; 3E0E0A1B v_mul_f32_e32 v8, s17, v2 ; 10100411 v_mac_f32_e32 v8, s25, v3 ; 3E100619 v_mac_f32_e32 v8, s24, v4 ; 3E100818 v_mac_f32_e32 v8, s32, v5 ; 3E100A20 v_mul_f32_e32 v2, s20, v2 ; 10040414 v_mac_f32_e32 v2, s21, v3 ; 3E040615 v_mac_f32_e32 v2, s22, v4 ; 3E040816 v_mac_f32_e32 v2, s33, v5 ; 3E040A21 v_mac_f32_e32 v18, s52, v9 ; 3E241234 v_mac_f32_e32 v19, s53, v10 ; 3E261435 exp 15, 32, 0, 0, 0, v0, v17, v18, v19 ; F800020F 13121100 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s18, v13 ; 10001A12 v_mac_f32_e32 v0, s26, v14 ; 3E001C1A v_mul_f32_e32 v3, s17, v13 ; 10061A11 v_mac_f32_e32 v3, s25, v14 ; 3E061C19 v_mul_f32_e32 v4, s20, v13 ; 10081A14 v_mac_f32_e32 v4, s21, v14 ; 3E081C15 v_mac_f32_e32 v0, s23, v15 ; 3E001E17 v_mac_f32_e32 v3, s24, v15 ; 3E061E18 v_mac_f32_e32 v4, s22, v15 ; 3E081E16 v_mul_f32_e32 v5, v11, v11 ; 100A170B v_mac_f32_e32 v5, v12, v12 ; 3E0A190C v_mac_f32_e32 v5, v6, v6 ; 3E0A0D06 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mul_f32_e32 v9, v0, v0 ; 10120100 v_mac_f32_e32 v9, v3, v3 ; 3E120703 v_mac_f32_e32 v9, v4, v4 ; 3E120904 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mul_f32_e32 v10, v5, v11 ; 10141705 v_mul_f32_e32 v11, v5, v12 ; 10161905 v_mul_f32_e32 v5, v5, v6 ; 100A0D05 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v3, v9, v3 ; 10060709 v_mul_f32_e32 v4, v9, v4 ; 10080909 v_mul_f32_e32 v6, v3, v5 ; 100C0B03 v_mad_f32 v6, v11, v4, -v6 ; D2820006 841A090B v_mul_f32_e32 v9, v4, v10 ; 10121504 v_mad_f32 v9, v5, v0, -v9 ; D2820009 84260105 v_mul_f32_e32 v12, v0, v11 ; 10181700 v_mad_f32 v12, v10, v3, -v12 ; D282000C 8432070A v_mul_f32_e32 v6, v16, v6 ; 100C0D10 v_mul_f32_e32 v9, v16, v9 ; 10121310 v_mul_f32_e32 v12, v16, v12 ; 10181910 exp 15, 33, 0, 0, 0, v0, v3, v4, v1 ; F800021F 01040300 exp 15, 34, 0, 0, 0, v6, v9, v12, v1 ; F800022F 010C0906 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v5, v11 ; 10001705 v_mul_f32_e32 v3, s14, v0 ; 1006000E v_mul_f32_e32 v4, s16, v0 ; 10080010 v_mul_f32_e32 v0, s19, v0 ; 10000013 v_mul_f32_e32 v6, v11, v10 ; 100C150B v_mac_f32_e32 v3, s9, v6 ; 3E060C09 v_mac_f32_e32 v4, s12, v6 ; 3E080C0C v_mac_f32_e32 v0, s15, v6 ; 3E000C0F v_mul_f32_e32 v6, v5, v5 ; 100C0B05 v_mac_f32_e32 v3, s7, v6 ; 3E060C07 v_mac_f32_e32 v4, s10, v6 ; 3E080C0A v_mac_f32_e32 v0, s13, v6 ; 3E000C0D v_mul_f32_e32 v6, v10, v5 ; 100C0B0A v_mac_f32_e32 v3, s6, v6 ; 3E060C06 v_mac_f32_e32 v4, s8, v6 ; 3E080C08 v_mac_f32_e32 v0, s11, v6 ; 3E000C0B v_mul_f32_e32 v6, v11, v11 ; 100C170B v_mad_f32 v6, v10, v10, -v6 ; D2820006 841A150A v_mac_f32_e32 v3, s1, v6 ; 3E060C01 v_mac_f32_e32 v4, s2, v6 ; 3E080C02 v_mac_f32_e32 v0, s0, v6 ; 3E000C00 v_subrev_f32_e32 v6, s3, v7 ; 0A0C0E03 v_subrev_f32_e32 v9, s4, v8 ; 0A121004 v_subrev_f32_e32 v12, s5, v2 ; 0A180405 exp 15, 35, 0, 0, 0, v10, v11, v5, v1 ; F800023F 01050B0A exp 15, 36, 0, 0, 0, v3, v4, v0, v1 ; F800024F 01000403 exp 15, 37, 0, 0, 0, v23, v6, v9, v12 ; F800025F 0C090617 exp 15, 38, 0, 0, 0, v7, v8, v2, v20 ; F800026F 14020807 exp 15, 12, 0, 1, 0, v21, v22, v23, v24 ; F80008CF 18171615 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 28 Code Size: 892 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL IN[6], GENERIC[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SVIEW[0], CUBE, FLOAT DCL SVIEW[1], CUBE, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL CONST[0..5] DCL CONST[8..19] DCL CONST[22..24] DCL CONST[26] DCL TEMP[0..17], LOCAL IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[1] FLT32 { 0.5000, 0.7500, 7.0000, 1.0000} IMM[2] FLT32 { 10.0000, 0.9680, 0.0300, 0.0001} 0: MOV TEMP[0].x, IN[1].xxxx 1: MOV TEMP[0].y, IN[2].xxxx 2: MOV TEMP[0].z, IN[3].xxxx 3: MOV TEMP[1].x, IN[1].yyyy 4: MOV TEMP[1].y, IN[2].yyyy 5: MOV TEMP[1].z, IN[3].yyyy 6: MOV TEMP[2].x, IN[1].zzzz 7: MOV TEMP[2].y, IN[2].zzzz 8: MOV TEMP[2].z, IN[3].zzzz 9: MOV TEMP[3].xy, IN[0].xyyy 10: TEX TEMP[3].yw, TEMP[3], SAMP[3], 2D 11: MAD TEMP[3].xy, TEMP[3].wyyy, IMM[0].xxxx, IMM[0].yyyy 12: MUL TEMP[3].xy, TEMP[3].xyyy, CONST[22].xxxx 13: DP2 TEMP[4].x, TEMP[3].xyyy, TEMP[3].xyyy 14: MOV_SAT TEMP[4].x, TEMP[4].xxxx 15: ADD TEMP[4].x, IMM[0].zzzz, -TEMP[4].xxxx 16: SQRT TEMP[4].x, TEMP[4].xxxx 17: MOV TEMP[3].z, TEMP[4].xxxx 18: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[0].xyzz 19: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[1].xyzz 20: MOV TEMP[0].y, TEMP[1].xxxx 21: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[2].xyzz 22: MOV TEMP[0].z, TEMP[1].xxxx 23: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 24: RSQ TEMP[1].x, TEMP[1].xxxx 25: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 26: DP3 TEMP[1].x, IN[5].yzww, IN[5].yzww 27: RSQ TEMP[1].x, TEMP[1].xxxx 28: MUL TEMP[1].xyz, IN[5].yzww, TEMP[1].xxxx 29: MOV TEMP[2].xy, IN[0].xyyy 30: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D 31: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[2].xyzz 32: LRP TEMP[3].xyz, CONST[23].xxxx, TEMP[2].xyzz, CONST[16].xyzz 33: MUL TEMP[4].x, CONST[23].xxxx, CONST[16].wwww 34: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx 35: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx 36: MOV TEMP[5].xy, IN[0].xyyy 37: TEX TEMP[5].y, TEMP[5], SAMP[4], 2D 38: ADD TEMP[6].x, IMM[0].zzzz, -CONST[26].xxxx 39: MAD TEMP[5].x, TEMP[5].yyyy, CONST[26].xxxx, TEMP[6].xxxx 40: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz 41: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx 42: MOV TEMP[7].xyz, IMM[0].wwww 43: MOV TEMP[8].w, IMM[0].zzzz 44: MOV TEMP[8].xyz, TEMP[0].xyzx 45: DP4 TEMP[9].x, CONST[1], TEMP[8] 46: DP4 TEMP[10].x, CONST[2], TEMP[8] 47: MOV TEMP[9].y, TEMP[10].xxxx 48: DP4 TEMP[8].x, CONST[3], TEMP[8] 49: MOV TEMP[9].z, TEMP[8].xxxx 50: ADD TEMP[8].xyz, IN[4].xyzz, TEMP[9].xyzz 51: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx 52: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[1].xyzz 53: MUL TEMP[9].xyz, TEMP[9].xxxx, TEMP[0].xyzz 54: MUL TEMP[9].xyz, IMM[0].xxxx, TEMP[9].xyzz 55: ADD TEMP[9].xyz, TEMP[1].xyzz, -TEMP[9].xyzz 56: MOV TEMP[10].xyz, TEMP[9].xyzx 57: FSLT TEMP[11].x, IMM[0].wwww, CONST[10].wwww 58: UIF TEMP[11].xxxx :0 59: DP3 TEMP[11].x, TEMP[9].xyzz, TEMP[9].xyzz 60: RSQ TEMP[11].x, TEMP[11].xxxx 61: MUL TEMP[11].xyz, TEMP[9].xyzz, TEMP[11].xxxx 62: MOV TEMP[12].xyz, -IN[6].xyzx 63: ADD TEMP[13].xyz, CONST[8].xyzz, TEMP[12].xyzz 64: RCP TEMP[14].x, TEMP[11].xxxx 65: RCP TEMP[14].y, TEMP[11].yyyy 66: RCP TEMP[14].z, TEMP[11].zzzz 67: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz 68: ADD TEMP[12].xyz, CONST[9].xyzz, TEMP[12].xyzz 69: RCP TEMP[14].x, TEMP[11].xxxx 70: RCP TEMP[14].y, TEMP[11].yyyy 71: RCP TEMP[14].z, TEMP[11].zzzz 72: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz 73: FSLT TEMP[14].xyz, IMM[0].wwww, TEMP[11].xyzz 74: UIF TEMP[14].xxxx :0 75: MOV TEMP[15].x, TEMP[13].xxxx 76: ELSE :0 77: MOV TEMP[15].x, TEMP[12].xxxx 78: ENDIF 79: UIF TEMP[14].yyyy :0 80: MOV TEMP[16].x, TEMP[13].yyyy 81: ELSE :0 82: MOV TEMP[16].x, TEMP[12].yyyy 83: ENDIF 84: UIF TEMP[14].zzzz :0 85: MOV TEMP[13].x, TEMP[13].zzzz 86: ELSE :0 87: MOV TEMP[13].x, TEMP[12].zzzz 88: ENDIF 89: ADD TEMP[12].xyz, CONST[8].xyzz, CONST[9].xyzz 90: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[1].xxxx 91: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx 92: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx 93: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[10].xyzz 94: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[6].xyzz 95: MAD TEMP[11].xyz, TEMP[11].xyzz, TEMP[13].xxxx, TEMP[14].xyzz 96: ADD TEMP[10].xyz, TEMP[11].xyzz, -TEMP[12].xyzz 97: ENDIF 98: ADD TEMP[11].x, IMM[0].zzzz, -CONST[24].xxxx 99: POW TEMP[11].x, TEMP[11].xxxx, IMM[1].yyyy 100: MUL TEMP[11].x, TEMP[11].xxxx, IMM[1].zzzz 101: MOV TEMP[10].xyz, TEMP[10].xyzz 102: MOV TEMP[10].w, TEMP[11].xxxx 103: TXL TEMP[10], TEMP[10], SAMP[0], CUBE 104: POW TEMP[11].x, TEMP[10].wwww, CONST[11].yyyy 105: MUL TEMP[11].x, CONST[11].xxxx, TEMP[11].xxxx 106: MUL TEMP[10].xyz, TEMP[11].xxxx, TEMP[10].xyzz 107: FSLT TEMP[11].x, CONST[9].wwww, IMM[1].wwww 108: UIF TEMP[11].xxxx :0 109: MOV TEMP[11].xyz, TEMP[9].xyzx 110: FSLT TEMP[12].x, IMM[0].wwww, CONST[14].wwww 111: UIF TEMP[12].xxxx :0 112: DP3 TEMP[12].x, TEMP[9].xyzz, TEMP[9].xyzz 113: RSQ TEMP[12].x, TEMP[12].xxxx 114: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[12].xxxx 115: MOV TEMP[12].xyz, -IN[6].xyzx 116: ADD TEMP[13].xyz, CONST[12].xyzz, TEMP[12].xyzz 117: RCP TEMP[14].x, TEMP[9].xxxx 118: RCP TEMP[14].y, TEMP[9].yyyy 119: RCP TEMP[14].z, TEMP[9].zzzz 120: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz 121: ADD TEMP[12].xyz, CONST[13].xyzz, TEMP[12].xyzz 122: RCP TEMP[14].x, TEMP[9].xxxx 123: RCP TEMP[14].y, TEMP[9].yyyy 124: RCP TEMP[14].z, TEMP[9].zzzz 125: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz 126: FSLT TEMP[14].xyz, IMM[0].wwww, TEMP[9].xyzz 127: UIF TEMP[14].xxxx :0 128: MOV TEMP[15].x, TEMP[13].xxxx 129: ELSE :0 130: MOV TEMP[15].x, TEMP[12].xxxx 131: ENDIF 132: UIF TEMP[14].yyyy :0 133: MOV TEMP[16].x, TEMP[13].yyyy 134: ELSE :0 135: MOV TEMP[16].x, TEMP[12].yyyy 136: ENDIF 137: UIF TEMP[14].zzzz :0 138: MOV TEMP[13].x, TEMP[13].zzzz 139: ELSE :0 140: MOV TEMP[13].x, TEMP[12].zzzz 141: ENDIF 142: ADD TEMP[12].xyz, CONST[12].xyzz, CONST[13].xyzz 143: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[1].xxxx 144: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx 145: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx 146: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[14].xyzz 147: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[6].xyzz 148: MAD TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xxxx, TEMP[14].xyzz 149: ADD TEMP[11].xyz, TEMP[9].xyzz, -TEMP[12].xyzz 150: ENDIF 151: ADD TEMP[9].x, IMM[0].zzzz, -CONST[24].xxxx 152: POW TEMP[9].x, TEMP[9].xxxx, IMM[1].yyyy 153: MUL TEMP[9].x, TEMP[9].xxxx, IMM[1].zzzz 154: MOV TEMP[11].xyz, TEMP[11].xyzz 155: MOV TEMP[11].w, TEMP[9].xxxx 156: TXL TEMP[9], TEMP[11], SAMP[1], CUBE 157: POW TEMP[11].x, TEMP[9].wwww, CONST[15].yyyy 158: MUL TEMP[11].x, CONST[15].xxxx, TEMP[11].xxxx 159: MUL TEMP[9].xyz, TEMP[11].xxxx, TEMP[9].xyzz 160: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[10].xyzz, TEMP[9].xyzz 161: ELSE :0 162: MOV TEMP[7].xyz, TEMP[10].xyzx 163: ENDIF 164: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx 165: MOV TEMP[1].xyz, -TEMP[1].xyzx 166: ADD TEMP[5].x, IMM[0].zzzz, -CONST[24].xxxx 167: ADD TEMP[9].xyz, CONST[0].xyzz, TEMP[1].xyzz 168: DP3 TEMP[10].x, TEMP[9].xyzz, TEMP[9].xyzz 169: RSQ TEMP[10].x, TEMP[10].xxxx 170: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xxxx 171: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz 172: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx 173: DP3 TEMP[10].x, CONST[0].xyzz, TEMP[9].xyzz 174: MAX TEMP[10].x, IMM[0].wwww, TEMP[10].xxxx 175: MUL TEMP[11].x, TEMP[5].xxxx, TEMP[5].xxxx 176: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].wwww 177: ADD TEMP[12].x, IMM[0].zzzz, -TEMP[5].xxxx 178: MAD TEMP[12].x, TEMP[12].xxxx, IMM[2].yyyy, IMM[2].zzzz 179: LG2 TEMP[12].x, TEMP[12].xxxx 180: RCP TEMP[12].x, TEMP[12].xxxx 181: MUL TEMP[12].x, IMM[2].xxxx, TEMP[12].xxxx 182: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[12].xxxx 183: ADD TEMP[13].x, IMM[0].zzzz, -TEMP[6].xxxx 184: ADD TEMP[14].x, IMM[0].zzzz, -TEMP[1].xxxx 185: MUL TEMP[15].x, IMM[0].xxxx, TEMP[10].xxxx 186: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx 187: MAD TEMP[5].x, TEMP[15].xxxx, TEMP[5].xxxx, IMM[1].xxxx 188: ADD TEMP[10].x, IMM[0].zzzz, -TEMP[10].xxxx 189: ADD TEMP[15].x, IMM[0].zzzz, -TEMP[1].xxxx 190: ADD TEMP[4].x, IMM[0].zzzz, -TEMP[4].xxxx 191: ADD TEMP[4].x, CONST[24].xxxx, TEMP[4].xxxx 192: MOV_SAT TEMP[4].x, TEMP[4].xxxx 193: MUL TEMP[16].x, TEMP[15].xxxx, TEMP[15].xxxx 194: MUL TEMP[17].x, TEMP[15].xxxx, TEMP[15].xxxx 195: MUL TEMP[15].x, TEMP[17].xxxx, TEMP[15].xxxx 196: MUL TEMP[15].x, TEMP[16].xxxx, TEMP[15].xxxx 197: LRP TEMP[4].xyz, TEMP[15].xxxx, TEMP[4].xxxx, TEMP[3].xyzz 198: LRP TEMP[15].x, TEMP[6].xxxx, IMM[0].zzzz, TEMP[11].xxxx 199: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, TEMP[11].xxxx 200: MAD TEMP[1].x, TEMP[15].xxxx, TEMP[1].xxxx, IMM[2].wwww 201: RCP TEMP[1].x, TEMP[1].xxxx 202: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[9].xyzz 203: MAX TEMP[9].x, IMM[0].wwww, TEMP[9].xxxx 204: POW TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx 205: ADD TEMP[11].x, TEMP[12].xxxx, IMM[0].zzzz 206: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].yyyy 207: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx 208: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[9].xxxx 209: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx 210: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx 211: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx 212: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[17].xyzz 213: ADD TEMP[9].xyz, IMM[0].zzzz, -TEMP[3].xyzz 214: MUL TEMP[11].x, TEMP[10].xxxx, TEMP[10].xxxx 215: MUL TEMP[12].x, TEMP[10].xxxx, TEMP[10].xxxx 216: MUL TEMP[10].x, TEMP[12].xxxx, TEMP[10].xxxx 217: MUL TEMP[10].x, TEMP[11].xxxx, TEMP[10].xxxx 218: MAD TEMP[3].xyz, TEMP[9].xyzz, TEMP[10].xxxx, TEMP[3].xyzz 219: ADD TEMP[9].x, TEMP[5].xxxx, IMM[0].yyyy 220: MUL TEMP[10].x, TEMP[13].xxxx, TEMP[13].xxxx 221: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[13].xxxx 222: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[13].xxxx 223: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 224: MAD TEMP[9].x, TEMP[9].xxxx, TEMP[10].xxxx, IMM[0].zzzz 225: ADD TEMP[5].x, TEMP[5].xxxx, IMM[0].yyyy 226: MUL TEMP[10].x, TEMP[14].xxxx, TEMP[14].xxxx 227: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx 228: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx 229: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 230: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[10].xxxx, IMM[0].zzzz 231: MUL TEMP[5].x, TEMP[9].xxxx, TEMP[5].xxxx 232: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 233: MAD TEMP[5].xyz, CONST[17].xyzz, TEMP[5].xxxx, TEMP[8].xyzz 234: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz 235: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz 236: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz 237: MOV TEMP[0].xyz, TEMP[0].xyzx 238: MAD TEMP[1].x, IN[5].xxxx, CONST[5].zzzz, CONST[5].wwww 239: MOV_SAT TEMP[1].x, TEMP[1].xxxx 240: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz 241: MOV TEMP[0].xyz, TEMP[0].xyzx 242: MOV TEMP[0].w, IMM[0].zzzz 243: MOV OUT[0], TEMP[0] 244: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300) %76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312) %79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368) %81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384) %82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 416) %83 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %84 = load <32 x i8>, <32 x i8> addrspace(2)* %83, align 32, !tbaa !0 %85 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0 %87 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %88 = bitcast <8 x i32> addrspace(2)* %87 to <32 x i8> addrspace(2)* %89 = load <32 x i8>, <32 x i8> addrspace(2)* %88, align 32, !tbaa !0 %90 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %91 = bitcast <4 x i32> addrspace(2)* %90 to <16 x i8> addrspace(2)* %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !tbaa !0 %93 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %94 = bitcast <8 x i32> addrspace(2)* %93 to <32 x i8> addrspace(2)* %95 = load <32 x i8>, <32 x i8> addrspace(2)* %94, align 32, !tbaa !0 %96 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %97 = bitcast <4 x i32> addrspace(2)* %96 to <16 x i8> addrspace(2)* %98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0 %99 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %100 = bitcast <8 x i32> addrspace(2)* %99 to <32 x i8> addrspace(2)* %101 = load <32 x i8>, <32 x i8> addrspace(2)* %100, align 32, !tbaa !0 %102 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %103 = bitcast <4 x i32> addrspace(2)* %102 to <16 x i8> addrspace(2)* %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0 %105 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %106 = bitcast <8 x i32> addrspace(2)* %105 to <32 x i8> addrspace(2)* %107 = load <32 x i8>, <32 x i8> addrspace(2)* %106, align 32, !tbaa !0 %108 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %109 = bitcast <4 x i32> addrspace(2)* %108 to <16 x i8> addrspace(2)* %110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0 %111 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %113 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %114 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %115 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %116 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %117 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %119 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %123 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %124 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %125 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %126 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %127 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %128 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7) %129 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %130 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %131 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %132 = bitcast float %111 to i32 %133 = bitcast float %112 to i32 %134 = insertelement <2 x i32> undef, i32 %132, i32 0 %135 = insertelement <2 x i32> %134, i32 %133, i32 1 %136 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %135, <32 x i8> %101, <16 x i8> %104, i32 2) %137 = extractelement <4 x float> %136, i32 1 %138 = extractelement <4 x float> %136, i32 3 %139 = fmul float %138, 2.000000e+00 %140 = fadd float %139, -1.000000e+00 %141 = fmul float %137, 2.000000e+00 %142 = fadd float %141, -1.000000e+00 %143 = fmul float %140, %79 %144 = fmul float %142, %79 %145 = fmul float %143, %143 %146 = fmul float %144, %144 %147 = fadd float %145, %146 %148 = call float @llvm.AMDIL.clamp.(float %147, float 0.000000e+00, float 1.000000e+00) %149 = fsub float 1.000000e+00, %148 %150 = call float @llvm.sqrt.f32(float %149) %151 = fmul float %143, %113 %152 = fmul float %144, %116 %153 = fadd float %152, %151 %154 = fmul float %150, %119 %155 = fadd float %153, %154 %156 = fmul float %143, %114 %157 = fmul float %144, %117 %158 = fadd float %157, %156 %159 = fmul float %150, %120 %160 = fadd float %158, %159 %161 = fmul float %143, %115 %162 = fmul float %144, %118 %163 = fadd float %162, %161 %164 = fmul float %150, %121 %165 = fadd float %163, %164 %166 = fmul float %155, %155 %167 = fmul float %160, %160 %168 = fadd float %167, %166 %169 = fmul float %165, %165 %170 = fadd float %168, %169 %171 = call float @llvm.AMDGPU.rsq.clamped.f32(float %170) %172 = fmul float %155, %171 %173 = fmul float %160, %171 %174 = fmul float %165, %171 %175 = fmul float %126, %126 %176 = fmul float %127, %127 %177 = fadd float %176, %175 %178 = fmul float %128, %128 %179 = fadd float %177, %178 %180 = call float @llvm.AMDGPU.rsq.clamped.f32(float %179) %181 = fmul float %126, %180 %182 = fmul float %127, %180 %183 = fmul float %128, %180 %184 = bitcast float %111 to i32 %185 = bitcast float %112 to i32 %186 = insertelement <2 x i32> undef, i32 %184, i32 0 %187 = insertelement <2 x i32> %186, i32 %185, i32 1 %188 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %187, <32 x i8> %95, <16 x i8> %98, i32 2) %189 = extractelement <4 x float> %188, i32 0 %190 = extractelement <4 x float> %188, i32 1 %191 = extractelement <4 x float> %188, i32 2 %192 = fmul float %76, %189 %193 = fmul float %77, %190 %194 = fmul float %78, %191 %195 = call float @llvm.AMDGPU.lrp(float %80, float %192, float %66) %196 = call float @llvm.AMDGPU.lrp(float %80, float %193, float %67) %197 = call float @llvm.AMDGPU.lrp(float %80, float %194, float %68) %198 = fmul float %80, %69 %199 = fsub float %69, %198 %200 = fmul float %192, %199 %201 = fmul float %193, %199 %202 = fmul float %194, %199 %203 = bitcast float %111 to i32 %204 = bitcast float %112 to i32 %205 = insertelement <2 x i32> undef, i32 %203, i32 0 %206 = insertelement <2 x i32> %205, i32 %204, i32 1 %207 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %206, <32 x i8> %107, <16 x i8> %110, i32 2) %208 = extractelement <4 x float> %207, i32 1 %209 = fsub float 1.000000e+00, %82 %210 = fmul float %208, %82 %211 = fadd float %210, %209 %212 = fmul float %172, %24 %213 = fmul float %173, %25 %214 = fadd float %213, %212 %215 = fmul float %174, %26 %216 = fadd float %214, %215 %217 = call float @llvm.maxnum.f32(float %216, float 0.000000e+00) %218 = fmul float %27, %172 %219 = fmul float %28, %173 %220 = fadd float %218, %219 %221 = fmul float %29, %174 %222 = fadd float %220, %221 %223 = fadd float %222, %30 %224 = fmul float %31, %172 %225 = fmul float %32, %173 %226 = fadd float %224, %225 %227 = fmul float %33, %174 %228 = fadd float %226, %227 %229 = fadd float %228, %34 %230 = fmul float %35, %172 %231 = fmul float %36, %173 %232 = fadd float %230, %231 %233 = fmul float %37, %174 %234 = fadd float %232, %233 %235 = fadd float %234, %38 %236 = fadd float %122, %223 %237 = fadd float %123, %229 %238 = fadd float %124, %235 %239 = fmul float %236, %211 %240 = fmul float %237, %211 %241 = fmul float %238, %211 %242 = fmul float %172, %181 %243 = fmul float %173, %182 %244 = fadd float %243, %242 %245 = fmul float %174, %183 %246 = fadd float %244, %245 %247 = fmul float %246, %172 %248 = fmul float %246, %173 %249 = fmul float %246, %174 %250 = fmul float %247, 2.000000e+00 %251 = fmul float %248, 2.000000e+00 %252 = fmul float %249, 2.000000e+00 %253 = fsub float %181, %250 %254 = fsub float %182, %251 %255 = fsub float %183, %252 %256 = fcmp ogt float %51, 0.000000e+00 br i1 %256, label %IF, label %ENDIF IF: ; preds = %main_body %257 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %258 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %259 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %260 = fmul float %253, %253 %261 = fmul float %254, %254 %262 = fadd float %261, %260 %263 = fmul float %255, %255 %264 = fadd float %262, %263 %265 = call float @llvm.AMDGPU.rsq.clamped.f32(float %264) %266 = fmul float %253, %265 %267 = fmul float %254, %265 %268 = fmul float %255, %265 %269 = fsub float %44, %129 %270 = fsub float %45, %130 %271 = fsub float %46, %131 %272 = fdiv float 1.000000e+00, %266 %273 = fdiv float 1.000000e+00, %267 %274 = fdiv float 1.000000e+00, %268 %275 = fmul float %269, %272 %276 = fmul float %270, %273 %277 = fmul float %271, %274 %278 = fsub float %47, %129 %279 = fsub float %48, %130 %280 = fsub float %49, %131 %281 = fdiv float 1.000000e+00, %266 %282 = fdiv float 1.000000e+00, %267 %283 = fdiv float 1.000000e+00, %268 %284 = fmul float %278, %281 %285 = fmul float %279, %282 %286 = fmul float %280, %283 %287 = fcmp ogt float %266, 0.000000e+00 %288 = fcmp ogt float %267, 0.000000e+00 %289 = fcmp ogt float %268, 0.000000e+00 %. = select i1 %287, float %275, float %284 %temp64.0 = select i1 %288, float %276, float %285 %.96 = select i1 %289, float %277, float %286 %290 = fadd float %44, %47 %291 = fadd float %45, %48 %292 = fadd float %46, %49 %293 = fmul float %290, 5.000000e-01 %294 = fmul float %291, 5.000000e-01 %295 = fmul float %292, 5.000000e-01 %296 = call float @llvm.minnum.f32(float %., float %temp64.0) %297 = call float @llvm.minnum.f32(float %296, float %.96) %298 = fsub float %293, %259 %299 = fsub float %294, %258 %300 = fsub float %295, %257 %301 = fadd float %298, %129 %302 = fadd float %299, %130 %303 = fadd float %300, %131 %304 = fmul float %266, %297 %305 = fadd float %304, %301 %306 = fmul float %267, %297 %307 = fadd float %306, %302 %308 = fmul float %268, %297 %309 = fadd float %308, %303 %310 = fsub float %305, %293 %311 = fsub float %307, %294 %312 = fsub float %309, %295 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp40.0 = phi float [ %310, %IF ], [ %253, %main_body ] %temp41.0 = phi float [ %311, %IF ], [ %254, %main_body ] %temp42.0 = phi float [ %312, %IF ], [ %255, %main_body ] %313 = fsub float 1.000000e+00, %81 %314 = call float @llvm.pow.f32(float %313, float 7.500000e-01) %315 = fmul float %314, 7.000000e+00 %316 = insertelement <4 x float> undef, float %temp40.0, i32 0 %317 = insertelement <4 x float> %316, float %temp41.0, i32 1 %318 = insertelement <4 x float> %317, float %temp42.0, i32 2 %319 = insertelement <4 x float> %318, float %315, i32 3 %320 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %319) %321 = extractelement <4 x float> %320, i32 0 %322 = extractelement <4 x float> %320, i32 1 %323 = extractelement <4 x float> %320, i32 2 %324 = extractelement <4 x float> %320, i32 3 %325 = call float @llvm.fabs.f32(float %323) %326 = fdiv float 1.000000e+00, %325 %327 = fmul float %321, %326 %328 = fadd float %327, 1.500000e+00 %329 = fmul float %322, %326 %330 = fadd float %329, 1.500000e+00 %331 = bitcast float %330 to i32 %332 = bitcast float %328 to i32 %333 = bitcast float %324 to i32 %334 = bitcast float %315 to i32 %335 = insertelement <4 x i32> undef, i32 %331, i32 0 %336 = insertelement <4 x i32> %335, i32 %332, i32 1 %337 = insertelement <4 x i32> %336, i32 %333, i32 2 %338 = insertelement <4 x i32> %337, i32 %334, i32 3 %339 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %338, <32 x i8> %84, <16 x i8> %86, i32 4) %340 = extractelement <4 x float> %339, i32 0 %341 = extractelement <4 x float> %339, i32 1 %342 = extractelement <4 x float> %339, i32 2 %343 = extractelement <4 x float> %339, i32 3 %344 = call float @llvm.pow.f32(float %343, float %53) %345 = fmul float %52, %344 %346 = fmul float %345, %340 %347 = fmul float %345, %341 %348 = fmul float %345, %342 %349 = fcmp olt float %50, 0x3FEFFFEB00000000 br i1 %349, label %IF82, label %ENDIF81 IF82: ; preds = %ENDIF %350 = fcmp ogt float %63, 0.000000e+00 br i1 %350, label %IF85, label %ENDIF84 ENDIF81: ; preds = %ENDIF, %ENDIF84 %temp28.0 = phi float [ %578, %ENDIF84 ], [ %346, %ENDIF ] %temp29.0 = phi float [ %579, %ENDIF84 ], [ %347, %ENDIF ] %temp30.0 = phi float [ %580, %ENDIF84 ], [ %348, %ENDIF ] %351 = fmul float %temp28.0, %211 %352 = fmul float %temp29.0, %211 %353 = fmul float %temp30.0, %211 %354 = fsub float 1.000000e+00, %81 %355 = fsub float %24, %181 %356 = fsub float %25, %182 %357 = fsub float %26, %183 %358 = fmul float %355, %355 %359 = fmul float %356, %356 %360 = fadd float %359, %358 %361 = fmul float %357, %357 %362 = fadd float %360, %361 %363 = call float @llvm.AMDGPU.rsq.clamped.f32(float %362) %364 = fmul float %355, %363 %365 = fmul float %356, %363 %366 = fmul float %357, %363 %367 = fmul float %181, %172 %368 = fsub float -0.000000e+00, %367 %369 = fmul float %182, %173 %370 = fsub float %368, %369 %371 = fmul float %183, %174 %372 = fsub float %370, %371 %373 = call float @llvm.maxnum.f32(float %372, float 0.000000e+00) %374 = fmul float %24, %364 %375 = fmul float %25, %365 %376 = fadd float %375, %374 %377 = fmul float %26, %366 %378 = fadd float %376, %377 %379 = call float @llvm.maxnum.f32(float %378, float 0.000000e+00) %380 = fmul float %354, %354 %381 = fmul float %380, %75 %382 = fsub float 1.000000e+00, %354 %383 = fmul float %382, 0x3FEEF9DB20000000 %384 = fadd float %383, 0x3F9EB851E0000000 %385 = call float @llvm.log2.f32(float %384) %386 = fdiv float 1.000000e+00, %385 %387 = fmul float %386, 1.000000e+01 %388 = fmul float %387, %387 %389 = fsub float 1.000000e+00, %217 %390 = fsub float 1.000000e+00, %373 %391 = fmul float %379, 2.000000e+00 %392 = fmul float %379, %354 %393 = fmul float %391, %392 %394 = fadd float %393, 5.000000e-01 %395 = fsub float 1.000000e+00, %379 %396 = fsub float 1.000000e+00, %373 %397 = fsub float 1.000000e+00, %199 %398 = fadd float %81, %397 %399 = call float @llvm.AMDIL.clamp.(float %398, float 0.000000e+00, float 1.000000e+00) %400 = fmul float %396, %396 %401 = fmul float %396, %396 %402 = fmul float %401, %396 %403 = fmul float %400, %402 %404 = call float @llvm.AMDGPU.lrp(float %403, float %399, float %195) %405 = call float @llvm.AMDGPU.lrp(float %403, float %399, float %196) %406 = call float @llvm.AMDGPU.lrp(float %403, float %399, float %197) %407 = call float @llvm.AMDGPU.lrp(float %217, float 1.000000e+00, float %381) %408 = call float @llvm.AMDGPU.lrp(float %373, float 1.000000e+00, float %381) %409 = fmul float %407, %408 %410 = fadd float %409, 0x3F1A36E2E0000000 %411 = fdiv float 1.000000e+00, %410 %412 = fmul float %172, %364 %413 = fmul float %173, %365 %414 = fadd float %413, %412 %415 = fmul float %174, %366 %416 = fadd float %414, %415 %417 = call float @llvm.maxnum.f32(float %416, float 0.000000e+00) %418 = call float @llvm.pow.f32(float %417, float %388) %419 = fadd float %388, 1.000000e+00 %420 = fmul float %419, %74 %421 = fmul float %418, %420 %422 = fmul float %411, %421 %423 = fmul float %422, %217 %424 = fmul float %423, %73 %425 = call float @llvm.maxnum.f32(float %424, float 0.000000e+00) %426 = fmul float %425, %70 %427 = fmul float %425, %71 %428 = fmul float %425, %72 %429 = fsub float 1.000000e+00, %195 %430 = fsub float 1.000000e+00, %196 %431 = fsub float 1.000000e+00, %197 %432 = fmul float %395, %395 %433 = fmul float %395, %395 %434 = fmul float %433, %395 %435 = fmul float %432, %434 %436 = fmul float %429, %435 %437 = fadd float %436, %195 %438 = fmul float %430, %435 %439 = fadd float %438, %196 %440 = fmul float %431, %435 %441 = fadd float %440, %197 %442 = fadd float %394, -1.000000e+00 %443 = fmul float %389, %389 %444 = fmul float %389, %389 %445 = fmul float %444, %389 %446 = fmul float %443, %445 %447 = fmul float %442, %446 %448 = fadd float %447, 1.000000e+00 %449 = fadd float %394, -1.000000e+00 %450 = fmul float %390, %390 %451 = fmul float %390, %390 %452 = fmul float %451, %390 %453 = fmul float %450, %452 %454 = fmul float %449, %453 %455 = fadd float %454, 1.000000e+00 %456 = fmul float %448, %455 %457 = fmul float %456, %217 %458 = fmul float %70, %457 %459 = fadd float %458, %239 %460 = fmul float %71, %457 %461 = fadd float %460, %240 %462 = fmul float %72, %457 %463 = fadd float %462, %241 %464 = fmul float %200, %459 %465 = fmul float %201, %461 %466 = fmul float %202, %463 %467 = fmul float %426, %437 %468 = fadd float %467, %464 %469 = fmul float %427, %439 %470 = fadd float %469, %465 %471 = fmul float %428, %441 %472 = fadd float %471, %466 %473 = fmul float %351, %404 %474 = fadd float %473, %468 %475 = fmul float %352, %405 %476 = fadd float %475, %470 %477 = fmul float %353, %406 %478 = fadd float %477, %472 %479 = fmul float %125, %42 %480 = fadd float %479, %43 %481 = call float @llvm.AMDIL.clamp.(float %480, float 0.000000e+00, float 1.000000e+00) %482 = call float @llvm.AMDGPU.lrp(float %481, float %474, float %39) %483 = call float @llvm.AMDGPU.lrp(float %481, float %476, float %40) %484 = call float @llvm.AMDGPU.lrp(float %481, float %478, float %41) %485 = call i32 @llvm.SI.packf16(float %482, float %483) %486 = bitcast i32 %485 to float %487 = call i32 @llvm.SI.packf16(float %484, float 1.000000e+00) %488 = bitcast i32 %487 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %486, float %488, float %486, float %488) ret void IF85: ; preds = %IF82 %489 = fmul float %253, %253 %490 = fmul float %254, %254 %491 = fadd float %490, %489 %492 = fmul float %255, %255 %493 = fadd float %491, %492 %494 = call float @llvm.AMDGPU.rsq.clamped.f32(float %493) %495 = fmul float %253, %494 %496 = fmul float %254, %494 %497 = fmul float %255, %494 %498 = fsub float %54, %129 %499 = fsub float %55, %130 %500 = fsub float %56, %131 %501 = fdiv float 1.000000e+00, %495 %502 = fdiv float 1.000000e+00, %496 %503 = fdiv float 1.000000e+00, %497 %504 = fmul float %498, %501 %505 = fmul float %499, %502 %506 = fmul float %500, %503 %507 = fsub float %57, %129 %508 = fsub float %58, %130 %509 = fsub float %59, %131 %510 = fdiv float 1.000000e+00, %495 %511 = fdiv float 1.000000e+00, %496 %512 = fdiv float 1.000000e+00, %497 %513 = fmul float %507, %510 %514 = fmul float %508, %511 %515 = fmul float %509, %512 %516 = fcmp ogt float %495, 0.000000e+00 %517 = fcmp ogt float %496, 0.000000e+00 %518 = fcmp ogt float %497, 0.000000e+00 %.97 = select i1 %516, float %504, float %513 %temp64.1 = select i1 %517, float %505, float %514 %.98 = select i1 %518, float %506, float %515 %519 = fadd float %54, %57 %520 = fadd float %55, %58 %521 = fadd float %56, %59 %522 = fmul float %519, 5.000000e-01 %523 = fmul float %520, 5.000000e-01 %524 = fmul float %521, 5.000000e-01 %525 = call float @llvm.minnum.f32(float %.97, float %temp64.1) %526 = call float @llvm.minnum.f32(float %525, float %.98) %527 = fsub float %522, %60 %528 = fsub float %523, %61 %529 = fsub float %524, %62 %530 = fadd float %527, %129 %531 = fadd float %528, %130 %532 = fadd float %529, %131 %533 = fmul float %495, %526 %534 = fadd float %533, %530 %535 = fmul float %496, %526 %536 = fadd float %535, %531 %537 = fmul float %497, %526 %538 = fadd float %537, %532 %539 = fsub float %534, %522 %540 = fsub float %536, %523 %541 = fsub float %538, %524 br label %ENDIF84 ENDIF84: ; preds = %IF82, %IF85 %temp44.0 = phi float [ %539, %IF85 ], [ %253, %IF82 ] %temp45.0 = phi float [ %540, %IF85 ], [ %254, %IF82 ] %temp46.0 = phi float [ %541, %IF85 ], [ %255, %IF82 ] %542 = fsub float 1.000000e+00, %81 %543 = call float @llvm.pow.f32(float %542, float 7.500000e-01) %544 = fmul float %543, 7.000000e+00 %545 = insertelement <4 x float> undef, float %temp44.0, i32 0 %546 = insertelement <4 x float> %545, float %temp45.0, i32 1 %547 = insertelement <4 x float> %546, float %temp46.0, i32 2 %548 = insertelement <4 x float> %547, float %544, i32 3 %549 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %548) %550 = extractelement <4 x float> %549, i32 0 %551 = extractelement <4 x float> %549, i32 1 %552 = extractelement <4 x float> %549, i32 2 %553 = extractelement <4 x float> %549, i32 3 %554 = call float @llvm.fabs.f32(float %552) %555 = fdiv float 1.000000e+00, %554 %556 = fmul float %550, %555 %557 = fadd float %556, 1.500000e+00 %558 = fmul float %551, %555 %559 = fadd float %558, 1.500000e+00 %560 = bitcast float %559 to i32 %561 = bitcast float %557 to i32 %562 = bitcast float %553 to i32 %563 = bitcast float %544 to i32 %564 = insertelement <4 x i32> undef, i32 %560, i32 0 %565 = insertelement <4 x i32> %564, i32 %561, i32 1 %566 = insertelement <4 x i32> %565, i32 %562, i32 2 %567 = insertelement <4 x i32> %566, i32 %563, i32 3 %568 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %567, <32 x i8> %89, <16 x i8> %92, i32 4) %569 = extractelement <4 x float> %568, i32 0 %570 = extractelement <4 x float> %568, i32 1 %571 = extractelement <4 x float> %568, i32 2 %572 = extractelement <4 x float> %568, i32 3 %573 = call float @llvm.pow.f32(float %572, float %65) %574 = fmul float %64, %573 %575 = fmul float %574, %569 %576 = fmul float %574, %570 %577 = fmul float %574, %571 %578 = call float @llvm.AMDGPU.lrp(float %50, float %346, float %575) %579 = call float @llvm.AMDGPU.lrp(float %50, float %347, float %576) %580 = call float @llvm.AMDGPU.lrp(float %50, float %348, float %577) br label %ENDIF81 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v13, v0, 0, 0, [m0] ; C8340000 v_interp_p2_f32 v13, [v13], v1, 0, 0, [m0] ; C8350001 v_interp_p1_f32 v14, v0, 1, 0, [m0] ; C8380100 v_interp_p2_f32 v14, [v14], v1, 1, 0, [m0] ; C8390101 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800 v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 v_interp_p1_f32 v11, v0, 2, 2, [m0] ; C82C0A00 v_interp_p2_f32 v11, [v11], v1, 2, 2, [m0] ; C82D0A01 v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00 v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01 v_interp_p1_f32 v15, v0, 1, 3, [m0] ; C83C0D00 v_interp_p2_f32 v15, [v15], v1, 1, 3, [m0] ; C83D0D01 v_interp_p1_f32 v16, v0, 2, 3, [m0] ; C8400E00 v_interp_p2_f32 v16, [v16], v1, 2, 3, [m0] ; C8410E01 v_interp_p1_f32 v8, v0, 0, 4, [m0] ; C8201000 v_interp_p2_f32 v8, [v8], v1, 0, 4, [m0] ; C8211001 v_interp_p1_f32 v9, v0, 1, 4, [m0] ; C8241100 v_interp_p2_f32 v9, [v9], v1, 1, 4, [m0] ; C8251101 v_interp_p1_f32 v10, v0, 2, 4, [m0] ; C8281200 v_interp_p2_f32 v10, [v10], v1, 2, 4, [m0] ; C8291201 v_interp_p1_f32 v3, v0, 0, 5, [m0] ; C80C1400 v_interp_p2_f32 v3, [v3], v1, 0, 5, [m0] ; C80D1401 v_interp_p1_f32 v18, v0, 1, 5, [m0] ; C8481500 v_interp_p2_f32 v18, [v18], v1, 1, 5, [m0] ; C8491501 v_interp_p1_f32 v19, v0, 2, 5, [m0] ; C84C1600 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p2_f32 v19, [v19], v1, 2, 5, [m0] ; C84D1601 v_interp_p1_f32 v20, v0, 3, 5, [m0] ; C8501700 v_interp_p2_f32 v20, [v20], v1, 3, 5, [m0] ; C8511701 s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C s_load_dwordx8 s[20:27], s[6:7], 0x18 ; C0CA0718 v_interp_p1_f32 v21, v0, 0, 6, [m0] ; C8541800 v_interp_p2_f32 v21, [v21], v1, 0, 6, [m0] ; C8551801 v_interp_p1_f32 v17, v0, 1, 6, [m0] ; C8441900 v_interp_p2_f32 v17, [v17], v1, 1, 6, [m0] ; C8451901 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[8:11], 0x58 ; C2060958 v_interp_p1_f32 v22, v0, 2, 6, [m0] ; C8581A00 v_interp_p2_f32 v22, [v22], v1, 2, 6, [m0] ; C8591A01 s_load_dwordx4 s[16:19], s[4:5], 0x10 ; C0880510 s_load_dwordx8 s[32:39], s[6:7], 0x20 ; C0D00720 image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[20:27], s[0:3] ; F0800A00 0005000D s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4 s_buffer_load_dword s21, s[8:11], 0x5c ; C20A895C s_buffer_load_dword s0, s[8:11], 0x60 ; C2000960 v_mul_f32_e32 v1, s12, v1 ; 1002020C v_mul_f32_e32 v0, s12, v0 ; 1000000C v_mul_f32_e32 v2, v2, v1 ; 10040302 v_mac_f32_e32 v2, v6, v0 ; 3E040106 v_mul_f32_e32 v4, v4, v1 ; 10080304 v_mac_f32_e32 v4, v7, v0 ; 3E080107 v_mul_f32_e32 v7, v5, v1 ; 100E0305 v_mac_f32_e32 v7, v11, v0 ; 3E0E010B v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mac_f32_e32 v0, v1, v1 ; 3E000301 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v0, 1.0, v0 ; 080000F2 v_sqrt_f32_e32 v0, v0 ; 7E006700 v_mac_f32_e32 v2, v12, v0 ; 3E04010C v_mac_f32_e32 v4, v15, v0 ; 3E08010F v_mac_f32_e32 v7, v16, v0 ; 3E0E0110 v_mul_f32_e32 v0, v2, v2 ; 10000502 v_mac_f32_e32 v0, v4, v4 ; 3E000904 v_mac_f32_e32 v0, v7, v7 ; 3E000F07 v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_mul_f32_e32 v1, v18, v18 ; 10022512 v_mac_f32_e32 v1, v19, v19 ; 3E022713 v_mac_f32_e32 v1, v20, v20 ; 3E022914 v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 v_mul_f32_e32 v6, v0, v2 ; 100C0500 v_mul_f32_e32 v5, v0, v4 ; 100A0900 v_mul_f32_e32 v4, v0, v7 ; 10080F00 v_mul_f32_e32 v12, v1, v18 ; 10182501 v_mul_f32_e32 v11, v1, v19 ; 10162701 v_mul_f32_e32 v0, v12, v6 ; 10000D0C v_mac_f32_e32 v0, v11, v5 ; 3E000B0B v_mul_f32_e32 v7, v1, v20 ; 100E2901 v_mac_f32_e32 v0, v7, v4 ; 3E000907 v_mul_f32_e32 v2, v6, v0 ; 10040106 v_mac_f32_e32 v2, v6, v0 ; 3E040106 v_mul_f32_e32 v15, v5, v0 ; 101E0105 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710 v_mac_f32_e32 v15, v5, v0 ; 3E1E0105 v_mad_f32 v23, v18, v1, -v2 ; D2820017 840A0312 v_mad_f32 v24, v19, v1, -v15 ; D2820018 843E0313 s_buffer_load_dword s1, s[8:11], 0x4c ; C200894C s_buffer_load_dword s2, s[8:11], 0x4d ; C201094D s_buffer_load_dword s3, s[8:11], 0x4e ; C201894E v_mul_f32_e32 v2, v4, v0 ; 10040104 v_mac_f32_e32 v2, v4, v0 ; 3E040104 v_mad_f32 v25, v20, v1, -v2 ; D2820019 840A0314 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[24:31], s[12:15] ; F0800700 0066000D s_buffer_load_dword s13, s[8:11], 0x40 ; C2068940 s_buffer_load_dword s14, s[8:11], 0x41 ; C2070941 s_buffer_load_dword s15, s[8:11], 0x42 ; C2078942 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v18, s1, v0 ; 10240001 v_mul_f32_e32 v19, s2, v1 ; 10260202 v_mul_f32_e32 v20, s3, v2 ; 10280403 s_buffer_load_dword s12, s[8:11], 0x27 ; C2060927 s_buffer_load_dword s1, s[8:11], 0x2b ; C200892B s_buffer_load_dword s29, s[8:11], 0x2c ; C20E892C s_buffer_load_dword s30, s[8:11], 0x2d ; C20F092D v_sub_f32_e64 v0, 1.0, s21 ; D2080000 00002AF2 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s13, v0 ; 1004000D v_mul_f32_e32 v1, s14, v0 ; 1002000E v_mul_f32_e32 v0, s15, v0 ; 1000000F v_mac_f32_e32 v2, s21, v18 ; 3E042415 v_mov_b32_e32 v26, v23 ; 7E340317 v_mac_f32_e32 v1, s21, v19 ; 3E022615 v_mov_b32_e32 v27, v24 ; 7E360318 v_mac_f32_e32 v0, s21, v20 ; 3E002815 v_mov_b32_e32 v28, v25 ; 7E380319 v_cmp_lt_f32_e64 s[2:3], 0, s1 ; D0020002 00000280 image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[32:39], s[16:19] ; F0800F00 00880D0D s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[22:23], s[2:3] ; BE962402 s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E s_cbranch_execz BB0_2 ; BF880000 s_buffer_load_dword s1, s[8:11], 0x20 ; C2008920 s_buffer_load_dword s2, s[8:11], 0x21 ; C2010921 s_buffer_load_dword s3, s[8:11], 0x22 ; C2018922 s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924 s_buffer_load_dword s14, s[8:11], 0x25 ; C2070925 v_mul_f32_e32 v13, v23, v23 ; 101A2F17 v_mac_f32_e32 v13, v24, v24 ; 3E1A3118 v_mac_f32_e32 v13, v25, v25 ; 3E1A3319 v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D s_buffer_load_dword s15, s[8:11], 0x26 ; C2078926 s_buffer_load_dword s16, s[8:11], 0x28 ; C2080928 s_buffer_load_dword s17, s[8:11], 0x29 ; C2088929 s_buffer_load_dword s18, s[8:11], 0x2a ; C209092A v_mul_f32_e32 v15, v13, v23 ; 101E2F0D v_mul_f32_e32 v16, v13, v24 ; 1020310D v_mul_f32_e32 v13, v13, v25 ; 101A330D v_rcp_f32_e32 v26, v15 ; 7E34550F s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v27, s1, v21 ; 08362A01 v_sub_f32_e32 v28, s2, v17 ; 08382202 v_rcp_f32_e32 v29, v16 ; 7E3A5510 v_mul_f32_e32 v27, v26, v27 ; 1036371A v_sub_f32_e32 v30, s13, v21 ; 083C2A0D v_mul_f32_e32 v26, v26, v30 ; 10343D1A v_cmp_lt_f32_e32 vcc, 0, v15 ; 7C021E80 v_cndmask_b32_e32 v26, v26, v27 ; 0034371A v_rcp_f32_e32 v27, v13 ; 7E36550D v_mul_f32_e32 v28, v29, v28 ; 1038391D v_sub_f32_e32 v30, s14, v17 ; 083C220E v_mul_f32_e32 v29, v29, v30 ; 103A3D1D v_cmp_lt_f32_e32 vcc, 0, v16 ; 7C022080 v_cndmask_b32_e32 v28, v29, v28 ; 0038391D v_sub_f32_e32 v29, s3, v22 ; 083A2C03 v_mul_f32_e32 v29, v27, v29 ; 103A3B1B v_sub_f32_e32 v30, s15, v22 ; 083C2C0F v_mul_f32_e32 v27, v27, v30 ; 10363D1B v_cmp_lt_f32_e32 vcc, 0, v13 ; 7C021A80 v_cndmask_b32_e32 v27, v27, v29 ; 00363B1B v_min3_f32 v26, v26, v28, v27 ; D2A2001A 046E391A v_mov_b32_e32 v27, s13 ; 7E36020D v_add_f32_e32 v27, s1, v27 ; 06363601 v_mov_b32_e32 v28, s14 ; 7E38020E v_add_f32_e32 v28, s2, v28 ; 06383802 v_mov_b32_e32 v29, s15 ; 7E3A020F v_add_f32_e32 v29, s3, v29 ; 063A3A03 v_mad_f32 v30, 0.5, v27, -s16 ; D282001E 804236F0 v_add_f32_e32 v30, v21, v30 ; 063C3D15 v_mac_f32_e32 v30, v26, v15 ; 3E3C1F1A v_mad_f32 v15, 0.5, v28, -s17 ; D282000F 804638F0 v_add_f32_e32 v15, v17, v15 ; 061E1F11 v_mac_f32_e32 v15, v26, v16 ; 3E1E211A v_mad_f32 v16, 0.5, v29, -s18 ; D2820010 804A3AF0 v_add_f32_e32 v16, v22, v16 ; 06202116 v_mac_f32_e32 v16, v26, v13 ; 3E201B1A v_mad_f32 v26, 0.5, -v27, v30 ; D282001A 447A36F0 v_mad_f32 v27, 0.5, -v28, v15 ; D282001B 443E38F0 v_mad_f32 v28, 0.5, -v29, v16 ; D282001C 44423AF0 s_or_b64 exec, exec, s[22:23] ; 88FE167E s_buffer_load_dword s14, s[8:11], 0x17 ; C2070917 s_buffer_load_dword s15, s[8:11], 0x43 ; C2078943 s_buffer_load_dword s13, s[8:11], 0x68 ; C2068968 s_buffer_load_dword s1, s[8:11], 0x0 ; C2008900 s_buffer_load_dword s2, s[8:11], 0x1 ; C2010901 s_buffer_load_dword s3, s[8:11], 0x2 ; C2018902 s_buffer_load_dword s16, s[8:11], 0x4 ; C2080904 s_buffer_load_dword s17, s[8:11], 0x5 ; C2088905 s_buffer_load_dword s18, s[8:11], 0x6 ; C2090906 s_buffer_load_dword s20, s[8:11], 0x7 ; C20A0907 s_buffer_load_dword s19, s[8:11], 0x8 ; C2098908 s_buffer_load_dword s22, s[8:11], 0x9 ; C20B0909 s_buffer_load_dword s23, s[8:11], 0xa ; C20B890A s_buffer_load_dword s24, s[8:11], 0xb ; C20C090B s_buffer_load_dword s25, s[8:11], 0xc ; C20C890C s_buffer_load_dword s26, s[8:11], 0xd ; C20D090D s_buffer_load_dword s27, s[8:11], 0xe ; C20D890E s_buffer_load_dword s28, s[8:11], 0xf ; C20E090F v_sub_f32_e64 v13, 1.0, s0 ; D208000D 000000F2 v_log_f32_e32 v13, v13 ; 7E1A4F0D s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 v_mul_legacy_f32_e32 v13, 0x3f400000, v13 ; 0E1A1AFF 3F400000 v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_mul_f32_e32 v29, 0x40e00000, v13 ; 103A1AFF 40E00000 v_cubeid_f32 v33, v26, v27, v28 ; D2880021 0472371A v_cubema_f32 v32, v26, v27, v28 ; D28E0020 0472371A v_cubesc_f32 v31, v26, v27, v28 ; D28A001F 0472371A v_cubetc_f32 v30, v26, v27, v28 ; D28C001E 0472371A v_mov_b32_e32 v26, 0x3fc00000 ; 7E3402FF 3FC00000 v_rcp_f32_e64 v13, |v32| ; D354010D 00000120 v_mad_f32 v27, v13, v30, v26 ; D282001B 046A3D0D v_mac_f32_e32 v26, v13, v31 ; 3E343F0D v_mov_b32_e32 v28, v33 ; 7E380321 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[36:43], s[32:35] ; F0900F00 01091A1A s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v13, v29 ; 7E1A4F1D v_mul_legacy_f32_e32 v13, s30, v13 ; 0E1A1A1E v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_mul_f32_e32 v13, s29, v13 ; 101A1A1D v_mul_f32_e32 v16, v26, v13 ; 10201B1A v_mul_f32_e32 v15, v27, v13 ; 101E1B1B v_mul_f32_e32 v13, v28, v13 ; 101A1B1C v_mov_b32_e32 v27, s21 ; 7E360215 v_mov_b32_e32 v26, 0x3f7fff58 ; 7E3402FF 3F7FFF58 v_cmp_lt_f32_e32 vcc, s12, v26 ; 7C02340C s_and_saveexec_b64 s[30:31], vcc ; BE9E246A s_xor_b64 s[30:31], exec, s[30:31] ; 899E1E7E s_cbranch_execz BB0_6 ; BF880000 s_buffer_load_dword s32, s[8:11], 0x3b ; C210093B s_buffer_load_dword s21, s[8:11], 0x3c ; C20A893C s_buffer_load_dword s29, s[8:11], 0x3d ; C20E893D s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[32:33], 0, s32 ; D0020020 00004080 s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420 s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E s_cbranch_execz BB0_7 ; BF880000 s_buffer_load_dword s34, s[8:11], 0x36 ; C2110936 s_buffer_load_dword s35, s[8:11], 0x38 ; C2118938 s_buffer_load_dword s36, s[8:11], 0x39 ; C2120939 s_buffer_load_dword s37, s[8:11], 0x3a ; C212893A s_buffer_load_dword s38, s[8:11], 0x30 ; C2130930 s_buffer_load_dword s39, s[8:11], 0x31 ; C2138931 s_buffer_load_dword s40, s[8:11], 0x32 ; C2140932 s_buffer_load_dword s41, s[8:11], 0x34 ; C2148934 s_buffer_load_dword s42, s[8:11], 0x35 ; C2150935 v_mul_f32_e32 v26, v23, v23 ; 10342F17 v_mac_f32_e32 v26, v24, v24 ; 3E343118 v_mac_f32_e32 v26, v25, v25 ; 3E343319 v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v28, s34, v22 ; 08382C22 v_mov_b32_e32 v29, s34 ; 7E3A0222 v_sub_f32_e32 v30, s38, v21 ; 083C2A26 v_sub_f32_e32 v31, s39, v17 ; 083E2227 v_add_f32_e32 v29, s40, v29 ; 063A3A28 v_sub_f32_e32 v32, s40, v22 ; 08402C28 v_mad_f32 v33, 0.5, v29, -s37 ; D2820021 80963AF0 v_add_f32_e32 v22, v22, v33 ; 062C4316 v_mul_f32_e32 v23, v26, v23 ; 102E2F1A v_mul_f32_e32 v24, v26, v24 ; 1030311A v_mul_f32_e32 v25, v26, v25 ; 1032331A v_rcp_f32_e32 v26, v23 ; 7E345517 v_rcp_f32_e32 v33, v24 ; 7E425518 v_rcp_f32_e32 v34, v25 ; 7E445519 v_sub_f32_e32 v35, s41, v21 ; 08462A29 v_mov_b32_e32 v36, s41 ; 7E480229 v_add_f32_e32 v36, s38, v36 ; 06484826 v_mul_f32_e32 v30, v26, v30 ; 103C3D1A v_mul_f32_e32 v26, v26, v35 ; 1034471A v_mul_f32_e32 v31, v33, v31 ; 103E3F21 v_mul_f32_e32 v32, v34, v32 ; 10404122 v_mul_f32_e32 v28, v34, v28 ; 10383922 v_mad_f32 v34, 0.5, v36, -s35 ; D2820022 808E48F0 v_add_f32_e32 v21, v21, v34 ; 062A4515 v_sub_f32_e32 v34, s42, v17 ; 0844222A v_mov_b32_e32 v35, s42 ; 7E46022A v_mul_f32_e32 v33, v33, v34 ; 10424521 v_add_f32_e32 v34, s39, v35 ; 06444627 v_cmp_lt_f32_e32 vcc, 0, v23 ; 7C022E80 v_cndmask_b32_e32 v26, v26, v30 ; 00343D1A v_cmp_lt_f32_e32 vcc, 0, v24 ; 7C023080 v_cndmask_b32_e32 v30, v33, v31 ; 003C3F21 v_cmp_lt_f32_e32 vcc, 0, v25 ; 7C023280 v_cndmask_b32_e32 v28, v28, v32 ; 0038411C v_min3_f32 v26, v26, v30, v28 ; D2A2001A 04723D1A v_mad_f32 v28, 0.5, v34, -s36 ; D282001C 809244F0 v_add_f32_e32 v17, v17, v28 ; 06223911 v_mac_f32_e32 v21, v26, v23 ; 3E2A2F1A v_mac_f32_e32 v17, v26, v24 ; 3E22311A v_mac_f32_e32 v22, v26, v25 ; 3E2C331A v_mad_f32 v23, 0.5, -v36, v21 ; D2820017 445648F0 v_mad_f32 v24, 0.5, -v34, v17 ; D2820018 444644F0 v_mad_f32 v25, 0.5, -v29, v22 ; D2820019 445A3AF0 s_or_b64 exec, exec, s[32:33] ; 88FE207E v_sub_f32_e64 v17, 1.0, s0 ; D2080011 000000F2 v_log_f32_e32 v17, v17 ; 7E224F11 s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504 v_mul_legacy_f32_e32 v17, 0x3f400000, v17 ; 0E2222FF 3F400000 v_exp_f32_e32 v17, v17 ; 7E224B11 v_mul_f32_e32 v26, 0x40e00000, v17 ; 103422FF 40E00000 v_cubeid_f32 v31, v23, v24, v25 ; D288001F 04663117 v_cubema_f32 v30, v23, v24, v25 ; D28E001E 04663117 s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708 v_cubesc_f32 v29, v23, v24, v25 ; D28A001D 04663117 v_cubetc_f32 v28, v23, v24, v25 ; D28C001C 04663117 v_rcp_f32_e64 v17, |v30| ; D3540111 0000011E v_mov_b32_e32 v23, 0x3fc00000 ; 7E2E02FF 3FC00000 v_mad_f32 v24, v17, v28, v23 ; D2820018 045E3911 v_mac_f32_e32 v23, v17, v29 ; 3E2E3B11 v_mov_b32_e32 v25, v31 ; 7E32031F s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[36:43], s[32:35] ; F0900F00 01091517 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v17, v24 ; 7E224F18 v_sub_f32_e64 v24, 1.0, s12 ; D2080018 000018F2 v_mul_legacy_f32_e32 v17, s29, v17 ; 0E22221D v_exp_f32_e32 v17, v17 ; 7E224B11 v_mul_f32_e32 v17, s21, v17 ; 10222215 v_mul_f32_e32 v21, v21, v17 ; 102A2315 v_mul_f32_e32 v22, v22, v17 ; 102C2316 v_mul_f32_e32 v17, v23, v17 ; 10222317 v_mul_f32_e32 v21, v21, v24 ; 102A3115 v_mul_f32_e32 v22, v22, v24 ; 102C3116 v_mul_f32_e32 v17, v17, v24 ; 10223111 v_mac_f32_e32 v21, s12, v16 ; 3E2A200C v_mac_f32_e32 v22, s12, v15 ; 3E2C1E0C v_mac_f32_e32 v17, s12, v13 ; 3E221A0C v_mov_b32_e32 v13, v17 ; 7E1A0311 v_mov_b32_e32 v15, v22 ; 7E1E0316 v_mov_b32_e32 v16, v21 ; 7E200315 s_or_b64 exec, exec, s[30:31] ; 88FE1E7E v_mad_f32 v22, -v27, s15, s15 ; D2820016 203C1F1B v_mov_b32_e32 v17, s14 ; 7E22020E v_mul_f32_e32 v21, v22, v18 ; 102A2516 v_mul_f32_e32 v19, v22, v19 ; 10262716 v_mul_f32_e32 v18, v22, v20 ; 10242916 v_mul_f32_e32 v20, s17, v5 ; 10280A11 v_mac_f32_e32 v20, s16, v6 ; 3E280C10 v_mac_f32_e32 v20, s18, v4 ; 3E280812 v_add_f32_e32 v20, s20, v20 ; 06282814 v_add_f32_e32 v23, v20, v8 ; 062E1114 v_mul_f32_e32 v8, s22, v5 ; 10100A16 v_mac_f32_e32 v8, s19, v6 ; 3E100C13 v_mac_f32_e32 v8, s23, v4 ; 3E100817 v_add_f32_e32 v8, s24, v8 ; 06101018 v_add_f32_e32 v9, v8, v9 ; 06121308 v_mul_f32_e32 v8, s26, v5 ; 10100A1A v_mac_f32_e32 v8, s25, v6 ; 3E100C19 v_mac_f32_e32 v8, s27, v4 ; 3E10081B v_add_f32_e32 v8, s28, v8 ; 0610101C v_add_f32_e32 v10, v8, v10 ; 06141508 s_buffer_load_dword s6, s[8:11], 0x10 ; C2030910 s_buffer_load_dword s5, s[8:11], 0x11 ; C2028911 s_buffer_load_dword s4, s[8:11], 0x12 ; C2020912 s_buffer_load_dword s17, s[8:11], 0x16 ; C2088916 s_buffer_load_dword s14, s[8:11], 0x44 ; C2070944 s_buffer_load_dword s7, s[8:11], 0x45 ; C2038945 s_buffer_load_dword s12, s[8:11], 0x46 ; C2060946 s_buffer_load_dword s15, s[8:11], 0x48 ; C2078948 s_buffer_load_dword s16, s[8:11], 0x49 ; C2080949 s_buffer_load_dword s8, s[8:11], 0x4b ; C204094B v_sub_f32_e64 v20, 1.0, s13 ; D2080014 00001AF2 v_mac_f32_e32 v20, s13, v14 ; 3E281C0D v_mul_f32_e32 v8, s1, v6 ; 10100C01 v_mac_f32_e32 v8, s2, v5 ; 3E100A02 v_mac_f32_e32 v8, s3, v4 ; 3E100803 v_max_f32_e32 v8, 0, v8 ; 20101080 v_mul_f32_e32 v14, v20, v23 ; 101C2F14 v_mul_f32_e32 v9, v20, v9 ; 10121314 v_mul_f32_e32 v10, v20, v10 ; 10141514 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v17, s17, v3 ; 3E220611 v_mul_f32_e32 v3, v20, v16 ; 10062114 v_mul_f32_e32 v15, v20, v15 ; 101E1F14 v_mul_f32_e32 v13, v20, v13 ; 101A1B14 v_sub_f32_e32 v16, 1.0, v22 ; 08202CF2 v_add_f32_e32 v16, s0, v16 ; 06202000 v_sub_f32_e64 v20, 1.0, s0 ; D2080014 000000F2 v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080 v_sub_f32_e32 v22, s1, v12 ; 082C1801 v_sub_f32_e32 v23, s2, v11 ; 082E1602 v_mul_f32_e32 v24, v22, v22 ; 10302D16 v_mac_f32_e32 v24, v23, v23 ; 3E302F17 v_sub_f32_e32 v25, s3, v7 ; 08320E03 v_mac_f32_e32 v24, v25, v25 ; 3E303319 v_rsq_clamp_f32_e32 v24, v24 ; 7E305918 v_mul_f32_e32 v22, v24, v22 ; 102C2D18 v_mul_f32_e32 v23, v24, v23 ; 102E2F18 v_mul_f32_e32 v24, v24, v25 ; 10303318 v_mul_f32_e32 v12, v12, v6 ; 10180D0C v_mad_f32 v11, -v11, v5, -v12 ; D282000B A4320B0B v_mul_f32_e32 v6, v22, v6 ; 100C0D16 v_mac_f32_e32 v6, v23, v5 ; 3E0C0B17 v_mul_f32_e32 v5, s1, v22 ; 100A2C01 v_mac_f32_e32 v5, s2, v23 ; 3E0A2E02 v_mad_f32 v7, -v7, v4, v11 ; D2820007 242E0907 v_mac_f32_e32 v5, s3, v24 ; 3E0A3003 v_mac_f32_e32 v6, v24, v4 ; 3E0C0918 v_max_f32_e32 v4, 0, v5 ; 20080A80 v_sub_f32_e32 v5, 1.0, v4 ; 080A08F2 v_mul_f32_e32 v11, v5, v5 ; 10160B05 v_mul_f32_e32 v5, v5, v11 ; 100A1705 v_mul_f32_e32 v5, v5, v11 ; 100A1705 v_max_f32_e32 v7, 0, v7 ; 200E0E80 v_sub_f32_e32 v11, 1.0, v7 ; 08160EF2 v_mul_f32_e32 v12, v11, v11 ; 1018170B v_mul_f32_e32 v22, v11, v12 ; 102C190B v_mad_f32 v23, -v12, v22, 1.0 ; D2820017 23CA2D0C v_mul_f32_e32 v24, v2, v23 ; 10302F02 v_sub_f32_e32 v25, 1.0, v2 ; 083204F2 v_mac_f32_e32 v2, v5, v25 ; 3E043305 v_mul_f32_e32 v25, v1, v23 ; 10322F01 v_sub_f32_e32 v26, 1.0, v1 ; 083402F2 v_mac_f32_e32 v1, v5, v26 ; 3E023505 v_mul_f32_e32 v23, v0, v23 ; 102E2F00 v_sub_f32_e32 v26, 1.0, v0 ; 083400F2 v_mac_f32_e32 v0, v5, v26 ; 3E003505 v_sub_f32_e32 v5, 1.0, v20 ; 080A28F2 v_mov_b32_e32 v26, 0x3cf5c28f ; 7E3402FF 3CF5C28F v_madmk_f32_e32 v5, v5, v26, 0x3f77ced9 ; 400A3505 3F77CED9 v_add_f32_e32 v26, v4, v4 ; 06340904 v_mul_f32_e32 v4, v20, v4 ; 10080914 v_mad_f32 v4, v26, v4, 0.5 ; D2820004 03C2091A v_mul_f32_e32 v12, v22, v12 ; 10181916 v_mac_f32_e32 v24, v16, v12 ; 3E301910 v_mac_f32_e32 v25, v16, v12 ; 3E321910 v_mac_f32_e32 v23, v16, v12 ; 3E2E1910 v_mul_f32_e32 v16, v20, v20 ; 10202914 v_log_f32_e32 v5, v5 ; 7E0A4F05 v_mul_f32_e32 v16, s8, v16 ; 10202008 v_mul_f32_e32 v11, v16, v11 ; 10161710 v_mac_f32_e32 v11, 1.0, v7 ; 3E160EF2 v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_sub_f32_e32 v7, 1.0, v8 ; 080E10F2 v_mul_f32_e32 v16, v16, v7 ; 10200F10 v_mac_f32_e32 v16, 1.0, v8 ; 3E2010F2 v_max_f32_e32 v6, 0, v6 ; 200C0C80 v_log_f32_e32 v6, v6 ; 7E0C4F06 v_madak_f32_e32 v11, v16, v11, 0x38d1b717 ; 42161710 38D1B717 v_mul_f32_e32 v5, 0x41200000, v5 ; 100A0AFF 41200000 v_mul_f32_e32 v16, v5, v5 ; 10200B05 v_mul_legacy_f32_e32 v6, v16, v6 ; 0E0C0D10 v_rcp_f32_e32 v11, v11 ; 7E16550B v_mad_f32 v5, v5, v5, 1.0 ; D2820005 03CA0B05 v_mul_f32_e32 v5, s16, v5 ; 100A0A10 v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_mul_f32_e32 v5, v5, v6 ; 100A0D05 v_mul_f32_e32 v5, v5, v11 ; 100A1705 v_mul_f32_e32 v5, v8, v5 ; 100A0B08 v_mul_f32_e32 v5, s15, v5 ; 100A0A0F v_mul_f32_e32 v6, v7, v7 ; 100C0F07 v_mul_f32_e32 v7, v7, v6 ; 100E0D07 v_mul_f32_e32 v6, v7, v6 ; 100C0D07 v_add_f32_e32 v4, -1.0, v4 ; 060808F3 v_mad_f32 v6, v4, v6, 1.0 ; D2820006 03CA0D04 v_mad_f32 v4, v4, v12, 1.0 ; D2820004 03CA1904 v_mul_f32_e32 v4, v4, v6 ; 10080D04 v_mul_f32_e32 v4, v8, v4 ; 10080908 v_mac_f32_e32 v14, s14, v4 ; 3E1C080E v_mul_f32_e32 v6, v14, v21 ; 100C2B0E v_max_f32_e32 v5, 0, v5 ; 200A0A80 v_mul_f32_e32 v7, s14, v5 ; 100E0A0E v_mac_f32_e32 v6, v2, v7 ; 3E0C0F02 v_mac_f32_e32 v9, s7, v4 ; 3E120807 v_mac_f32_e32 v10, s12, v4 ; 3E14080C v_mul_f32_e32 v2, s7, v5 ; 10040A07 v_mul_f32_e32 v4, s12, v5 ; 10080A0C v_mul_f32_e32 v5, v9, v19 ; 100A2709 v_mul_f32_e32 v7, v10, v18 ; 100E250A v_mac_f32_e32 v5, v1, v2 ; 3E0A0501 v_mac_f32_e32 v7, v0, v4 ; 3E0E0900 v_mac_f32_e32 v6, v24, v3 ; 3E0C0718 v_mac_f32_e32 v5, v25, v15 ; 3E0A1F19 v_mac_f32_e32 v7, v23, v13 ; 3E0E1B17 v_add_f32_e64 v0, 0, v17 clamp ; D2060800 00022280 v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 v_mul_f32_e32 v2, s6, v1 ; 10040206 v_mac_f32_e32 v2, v6, v0 ; 3E040106 v_mul_f32_e32 v3, s5, v1 ; 10060205 v_mac_f32_e32 v3, v5, v0 ; 3E060105 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mac_f32_e32 v1, v7, v0 ; 3E020107 v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702 v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 40 Code Size: 2272 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL OUT[6], GENERIC[5] DCL CONST[0..20] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[2], IN[0].xxxx 1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0].xyz, CONST[5], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[17], IN[0].xxxx 5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1] 8: MAD TEMP[2].xy, IN[2].xyyy, CONST[10].xyyy, CONST[10].zwww 9: FSEQ TEMP[3].x, CONST[12].xxxx, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].xy, IN[2].xyxx 12: ELSE :0 13: MOV TEMP[3].xy, IN[3].xyxx 14: ENDIF 15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[11].xyyy, CONST[11].zwww 16: MOV TEMP[2].zw, TEMP[3].yyxy 17: MOV TEMP[3].x, CONST[6].xxxx 18: MOV TEMP[3].y, CONST[7].xxxx 19: MOV TEMP[3].z, CONST[8].xxxx 20: MOV TEMP[4].x, CONST[6].yyyy 21: MOV TEMP[4].y, CONST[7].yyyy 22: MOV TEMP[4].z, CONST[8].yyyy 23: MOV TEMP[5].x, CONST[6].zzzz 24: MOV TEMP[5].y, CONST[7].zzzz 25: MOV TEMP[5].z, CONST[8].zzzz 26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz 29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 30: RSQ TEMP[4].x, TEMP[4].xxxx 31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 32: MUL TEMP[4].xyz, CONST[2].xyzz, IN[4].xxxx 33: MAD TEMP[4].xyz, CONST[3].xyzz, IN[4].yyyy, TEMP[4].xyzz 34: MAD TEMP[4].xyz, CONST[4].xyzz, IN[4].zzzz, TEMP[4].xyzz 35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 36: RSQ TEMP[5].x, TEMP[5].xxxx 37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx 39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz 40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww 41: MOV TEMP[4].xyz, TEMP[4].xyzx 42: MOV TEMP[5].xyz, TEMP[5].xyzx 43: MOV TEMP[3].xyz, TEMP[3].xyzx 44: MUL TEMP[6].xyz, TEMP[0].xyzz, CONST[1].wwww 45: ADD TEMP[6].xyz, CONST[1].xyzz, -TEMP[6].xyzz 46: MOV TEMP[4].w, TEMP[6].xxxx 47: MOV TEMP[5].w, TEMP[6].yyyy 48: MOV TEMP[3].w, TEMP[6].zzzz 49: MUL TEMP[6], CONST[2], IN[0].xxxx 50: MAD TEMP[6], CONST[3], IN[0].yyyy, TEMP[6] 51: MAD TEMP[6], CONST[4], IN[0].zzzz, TEMP[6] 52: MAD TEMP[6], CONST[5], IN[0].wwww, TEMP[6] 53: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[0].xyzz 54: MOV TEMP[0].yzw, TEMP[0].yxyz 55: MUL TEMP[7], CONST[13], TEMP[6].xxxx 56: MAD TEMP[7], CONST[14], TEMP[6].yyyy, TEMP[7] 57: MAD TEMP[7], CONST[15], TEMP[6].zzzz, TEMP[7] 58: MAD TEMP[6].xyz, CONST[16], TEMP[6].wwww, TEMP[7] 59: MOV TEMP[6].xyz, TEMP[6].xyzx 60: MOV TEMP[0].x, TEMP[1].zzzz 61: MOV OUT[1], TEMP[2] 62: MOV OUT[3], TEMP[5] 63: MOV OUT[2], TEMP[4] 64: MOV OUT[4], TEMP[3] 65: MOV OUT[0], TEMP[1] 66: MOV OUT[6], TEMP[6] 67: MOV OUT[5], TEMP[0] 68: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332) %82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0 %84 = add i32 %5, %7 %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %84) %86 = extractelement <4 x float> %85, i32 0 %87 = extractelement <4 x float> %85, i32 1 %88 = extractelement <4 x float> %85, i32 2 %89 = extractelement <4 x float> %85, i32 3 %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 %92 = add i32 %5, %7 %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %92) %94 = extractelement <4 x float> %93, i32 0 %95 = extractelement <4 x float> %93, i32 1 %96 = extractelement <4 x float> %93, i32 2 %97 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0 %99 = add i32 %5, %7 %100 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %98, i32 0, i32 %99) %101 = extractelement <4 x float> %100, i32 0 %102 = extractelement <4 x float> %100, i32 1 %103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0 %105 = add i32 %5, %7 %106 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %104, i32 0, i32 %105) %107 = extractelement <4 x float> %106, i32 0 %108 = extractelement <4 x float> %106, i32 1 %109 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0 %111 = add i32 %5, %7 %112 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %110, i32 0, i32 %111) %113 = extractelement <4 x float> %112, i32 0 %114 = extractelement <4 x float> %112, i32 1 %115 = extractelement <4 x float> %112, i32 2 %116 = extractelement <4 x float> %112, i32 3 %117 = fmul float %20, %86 %118 = fmul float %21, %86 %119 = fmul float %22, %86 %120 = fmul float %24, %87 %121 = fadd float %120, %117 %122 = fmul float %25, %87 %123 = fadd float %122, %118 %124 = fmul float %26, %87 %125 = fadd float %124, %119 %126 = fmul float %28, %88 %127 = fadd float %126, %121 %128 = fmul float %29, %88 %129 = fadd float %128, %123 %130 = fmul float %30, %88 %131 = fadd float %130, %125 %132 = fmul float %32, %89 %133 = fadd float %132, %127 %134 = fmul float %33, %89 %135 = fadd float %134, %129 %136 = fmul float %34, %89 %137 = fadd float %136, %131 %138 = fmul float %66, %86 %139 = fmul float %67, %86 %140 = fmul float %68, %86 %141 = fmul float %69, %86 %142 = fmul float %70, %87 %143 = fadd float %142, %138 %144 = fmul float %71, %87 %145 = fadd float %144, %139 %146 = fmul float %72, %87 %147 = fadd float %146, %140 %148 = fmul float %73, %87 %149 = fadd float %148, %141 %150 = fmul float %74, %88 %151 = fadd float %150, %143 %152 = fmul float %75, %88 %153 = fadd float %152, %145 %154 = fmul float %76, %88 %155 = fadd float %154, %147 %156 = fmul float %77, %88 %157 = fadd float %156, %149 %158 = fmul float %78, %89 %159 = fadd float %158, %151 %160 = fmul float %79, %89 %161 = fadd float %160, %153 %162 = fmul float %80, %89 %163 = fadd float %162, %155 %164 = fmul float %81, %89 %165 = fadd float %164, %157 %166 = fmul float %101, %45 %167 = fadd float %166, %47 %168 = fmul float %102, %46 %169 = fadd float %168, %48 %170 = fcmp oeq float %53, 0.000000e+00 %. = select i1 %170, float %101, float %107 %.32 = select i1 %170, float %102, float %108 %171 = fmul float %., %49 %172 = fadd float %171, %51 %173 = fmul float %.32, %50 %174 = fadd float %173, %52 %175 = fmul float %36, %94 %176 = fmul float %39, %94 %177 = fmul float %42, %94 %178 = fmul float %37, %95 %179 = fadd float %178, %175 %180 = fmul float %40, %95 %181 = fadd float %180, %176 %182 = fmul float %43, %95 %183 = fadd float %182, %177 %184 = fmul float %38, %96 %185 = fadd float %184, %179 %186 = fmul float %41, %96 %187 = fadd float %186, %181 %188 = fmul float %44, %96 %189 = fadd float %188, %183 %190 = fmul float %185, %185 %191 = fmul float %187, %187 %192 = fadd float %191, %190 %193 = fmul float %189, %189 %194 = fadd float %192, %193 %195 = call float @llvm.AMDGPU.rsq.clamped.f32(float %194) %196 = fmul float %185, %195 %197 = fmul float %187, %195 %198 = fmul float %189, %195 %199 = fmul float %20, %113 %200 = fmul float %21, %113 %201 = fmul float %22, %113 %202 = fmul float %24, %114 %203 = fadd float %202, %199 %204 = fmul float %25, %114 %205 = fadd float %204, %200 %206 = fmul float %26, %114 %207 = fadd float %206, %201 %208 = fmul float %28, %115 %209 = fadd float %208, %203 %210 = fmul float %29, %115 %211 = fadd float %210, %205 %212 = fmul float %30, %115 %213 = fadd float %212, %207 %214 = fmul float %209, %209 %215 = fmul float %211, %211 %216 = fadd float %215, %214 %217 = fmul float %213, %213 %218 = fadd float %216, %217 %219 = call float @llvm.AMDGPU.rsq.clamped.f32(float %218) %220 = fmul float %209, %219 %221 = fmul float %211, %219 %222 = fmul float %213, %219 %223 = fmul float %198, %221 %224 = fmul float %196, %222 %225 = fmul float %197, %220 %226 = fmul float %197, %222 %227 = fsub float %226, %223 %228 = fmul float %198, %220 %229 = fsub float %228, %224 %230 = fmul float %196, %221 %231 = fsub float %230, %225 %232 = fmul float %227, %116 %233 = fmul float %229, %116 %234 = fmul float %231, %116 %235 = fmul float %133, %19 %236 = fmul float %135, %19 %237 = fmul float %137, %19 %238 = fsub float %16, %235 %239 = fsub float %17, %236 %240 = fsub float %18, %237 %241 = fmul float %20, %86 %242 = fmul float %21, %86 %243 = fmul float %22, %86 %244 = fmul float %23, %86 %245 = fmul float %24, %87 %246 = fadd float %245, %241 %247 = fmul float %25, %87 %248 = fadd float %247, %242 %249 = fmul float %26, %87 %250 = fadd float %249, %243 %251 = fmul float %27, %87 %252 = fadd float %251, %244 %253 = fmul float %28, %88 %254 = fadd float %253, %246 %255 = fmul float %29, %88 %256 = fadd float %255, %248 %257 = fmul float %30, %88 %258 = fadd float %257, %250 %259 = fmul float %31, %88 %260 = fadd float %259, %252 %261 = fmul float %32, %89 %262 = fadd float %261, %254 %263 = fmul float %33, %89 %264 = fadd float %263, %256 %265 = fmul float %34, %89 %266 = fadd float %265, %258 %267 = fmul float %35, %89 %268 = fadd float %267, %260 %269 = fsub float %133, %13 %270 = fsub float %135, %14 %271 = fsub float %137, %15 %272 = fmul float %54, %262 %273 = fmul float %55, %262 %274 = fmul float %56, %262 %275 = fmul float %57, %264 %276 = fadd float %275, %272 %277 = fmul float %58, %264 %278 = fadd float %277, %273 %279 = fmul float %59, %264 %280 = fadd float %279, %274 %281 = fmul float %60, %266 %282 = fadd float %281, %276 %283 = fmul float %61, %266 %284 = fadd float %283, %278 %285 = fmul float %62, %266 %286 = fadd float %285, %280 %287 = fmul float %63, %268 %288 = fadd float %287, %282 %289 = fmul float %64, %268 %290 = fadd float %289, %284 %291 = fmul float %65, %268 %292 = fadd float %291, %286 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %167, float %169, float %172, float %174) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %220, float %221, float %222, float %238) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %232, float %233, float %234, float %239) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %196, float %197, float %198, float %240) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %163, float %269, float %270, float %271) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %288, float %290, float %292, float %268) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %159, float %161, float %163, float %165) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[10:13], v0, s[20:23], 0 idxen ; E00C2000 80050A00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[12:15], v0, s[8:11], 0 idxen ; E00C2000 80020C00 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x30 ; C2020130 s_buffer_load_dword s5, s[0:3], 0x2a ; C202812A s_buffer_load_dword s6, s[0:3], 0x28 ; C2030128 s_buffer_load_dword s7, s[0:3], 0x34 ; C2038134 s_buffer_load_dword s8, s[0:3], 0x35 ; C2040135 s_buffer_load_dword s9, s[0:3], 0x36 ; C2048136 s_buffer_load_dword s10, s[0:3], 0x38 ; C2050138 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880 v_cndmask_b32_e32 v0, v10, v8 ; 0000110A v_cndmask_b32_e32 v10, v11, v9 ; 0014130B v_mov_b32_e32 v11, s5 ; 7E160205 s_buffer_load_dword s4, s[0:3], 0x29 ; C2020129 s_buffer_load_dword s5, s[0:3], 0x2b ; C202812B v_mac_f32_e32 v11, s6, v8 ; 3E161006 s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118 s_buffer_load_dword s11, s[0:3], 0x19 ; C2058119 s_buffer_load_dword s12, s[0:3], 0x1c ; C206011C s_buffer_load_dword s13, s[0:3], 0x1d ; C206811D s_buffer_load_dword s14, s[0:3], 0x20 ; C2070120 s_buffer_load_dword s15, s[0:3], 0x2c ; C207812C s_buffer_load_dword s16, s[0:3], 0x2d ; C208012D s_buffer_load_dword s17, s[0:3], 0x2e ; C208812E s_buffer_load_dword s18, s[0:3], 0x2f ; C209012F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s5 ; 7E100205 s_buffer_load_dword s5, s[0:3], 0x21 ; C2028121 v_mac_f32_e32 v8, s4, v9 ; 3E101204 v_mul_f32_e32 v9, s6, v5 ; 10120A06 v_mac_f32_e32 v9, s11, v6 ; 3E120C0B s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_buffer_load_dword s6, s[0:3], 0x1e ; C203011E v_mul_f32_e32 v16, s12, v5 ; 10200A0C v_mac_f32_e32 v16, s13, v6 ; 3E200C0D v_mul_f32_e32 v5, s14, v5 ; 100A0A0E s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v5, s5, v6 ; 3E0A0C05 s_buffer_load_dword s5, s[0:3], 0x44 ; C2028144 s_buffer_load_dword s12, s[0:3], 0x48 ; C2060148 s_buffer_load_dword s13, s[0:3], 0x45 ; C2068145 s_buffer_load_dword s14, s[0:3], 0x49 ; C2070149 v_mac_f32_e32 v9, s4, v7 ; 3E120E04 s_buffer_load_dword s4, s[0:3], 0x46 ; C2020146 s_buffer_load_dword s19, s[0:3], 0x4a ; C209814A s_buffer_load_dword s20, s[0:3], 0x47 ; C20A0147 v_mac_f32_e32 v16, s6, v7 ; 3E200E06 v_mac_f32_e32 v5, s11, v7 ; 3E0A0E0B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s5, v1 ; 100C0205 v_mac_f32_e32 v6, s12, v2 ; 3E0C040C v_mul_f32_e32 v7, s13, v1 ; 100E020D v_mac_f32_e32 v7, s14, v2 ; 3E0E040E s_buffer_load_dword s5, s[0:3], 0x4b ; C202814B v_mul_f32_e32 v17, s4, v1 ; 10220204 v_mac_f32_e32 v17, s19, v2 ; 3E220413 v_mul_f32_e32 v18, s20, v1 ; 10240214 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_buffer_load_dword s6, s[0:3], 0xf ; C203010F s_buffer_load_dword s11, s[0:3], 0x4c ; C205814C s_buffer_load_dword s12, s[0:3], 0x4d ; C206014D s_buffer_load_dword s13, s[0:3], 0x4e ; C206814E s_buffer_load_dword s14, s[0:3], 0x4f ; C207014F s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v18, s5, v2 ; 3E240405 s_buffer_load_dword s5, s[0:3], 0xc ; C202810C s_buffer_load_dword s19, s[0:3], 0xd ; C209810D s_buffer_load_dword s20, s[0:3], 0xe ; C20A010E v_mul_f32_e32 v19, s4, v1 ; 10260204 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 v_mac_f32_e32 v19, s6, v2 ; 3E260406 v_mac_f32_e32 v6, s11, v3 ; 3E0C060B s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108 v_mac_f32_e32 v7, s12, v3 ; 3E0E060C v_mac_f32_e32 v17, s13, v3 ; 3E22060D s_buffer_load_dword s11, s[0:3], 0x10 ; C2058110 s_buffer_load_dword s12, s[0:3], 0x14 ; C2060114 v_mac_f32_e32 v18, s14, v3 ; 3E24060E s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v19, s4, v3 ; 3E260604 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A v_mul_f32_e32 v20, s6, v1 ; 10280206 v_mac_f32_e32 v20, s5, v2 ; 3E280405 s_buffer_load_dword s21, s[0:3], 0x12 ; C20A8112 v_mac_f32_e32 v20, s11, v3 ; 3E28060B v_mac_f32_e32 v20, s12, v4 ; 3E28080C s_buffer_load_dword s12, s[0:3], 0x15 ; C2060115 v_mul_f32_e32 v21, s13, v1 ; 102A020D v_mac_f32_e32 v21, s19, v2 ; 3E2A0413 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v21, s4, v3 ; 3E2A0604 s_buffer_load_dword s22, s[0:3], 0x16 ; C20B0116 s_buffer_load_dword s23, s[0:3], 0x50 ; C20B8150 s_buffer_load_dword s24, s[0:3], 0x51 ; C20C0151 s_buffer_load_dword s25, s[0:3], 0x52 ; C20C8152 s_buffer_load_dword s26, s[0:3], 0x53 ; C20D0153 s_buffer_load_dword s27, s[0:3], 0x17 ; C20D8117 v_mac_f32_e32 v21, s12, v4 ; 3E2A080C v_mul_f32_e32 v1, s14, v1 ; 1002020E v_mac_f32_e32 v1, s20, v2 ; 3E020414 v_mac_f32_e32 v1, s21, v3 ; 3E020615 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v1, s22, v4 ; 3E020816 v_mac_f32_e32 v6, s23, v4 ; 3E0C0817 v_mac_f32_e32 v7, s24, v4 ; 3E0E0818 v_mac_f32_e32 v17, s25, v4 ; 3E220819 v_mac_f32_e32 v18, s26, v4 ; 3E24081A v_mac_f32_e32 v19, s27, v4 ; 3E26081B v_mov_b32_e32 v2, s17 ; 7E040211 v_mac_f32_e32 v2, s15, v0 ; 3E04000F v_mov_b32_e32 v0, s18 ; 7E000212 v_mac_f32_e32 v0, s16, v10 ; 3E001410 exp 15, 32, 0, 0, 0, v11, v8, v2, v0 ; F800020F 0002080B s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s6, v12 ; 10001806 v_mac_f32_e32 v0, s5, v13 ; 3E001A05 v_mul_f32_e32 v2, s13, v12 ; 1004180D v_mac_f32_e32 v2, s19, v13 ; 3E041A13 v_mul_f32_e32 v3, s14, v12 ; 1006180E v_mac_f32_e32 v3, s20, v13 ; 3E061A14 v_mac_f32_e32 v0, s11, v14 ; 3E001C0B v_mac_f32_e32 v2, s4, v14 ; 3E041C04 v_mac_f32_e32 v3, s21, v14 ; 3E061C15 v_mul_f32_e32 v4, v9, v9 ; 10081309 v_mac_f32_e32 v4, v16, v16 ; 3E082110 v_mul_f32_e32 v8, v0, v0 ; 10100100 v_mac_f32_e32 v8, v2, v2 ; 3E100502 v_mac_f32_e32 v4, v5, v5 ; 3E080B05 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x7 ; C2028107 v_mac_f32_e32 v8, v3, v3 ; 3E100703 v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_mul_f32_e32 v9, v4, v9 ; 10121304 v_mul_f32_e32 v10, v4, v16 ; 10142104 v_mul_f32_e32 v4, v4, v5 ; 10080B04 v_mul_f32_e32 v0, v8, v0 ; 10000108 v_mul_f32_e32 v2, v8, v2 ; 10040508 v_mul_f32_e32 v3, v8, v3 ; 10060708 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v5, -v20, s5, v5 ; D2820005 24140B14 exp 15, 33, 0, 0, 0, v0, v2, v3, v5 ; F800021F 05030200 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v5, v2, v4 ; 100A0902 v_mad_f32 v5, v10, v3, -v5 ; D2820005 8416070A v_mul_f32_e32 v3, v3, v9 ; 10061303 v_mad_f32 v3, v4, v0, -v3 ; D2820003 840E0104 v_mul_f32_e32 v0, v0, v10 ; 10001500 v_mad_f32 v0, v9, v2, -v0 ; D2820000 84020509 v_mul_f32_e32 v2, v15, v5 ; 10040B0F v_mul_f32_e32 v3, v15, v3 ; 1006070F v_mul_f32_e32 v0, v15, v0 ; 1000010F v_mov_b32_e32 v5, s6 ; 7E0A0206 v_mad_f32 v5, -v21, s5, v5 ; D2820005 24140B15 exp 15, 34, 0, 0, 0, v2, v3, v0, v5 ; F800022F 05000302 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_buffer_load_dword s11, s[0:3], 0x1 ; C2058101 s_buffer_load_dword s12, s[0:3], 0x2 ; C2060102 s_buffer_load_dword s13, s[0:3], 0x40 ; C2068140 s_buffer_load_dword s14, s[0:3], 0x41 ; C2070141 s_buffer_load_dword s15, s[0:3], 0x42 ; C2078142 s_buffer_load_dword s16, s[0:3], 0x39 ; C2080139 s_buffer_load_dword s17, s[0:3], 0x3a ; C208813A s_buffer_load_dword s18, s[0:3], 0x3c ; C209013C s_buffer_load_dword s19, s[0:3], 0x3d ; C209813D s_buffer_load_dword s0, s[0:3], 0x3e ; C200013E s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v0, s4 ; 7E000204 v_mad_f32 v0, -v1, s5, v0 ; D2820000 24000B01 v_subrev_f32_e32 v2, s6, v20 ; 0A042806 v_mul_f32_e32 v3, s7, v20 ; 10062807 v_mul_f32_e32 v5, s8, v20 ; 100A2808 v_mul_f32_e32 v8, s9, v20 ; 10102809 v_mac_f32_e32 v3, s10, v21 ; 3E062A0A v_mac_f32_e32 v5, s16, v21 ; 3E0A2A10 v_mac_f32_e32 v8, s17, v21 ; 3E102A11 v_subrev_f32_e32 v11, s11, v21 ; 0A162A0B v_mac_f32_e32 v3, s18, v1 ; 3E060212 v_mac_f32_e32 v5, s19, v1 ; 3E0A0213 v_mac_f32_e32 v8, s0, v1 ; 3E100200 v_subrev_f32_e32 v1, s12, v1 ; 0A02020C v_mac_f32_e32 v3, s13, v19 ; 3E06260D v_mac_f32_e32 v5, s14, v19 ; 3E0A260E v_mac_f32_e32 v8, s15, v19 ; 3E10260F exp 15, 35, 0, 0, 0, v9, v10, v4, v0 ; F800023F 00040A09 exp 15, 36, 0, 0, 0, v17, v2, v11, v1 ; F800024F 010B0211 exp 15, 37, 0, 0, 0, v3, v5, v8, v19 ; F800025F 13080503 exp 15, 12, 0, 1, 0, v6, v7, v17, v18 ; F80008CF 12110706 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 24 Code Size: 920 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0..4] DCL CONST[6..7] DCL TEMP[0..13], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 10.0000, 0.9680} IMM[1] FLT32 { 0.0300, 2.0000, 0.5000, 0.0001} IMM[2] FLT32 { -1.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[3].xyzz, IN[3].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[3].xyzz, TEMP[0].xxxx 3: MOV TEMP[1].xy, IN[0].xyyy 4: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D 5: MUL TEMP[1].xyz, CONST[4].xyzz, TEMP[1].xyzz 6: LRP TEMP[2].xyz, CONST[6].xxxx, TEMP[1].xyzz, CONST[1].xyzz 7: MOV TEMP[3].x, IN[1].wwww 8: MOV TEMP[3].y, IN[2].wwww 9: MOV TEMP[3].z, IN[3].wwww 10: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 11: RSQ TEMP[4].x, TEMP[4].xxxx 12: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 13: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[3].xyzz 14: MAX TEMP[4].x, IMM[0].xxxx, TEMP[4].xxxx 15: DP3 TEMP[5].x, IN[5].xyzz, IN[5].xyzz 16: MOV TEMP[5].xy, TEMP[5].xxxx 17: TEX TEMP[5].w, TEMP[5], SAMP[1], 2D 18: MUL TEMP[5].xyz, CONST[2].xyzz, TEMP[5].wwww 19: DP3 TEMP[6].x, IN[4].yzww, IN[4].yzww 20: RSQ TEMP[6].x, TEMP[6].xxxx 21: MUL TEMP[6].xyz, IN[4].yzww, TEMP[6].xxxx 22: MOV TEMP[6].xyz, -TEMP[6].xyzx 23: ADD TEMP[7].x, IMM[0].yyyy, -CONST[7].xxxx 24: ADD TEMP[8].xyz, TEMP[3].xyzz, TEMP[6].xyzz 25: DP3 TEMP[9].x, TEMP[8].xyzz, TEMP[8].xyzz 26: RSQ TEMP[9].x, TEMP[9].xxxx 27: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[9].xxxx 28: DP3 TEMP[6].x, TEMP[0].xyzz, TEMP[6].xyzz 29: MAX TEMP[6].x, IMM[0].xxxx, TEMP[6].xxxx 30: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[8].xyzz 31: MAX TEMP[3].x, IMM[0].xxxx, TEMP[3].xxxx 32: MUL TEMP[9].x, TEMP[7].xxxx, TEMP[7].xxxx 33: MUL TEMP[9].x, TEMP[9].xxxx, CONST[3].wwww 34: ADD TEMP[10].x, IMM[0].yyyy, -TEMP[7].xxxx 35: MAD TEMP[10].x, TEMP[10].xxxx, IMM[0].wwww, IMM[1].xxxx 36: LG2 TEMP[10].x, TEMP[10].xxxx 37: RCP TEMP[10].x, TEMP[10].xxxx 38: MUL TEMP[10].x, IMM[0].zzzz, TEMP[10].xxxx 39: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[10].xxxx 40: ADD TEMP[11].x, IMM[0].yyyy, -TEMP[4].xxxx 41: ADD TEMP[12].x, IMM[0].yyyy, -TEMP[6].xxxx 42: MUL TEMP[13].x, IMM[1].yyyy, TEMP[3].xxxx 43: MUL TEMP[7].x, TEMP[3].xxxx, TEMP[7].xxxx 44: MAD TEMP[7].x, TEMP[13].xxxx, TEMP[7].xxxx, IMM[1].zzzz 45: ADD TEMP[3].x, IMM[0].yyyy, -TEMP[3].xxxx 46: LRP TEMP[13].x, TEMP[4].xxxx, IMM[0].yyyy, TEMP[9].xxxx 47: LRP TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy, TEMP[9].xxxx 48: MAD TEMP[6].x, TEMP[13].xxxx, TEMP[6].xxxx, IMM[1].wwww 49: RCP TEMP[6].x, TEMP[6].xxxx 50: DP3 TEMP[8].x, TEMP[0].xyzz, TEMP[8].xyzz 51: MAX TEMP[8].x, IMM[0].xxxx, TEMP[8].xxxx 52: POW TEMP[8].x, TEMP[8].xxxx, TEMP[10].xxxx 53: ADD TEMP[9].x, TEMP[10].xxxx, IMM[0].yyyy 54: MUL TEMP[9].x, TEMP[9].xxxx, CONST[3].yyyy 55: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx 56: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[8].xxxx 57: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[4].xxxx 58: MUL TEMP[6].x, TEMP[6].xxxx, CONST[3].xxxx 59: MAX TEMP[6].x, IMM[0].xxxx, TEMP[6].xxxx 60: MUL TEMP[6].xyz, TEMP[6].xxxx, TEMP[5].xyzz 61: ADD TEMP[8].xyz, IMM[0].yyyy, -TEMP[2].xyzz 62: MUL TEMP[9].x, TEMP[3].xxxx, TEMP[3].xxxx 63: MUL TEMP[10].x, TEMP[3].xxxx, TEMP[3].xxxx 64: MUL TEMP[3].x, TEMP[10].xxxx, TEMP[3].xxxx 65: MUL TEMP[3].x, TEMP[9].xxxx, TEMP[3].xxxx 66: MAD TEMP[2].xyz, TEMP[8].xyzz, TEMP[3].xxxx, TEMP[2].xyzz 67: MUL TEMP[3].x, CONST[6].xxxx, CONST[1].wwww 68: ADD TEMP[3].x, CONST[1].wwww, -TEMP[3].xxxx 69: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx 70: ADD TEMP[3].x, TEMP[7].xxxx, IMM[2].xxxx 71: MUL TEMP[8].x, TEMP[11].xxxx, TEMP[11].xxxx 72: MUL TEMP[9].x, TEMP[11].xxxx, TEMP[11].xxxx 73: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx 74: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx 75: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[8].xxxx, IMM[0].yyyy 76: ADD TEMP[7].x, TEMP[7].xxxx, IMM[2].xxxx 77: MUL TEMP[8].x, TEMP[12].xxxx, TEMP[12].xxxx 78: MUL TEMP[9].x, TEMP[12].xxxx, TEMP[12].xxxx 79: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx 80: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx 81: MAD TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx, IMM[0].yyyy 82: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[7].xxxx 83: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx 84: MUL TEMP[3].xyz, TEMP[5].xyzz, TEMP[3].xxxx 85: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz 86: MAD TEMP[0].xyz, TEMP[6].xyzz, TEMP[2].xyzz, TEMP[1].xyzz 87: MAD TEMP[1].x, IN[4].xxxx, CONST[0].zzzz, CONST[0].wwww 88: MOV_SAT TEMP[1].x, TEMP[1].xxxx 89: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 90: MOV TEMP[0].xyz, TEMP[0].xyzx 91: MOV TEMP[0].w, IMM[0].yyyy 92: MOV OUT[0], TEMP[0] 93: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %41 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %42 = load <32 x i8>, <32 x i8> addrspace(2)* %41, align 32, !tbaa !0 %43 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %46 = bitcast <8 x i32> addrspace(2)* %45 to <32 x i8> addrspace(2)* %47 = load <32 x i8>, <32 x i8> addrspace(2)* %46, align 32, !tbaa !0 %48 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %49 = bitcast <4 x i32> addrspace(2)* %48 to <16 x i8> addrspace(2)* %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 %51 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %55 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %56 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %57 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %58 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %59 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %60 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %61 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %62 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %63 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %64 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %65 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %66 = fmul float %55, %55 %67 = fmul float %56, %56 %68 = fadd float %67, %66 %69 = fmul float %57, %57 %70 = fadd float %68, %69 %71 = call float @llvm.AMDGPU.rsq.clamped.f32(float %70) %72 = fmul float %55, %71 %73 = fmul float %56, %71 %74 = fmul float %57, %71 %75 = bitcast float %51 to i32 %76 = bitcast float %52 to i32 %77 = insertelement <2 x i32> undef, i32 %75, i32 0 %78 = insertelement <2 x i32> %77, i32 %76, i32 1 %79 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %78, <32 x i8> %42, <16 x i8> %44, i32 2) %80 = extractelement <4 x float> %79, i32 0 %81 = extractelement <4 x float> %79, i32 1 %82 = extractelement <4 x float> %79, i32 2 %83 = fmul float %36, %80 %84 = fmul float %37, %81 %85 = fmul float %38, %82 %86 = call float @llvm.AMDGPU.lrp(float %39, float %83, float %26) %87 = call float @llvm.AMDGPU.lrp(float %39, float %84, float %27) %88 = call float @llvm.AMDGPU.lrp(float %39, float %85, float %28) %89 = fmul float %53, %53 %90 = fmul float %54, %54 %91 = fadd float %90, %89 %92 = fmul float %58, %58 %93 = fadd float %91, %92 %94 = call float @llvm.AMDGPU.rsq.clamped.f32(float %93) %95 = fmul float %53, %94 %96 = fmul float %54, %94 %97 = fmul float %58, %94 %98 = fmul float %72, %95 %99 = fmul float %73, %96 %100 = fadd float %99, %98 %101 = fmul float %74, %97 %102 = fadd float %100, %101 %103 = call float @llvm.maxnum.f32(float %102, float 0.000000e+00) %104 = fmul float %63, %63 %105 = fmul float %64, %64 %106 = fadd float %105, %104 %107 = fmul float %65, %65 %108 = fadd float %106, %107 %109 = bitcast float %108 to i32 %110 = bitcast float %108 to i32 %111 = insertelement <2 x i32> undef, i32 %109, i32 0 %112 = insertelement <2 x i32> %111, i32 %110, i32 1 %113 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %112, <32 x i8> %47, <16 x i8> %50, i32 2) %114 = extractelement <4 x float> %113, i32 3 %115 = fmul float %30, %114 %116 = fmul float %31, %114 %117 = fmul float %32, %114 %118 = fmul float %60, %60 %119 = fmul float %61, %61 %120 = fadd float %119, %118 %121 = fmul float %62, %62 %122 = fadd float %120, %121 %123 = call float @llvm.AMDGPU.rsq.clamped.f32(float %122) %124 = fmul float %60, %123 %125 = fmul float %61, %123 %126 = fmul float %62, %123 %127 = fsub float 1.000000e+00, %40 %128 = fsub float %95, %124 %129 = fsub float %96, %125 %130 = fsub float %97, %126 %131 = fmul float %128, %128 %132 = fmul float %129, %129 %133 = fadd float %132, %131 %134 = fmul float %130, %130 %135 = fadd float %133, %134 %136 = call float @llvm.AMDGPU.rsq.clamped.f32(float %135) %137 = fmul float %128, %136 %138 = fmul float %129, %136 %139 = fmul float %130, %136 %140 = fmul float %124, %72 %141 = fsub float -0.000000e+00, %140 %142 = fmul float %125, %73 %143 = fsub float %141, %142 %144 = fmul float %126, %74 %145 = fsub float %143, %144 %146 = call float @llvm.maxnum.f32(float %145, float 0.000000e+00) %147 = fmul float %95, %137 %148 = fmul float %96, %138 %149 = fadd float %148, %147 %150 = fmul float %97, %139 %151 = fadd float %149, %150 %152 = call float @llvm.maxnum.f32(float %151, float 0.000000e+00) %153 = fmul float %127, %127 %154 = fmul float %153, %35 %155 = fsub float 1.000000e+00, %127 %156 = fmul float %155, 0x3FEEF9DB20000000 %157 = fadd float %156, 0x3F9EB851E0000000 %158 = call float @llvm.log2.f32(float %157) %159 = fdiv float 1.000000e+00, %158 %160 = fmul float %159, 1.000000e+01 %161 = fmul float %160, %160 %162 = fsub float 1.000000e+00, %103 %163 = fsub float 1.000000e+00, %146 %164 = fmul float %152, 2.000000e+00 %165 = fmul float %152, %127 %166 = fmul float %164, %165 %167 = fadd float %166, 5.000000e-01 %168 = fsub float 1.000000e+00, %152 %169 = call float @llvm.AMDGPU.lrp(float %103, float 1.000000e+00, float %154) %170 = call float @llvm.AMDGPU.lrp(float %146, float 1.000000e+00, float %154) %171 = fmul float %169, %170 %172 = fadd float %171, 0x3F1A36E2E0000000 %173 = fdiv float 1.000000e+00, %172 %174 = fmul float %72, %137 %175 = fmul float %73, %138 %176 = fadd float %175, %174 %177 = fmul float %74, %139 %178 = fadd float %176, %177 %179 = call float @llvm.maxnum.f32(float %178, float 0.000000e+00) %180 = call float @llvm.pow.f32(float %179, float %161) %181 = fadd float %161, 1.000000e+00 %182 = fmul float %181, %34 %183 = fmul float %180, %182 %184 = fmul float %173, %183 %185 = fmul float %184, %103 %186 = fmul float %185, %33 %187 = call float @llvm.maxnum.f32(float %186, float 0.000000e+00) %188 = fmul float %187, %115 %189 = fmul float %187, %116 %190 = fmul float %187, %117 %191 = fsub float 1.000000e+00, %86 %192 = fsub float 1.000000e+00, %87 %193 = fsub float 1.000000e+00, %88 %194 = fmul float %168, %168 %195 = fmul float %168, %168 %196 = fmul float %195, %168 %197 = fmul float %194, %196 %198 = fmul float %191, %197 %199 = fadd float %198, %86 %200 = fmul float %192, %197 %201 = fadd float %200, %87 %202 = fmul float %193, %197 %203 = fadd float %202, %88 %204 = fmul float %39, %29 %205 = fsub float %29, %204 %206 = fmul float %83, %205 %207 = fmul float %84, %205 %208 = fmul float %85, %205 %209 = fadd float %167, -1.000000e+00 %210 = fmul float %162, %162 %211 = fmul float %162, %162 %212 = fmul float %211, %162 %213 = fmul float %210, %212 %214 = fmul float %209, %213 %215 = fadd float %214, 1.000000e+00 %216 = fadd float %167, -1.000000e+00 %217 = fmul float %163, %163 %218 = fmul float %163, %163 %219 = fmul float %218, %163 %220 = fmul float %217, %219 %221 = fmul float %216, %220 %222 = fadd float %221, 1.000000e+00 %223 = fmul float %215, %222 %224 = fmul float %223, %103 %225 = fmul float %115, %224 %226 = fmul float %116, %224 %227 = fmul float %117, %224 %228 = fmul float %206, %225 %229 = fmul float %207, %226 %230 = fmul float %208, %227 %231 = fmul float %188, %199 %232 = fadd float %231, %228 %233 = fmul float %189, %201 %234 = fadd float %233, %229 %235 = fmul float %190, %203 %236 = fadd float %235, %230 %237 = fmul float %59, %24 %238 = fadd float %237, %25 %239 = call float @llvm.AMDIL.clamp.(float %238, float 0.000000e+00, float 1.000000e+00) %240 = fmul float %232, %239 %241 = fmul float %234, %239 %242 = fmul float %236, %239 %243 = call i32 @llvm.SI.packf16(float %240, float %241) %244 = bitcast i32 %243 to float %245 = call i32 @llvm.SI.packf16(float %242, float 1.000000e+00) %246 = bitcast i32 %245 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %244, float %246, float %244, float %246) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 3, 2, [m0] ; C8140B00 v_interp_p2_f32 v5, [v5], v1, 3, 2, [m0] ; C8150B01 v_interp_p1_f32 v6, v0, 0, 3, [m0] ; C8180C00 v_interp_p2_f32 v6, [v6], v1, 0, 3, [m0] ; C8190C01 v_interp_p1_f32 v7, v0, 1, 3, [m0] ; C81C0D00 v_interp_p2_f32 v7, [v7], v1, 1, 3, [m0] ; C81D0D01 v_interp_p1_f32 v8, v0, 2, 3, [m0] ; C8200E00 v_interp_p2_f32 v8, [v8], v1, 2, 3, [m0] ; C8210E01 v_interp_p1_f32 v9, v0, 3, 3, [m0] ; C8240F00 v_interp_p2_f32 v9, [v9], v1, 3, 3, [m0] ; C8250F01 v_interp_p1_f32 v10, v0, 0, 4, [m0] ; C8281000 v_interp_p2_f32 v10, [v10], v1, 0, 4, [m0] ; C8291001 v_interp_p1_f32 v11, v0, 1, 4, [m0] ; C82C1100 v_interp_p2_f32 v11, [v11], v1, 1, 4, [m0] ; C82D1101 v_interp_p1_f32 v12, v0, 2, 4, [m0] ; C8301200 v_interp_p2_f32 v12, [v12], v1, 2, 4, [m0] ; C8311201 v_interp_p1_f32 v13, v0, 3, 4, [m0] ; C8341300 v_interp_p2_f32 v13, [v13], v1, 3, 4, [m0] ; C8351301 v_interp_p1_f32 v14, v0, 0, 5, [m0] ; C8381400 v_interp_p2_f32 v14, [v14], v1, 0, 5, [m0] ; C8391401 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_load_dwordx8 s[12:19], s[6:7], 0x8 ; C0C60708 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v15, v0, 1, 5, [m0] ; C83C1500 v_interp_p2_f32 v15, [v15], v1, 1, 5, [m0] ; C83D1501 v_interp_p1_f32 v0, v0, 2, 5, [m0] ; C8001600 v_interp_p2_f32 v0, [v0], v1, 2, 5, [m0] ; C8011601 v_mul_f32_e32 v16, v14, v14 ; 10201D0E v_mac_f32_e32 v16, v15, v15 ; 3E201F0F v_mac_f32_e32 v16, v0, v0 ; 3E200100 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[20:23] ; F0800700 00A60002 v_mov_b32_e32 v17, v16 ; 7E220310 image_sample v3, 8, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[12:19], s[8:11] ; F0800800 00430310 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0xd ; C204810D s_buffer_load_dword s10, s[0:3], 0xf ; C205010F s_buffer_load_dword s11, s[0:3], 0x10 ; C2058110 s_buffer_load_dword s12, s[0:3], 0x11 ; C2060111 s_buffer_load_dword s13, s[0:3], 0x12 ; C2068112 s_buffer_load_dword s14, s[0:3], 0x7 ; C2070107 s_buffer_load_dword s15, s[0:3], 0x8 ; C2078108 s_buffer_load_dword s16, s[0:3], 0x9 ; C2080109 s_buffer_load_dword s17, s[0:3], 0xa ; C208810A s_buffer_load_dword s18, s[0:3], 0xc ; C209010C s_buffer_load_dword s19, s[0:3], 0x18 ; C2098118 s_buffer_load_dword s0, s[0:3], 0x1c ; C200011C s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s11, v0 ; 1000000B v_mul_f32_e32 v1, s12, v1 ; 1002020C v_mul_f32_e32 v2, s13, v2 ; 1004040D v_mul_f32_e32 v14, v6, v6 ; 101C0D06 v_mac_f32_e32 v14, v7, v7 ; 3E1C0F07 v_mac_f32_e32 v14, v8, v8 ; 3E1C1108 v_rsq_clamp_f32_e32 v14, v14 ; 7E1C590E v_mul_f32_e32 v15, v11, v11 ; 101E170B v_mac_f32_e32 v15, v12, v12 ; 3E1E190C v_mac_f32_e32 v15, v13, v13 ; 3E1E1B0D v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F v_mul_f32_e32 v16, v4, v4 ; 10200904 v_mac_f32_e32 v16, v5, v5 ; 3E200B05 v_mac_f32_e32 v16, v9, v9 ; 3E201309 v_rsq_clamp_f32_e32 v16, v16 ; 7E205910 v_mul_f32_e32 v11, v15, v11 ; 1016170F v_mul_f32_e32 v12, v15, v12 ; 1018190F v_mul_f32_e32 v13, v15, v13 ; 101A1B0F v_mul_f32_e32 v6, v14, v6 ; 100C0D0E v_mul_f32_e32 v7, v14, v7 ; 100E0F0E v_mul_f32_e32 v15, v16, v4 ; 101E0910 v_mad_f32 v4, v4, v16, -v11 ; D2820004 842E2104 v_mul_f32_e32 v11, v6, v11 ; 10161706 v_mad_f32 v11, -v12, v7, -v11 ; D282000B A42E0F0C v_mad_f32 v12, v5, v16, -v12 ; D282000C 84322105 v_mul_f32_e32 v17, v4, v4 ; 10220904 v_mac_f32_e32 v17, v12, v12 ; 3E22190C v_mad_f32 v18, v9, v16, -v13 ; D2820012 84362109 v_mac_f32_e32 v17, v18, v18 ; 3E222512 v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 v_mul_f32_e32 v5, v16, v5 ; 100A0B10 v_mul_f32_e32 v19, v15, v6 ; 10260D0F v_mac_f32_e32 v19, v5, v7 ; 3E260F05 v_mul_f32_e32 v4, v17, v4 ; 10080911 v_mul_f32_e32 v12, v17, v12 ; 10181911 v_mul_f32_e32 v15, v4, v15 ; 101E1F04 v_mac_f32_e32 v15, v12, v5 ; 3E1E0B0C v_mul_f32_e32 v4, v4, v6 ; 10080D04 v_mac_f32_e32 v4, v12, v7 ; 3E080F0C v_mul_f32_e32 v5, v14, v8 ; 100A110E v_mul_f32_e32 v6, v16, v9 ; 100C1310 v_mul_f32_e32 v7, v17, v18 ; 100E2511 v_mad_f32 v8, -v13, v5, v11 ; D2820008 242E0B0D v_mac_f32_e32 v19, v6, v5 ; 3E260B06 v_mac_f32_e32 v15, v7, v6 ; 3E1E0D07 v_mac_f32_e32 v4, v7, v5 ; 3E080B07 v_max_f32_e32 v5, 0, v15 ; 200A1E80 v_sub_f32_e32 v6, 1.0, v5 ; 080C0AF2 v_mul_f32_e32 v7, v6, v6 ; 100E0D06 v_mul_f32_e32 v6, v6, v7 ; 100C0F06 v_mul_f32_e32 v6, v6, v7 ; 100C0F06 v_sub_f32_e64 v7, 1.0, s19 ; D2080007 000026F2 v_mul_f32_e32 v9, s6, v7 ; 10120E06 v_mad_f32 v11, -v7, s6, 1.0 ; D282000B 23C80D07 v_mul_f32_e32 v12, s7, v7 ; 10180E07 v_mad_f32 v13, -v7, s7, 1.0 ; D282000D 23C80F07 v_mul_f32_e32 v14, s8, v7 ; 101C0E08 v_mad_f32 v7, -v7, s8, 1.0 ; D2820007 23C81107 v_mac_f32_e32 v9, s19, v0 ; 3E120013 v_mad_f32 v11, -s19, v0, v11 ; D282000B 242E0013 v_mac_f32_e32 v9, v6, v11 ; 3E121706 v_mac_f32_e32 v12, s19, v1 ; 3E180213 v_mad_f32 v11, -s19, v1, v13 ; D282000B 24360213 v_mac_f32_e32 v12, v6, v11 ; 3E181706 v_mac_f32_e32 v14, s19, v2 ; 3E1C0413 v_mad_f32 v7, -s19, v2, v7 ; D2820007 241E0413 v_mac_f32_e32 v14, v6, v7 ; 3E1C0F06 v_sub_f32_e64 v6, 1.0, s0 ; D2080006 000000F2 v_mul_f32_e32 v7, v6, v6 ; 100E0D06 v_mul_f32_e32 v7, s10, v7 ; 100E0E0A v_sub_f32_e32 v11, 1.0, v6 ; 08160CF2 v_mov_b32_e32 v13, 0x3cf5c28f ; 7E1A02FF 3CF5C28F v_madmk_f32_e32 v11, v11, v13, 0x3f77ced9 ; 40161B0B 3F77CED9 v_mul_f32_e32 v6, v6, v5 ; 100C0B06 v_add_f32_e32 v5, v5, v5 ; 060A0B05 v_mad_f32 v5, v5, v6, 0.5 ; D2820005 03C20D05 v_max_f32_e32 v6, 0, v8 ; 200C1080 v_log_f32_e32 v8, v11 ; 7E104F0B v_sub_f32_e32 v11, 1.0, v6 ; 08160CF2 v_mul_f32_e32 v13, v7, v11 ; 101A1707 v_mac_f32_e32 v13, 1.0, v6 ; 3E1A0CF2 v_max_f32_e32 v6, 0, v19 ; 200C2680 v_rcp_f32_e32 v8, v8 ; 7E105508 v_sub_f32_e32 v15, 1.0, v6 ; 081E0CF2 v_mul_f32_e32 v7, v7, v15 ; 100E1F07 v_mac_f32_e32 v7, 1.0, v6 ; 3E0E0CF2 v_max_f32_e32 v4, 0, v4 ; 20080880 v_log_f32_e32 v4, v4 ; 7E084F04 v_madak_f32_e32 v7, v7, v13, 0x38d1b717 ; 420E1B07 38D1B717 v_mul_f32_e32 v8, 0x41200000, v8 ; 101010FF 41200000 v_mul_f32_e32 v13, v8, v8 ; 101A1108 v_mul_legacy_f32_e32 v4, v13, v4 ; 0E08090D v_rcp_f32_e32 v7, v7 ; 7E0E5507 v_mad_f32 v8, v8, v8, 1.0 ; D2820008 03CA1108 v_mul_f32_e32 v8, s9, v8 ; 10101009 v_exp_f32_e32 v4, v4 ; 7E084B04 v_mul_f32_e32 v4, v8, v4 ; 10080908 v_mul_f32_e32 v4, v4, v7 ; 10080F04 v_mul_f32_e32 v7, v15, v15 ; 100E1F0F v_mul_f32_e32 v8, v15, v7 ; 10100F0F v_mul_f32_e32 v7, v8, v7 ; 100E0F08 v_mul_f32_e32 v8, v11, v11 ; 1010170B v_mul_f32_e32 v11, v11, v8 ; 1016110B v_mul_f32_e32 v8, v11, v8 ; 1010110B v_add_f32_e32 v5, -1.0, v5 ; 060A0AF3 v_mad_f32 v7, v5, v7, 1.0 ; D2820007 03CA0F05 v_mad_f32 v5, v5, v8, 1.0 ; D2820005 03CA1105 v_mov_b32_e32 v8, s19 ; 7E100213 v_mad_f32 v8, -v8, s14, s14 ; D2820008 20381D08 v_mul_f32_e32 v5, v5, v7 ; 100A0F05 v_mul_f32_e32 v7, s15, v3 ; 100E060F v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v0, v8, v0 ; 10000108 v_mul_f32_e32 v5, v6, v5 ; 100A0B06 v_mul_f32_e32 v6, v5, v7 ; 100C0F05 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_mul_f32_e32 v4, s18, v4 ; 10080812 v_max_f32_e32 v4, 0, v4 ; 20080880 v_mul_f32_e32 v6, v7, v4 ; 100C0907 v_mac_f32_e32 v0, v9, v6 ; 3E000D09 v_mul_f32_e32 v6, s16, v3 ; 100C0610 v_mul_f32_e32 v1, v8, v1 ; 10020308 v_mul_f32_e32 v7, v5, v6 ; 100E0D05 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_mul_f32_e32 v6, v6, v4 ; 100C0906 v_mac_f32_e32 v1, v12, v6 ; 3E020D0C v_mul_f32_e32 v2, v8, v2 ; 10040508 v_mul_f32_e32 v3, s17, v3 ; 10060611 v_mul_f32_e32 v4, v3, v4 ; 10080903 v_mul_f32_e32 v3, v5, v3 ; 10060705 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mac_f32_e32 v2, v14, v4 ; 3E04090E v_mov_b32_e32 v3, s5 ; 7E060205 v_mac_f32_e32 v3, s4, v10 ; 3E061404 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e64 v1, v2, 1.0 ; D25E0001 0001E502 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 20 Code Size: 924 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL OUT[6], GENERIC[5] DCL CONST[0..20] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[2], IN[0].xxxx 1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0].xyz, CONST[5], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[17], IN[0].xxxx 5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1] 8: MAD TEMP[2].xy, IN[2].xyyy, CONST[10].xyyy, CONST[10].zwww 9: FSEQ TEMP[3].x, CONST[12].xxxx, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].xy, IN[2].xyxx 12: ELSE :0 13: MOV TEMP[3].xy, IN[3].xyxx 14: ENDIF 15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[11].xyyy, CONST[11].zwww 16: MOV TEMP[2].zw, TEMP[3].yyxy 17: MOV TEMP[3].x, CONST[6].xxxx 18: MOV TEMP[3].y, CONST[7].xxxx 19: MOV TEMP[3].z, CONST[8].xxxx 20: MOV TEMP[4].x, CONST[6].yyyy 21: MOV TEMP[4].y, CONST[7].yyyy 22: MOV TEMP[4].z, CONST[8].yyyy 23: MOV TEMP[5].x, CONST[6].zzzz 24: MOV TEMP[5].y, CONST[7].zzzz 25: MOV TEMP[5].z, CONST[8].zzzz 26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz 29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 30: RSQ TEMP[4].x, TEMP[4].xxxx 31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 32: MUL TEMP[4].xyz, CONST[2].xyzz, IN[4].xxxx 33: MAD TEMP[4].xyz, CONST[3].xyzz, IN[4].yyyy, TEMP[4].xyzz 34: MAD TEMP[4].xyz, CONST[4].xyzz, IN[4].zzzz, TEMP[4].xyzz 35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 36: RSQ TEMP[5].x, TEMP[5].xxxx 37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx 39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz 40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww 41: MOV TEMP[4].xyz, TEMP[4].xyzx 42: MOV TEMP[5].xyz, TEMP[5].xyzx 43: MOV TEMP[3].xyz, TEMP[3].xyzx 44: MUL TEMP[6].xyz, TEMP[0].xyzz, CONST[1].wwww 45: ADD TEMP[6].xyz, CONST[1].xyzz, -TEMP[6].xyzz 46: MOV TEMP[4].w, TEMP[6].xxxx 47: MOV TEMP[5].w, TEMP[6].yyyy 48: MOV TEMP[3].w, TEMP[6].zzzz 49: MUL TEMP[6], CONST[2], IN[0].xxxx 50: MAD TEMP[6], CONST[3], IN[0].yyyy, TEMP[6] 51: MAD TEMP[6], CONST[4], IN[0].zzzz, TEMP[6] 52: MAD TEMP[6], CONST[5], IN[0].wwww, TEMP[6] 53: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[0].xyzz 54: MOV TEMP[0].yzw, TEMP[0].yxyz 55: MUL TEMP[7], CONST[13], TEMP[6].xxxx 56: MAD TEMP[7], CONST[14], TEMP[6].yyyy, TEMP[7] 57: MAD TEMP[7], CONST[15], TEMP[6].zzzz, TEMP[7] 58: MAD TEMP[6].xyz, CONST[16], TEMP[6].wwww, TEMP[7] 59: MOV TEMP[6].xyz, TEMP[6].xyzx 60: MOV TEMP[0].x, TEMP[1].zzzz 61: MOV OUT[1], TEMP[2] 62: MOV OUT[3], TEMP[5] 63: MOV OUT[2], TEMP[4] 64: MOV OUT[4], TEMP[3] 65: MOV OUT[0], TEMP[1] 66: MOV OUT[6], TEMP[6] 67: MOV OUT[5], TEMP[0] 68: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332) %82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0 %84 = add i32 %5, %7 %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %84) %86 = extractelement <4 x float> %85, i32 0 %87 = extractelement <4 x float> %85, i32 1 %88 = extractelement <4 x float> %85, i32 2 %89 = extractelement <4 x float> %85, i32 3 %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 %92 = add i32 %5, %7 %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %92) %94 = extractelement <4 x float> %93, i32 0 %95 = extractelement <4 x float> %93, i32 1 %96 = extractelement <4 x float> %93, i32 2 %97 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0 %99 = add i32 %5, %7 %100 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %98, i32 0, i32 %99) %101 = extractelement <4 x float> %100, i32 0 %102 = extractelement <4 x float> %100, i32 1 %103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0 %105 = add i32 %5, %7 %106 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %104, i32 0, i32 %105) %107 = extractelement <4 x float> %106, i32 0 %108 = extractelement <4 x float> %106, i32 1 %109 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0 %111 = add i32 %5, %7 %112 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %110, i32 0, i32 %111) %113 = extractelement <4 x float> %112, i32 0 %114 = extractelement <4 x float> %112, i32 1 %115 = extractelement <4 x float> %112, i32 2 %116 = extractelement <4 x float> %112, i32 3 %117 = fmul float %20, %86 %118 = fmul float %21, %86 %119 = fmul float %22, %86 %120 = fmul float %24, %87 %121 = fadd float %120, %117 %122 = fmul float %25, %87 %123 = fadd float %122, %118 %124 = fmul float %26, %87 %125 = fadd float %124, %119 %126 = fmul float %28, %88 %127 = fadd float %126, %121 %128 = fmul float %29, %88 %129 = fadd float %128, %123 %130 = fmul float %30, %88 %131 = fadd float %130, %125 %132 = fmul float %32, %89 %133 = fadd float %132, %127 %134 = fmul float %33, %89 %135 = fadd float %134, %129 %136 = fmul float %34, %89 %137 = fadd float %136, %131 %138 = fmul float %66, %86 %139 = fmul float %67, %86 %140 = fmul float %68, %86 %141 = fmul float %69, %86 %142 = fmul float %70, %87 %143 = fadd float %142, %138 %144 = fmul float %71, %87 %145 = fadd float %144, %139 %146 = fmul float %72, %87 %147 = fadd float %146, %140 %148 = fmul float %73, %87 %149 = fadd float %148, %141 %150 = fmul float %74, %88 %151 = fadd float %150, %143 %152 = fmul float %75, %88 %153 = fadd float %152, %145 %154 = fmul float %76, %88 %155 = fadd float %154, %147 %156 = fmul float %77, %88 %157 = fadd float %156, %149 %158 = fmul float %78, %89 %159 = fadd float %158, %151 %160 = fmul float %79, %89 %161 = fadd float %160, %153 %162 = fmul float %80, %89 %163 = fadd float %162, %155 %164 = fmul float %81, %89 %165 = fadd float %164, %157 %166 = fmul float %101, %45 %167 = fadd float %166, %47 %168 = fmul float %102, %46 %169 = fadd float %168, %48 %170 = fcmp oeq float %53, 0.000000e+00 %. = select i1 %170, float %101, float %107 %.32 = select i1 %170, float %102, float %108 %171 = fmul float %., %49 %172 = fadd float %171, %51 %173 = fmul float %.32, %50 %174 = fadd float %173, %52 %175 = fmul float %36, %94 %176 = fmul float %39, %94 %177 = fmul float %42, %94 %178 = fmul float %37, %95 %179 = fadd float %178, %175 %180 = fmul float %40, %95 %181 = fadd float %180, %176 %182 = fmul float %43, %95 %183 = fadd float %182, %177 %184 = fmul float %38, %96 %185 = fadd float %184, %179 %186 = fmul float %41, %96 %187 = fadd float %186, %181 %188 = fmul float %44, %96 %189 = fadd float %188, %183 %190 = fmul float %185, %185 %191 = fmul float %187, %187 %192 = fadd float %191, %190 %193 = fmul float %189, %189 %194 = fadd float %192, %193 %195 = call float @llvm.AMDGPU.rsq.clamped.f32(float %194) %196 = fmul float %185, %195 %197 = fmul float %187, %195 %198 = fmul float %189, %195 %199 = fmul float %20, %113 %200 = fmul float %21, %113 %201 = fmul float %22, %113 %202 = fmul float %24, %114 %203 = fadd float %202, %199 %204 = fmul float %25, %114 %205 = fadd float %204, %200 %206 = fmul float %26, %114 %207 = fadd float %206, %201 %208 = fmul float %28, %115 %209 = fadd float %208, %203 %210 = fmul float %29, %115 %211 = fadd float %210, %205 %212 = fmul float %30, %115 %213 = fadd float %212, %207 %214 = fmul float %209, %209 %215 = fmul float %211, %211 %216 = fadd float %215, %214 %217 = fmul float %213, %213 %218 = fadd float %216, %217 %219 = call float @llvm.AMDGPU.rsq.clamped.f32(float %218) %220 = fmul float %209, %219 %221 = fmul float %211, %219 %222 = fmul float %213, %219 %223 = fmul float %198, %221 %224 = fmul float %196, %222 %225 = fmul float %197, %220 %226 = fmul float %197, %222 %227 = fsub float %226, %223 %228 = fmul float %198, %220 %229 = fsub float %228, %224 %230 = fmul float %196, %221 %231 = fsub float %230, %225 %232 = fmul float %227, %116 %233 = fmul float %229, %116 %234 = fmul float %231, %116 %235 = fmul float %133, %19 %236 = fmul float %135, %19 %237 = fmul float %137, %19 %238 = fsub float %16, %235 %239 = fsub float %17, %236 %240 = fsub float %18, %237 %241 = fmul float %20, %86 %242 = fmul float %21, %86 %243 = fmul float %22, %86 %244 = fmul float %23, %86 %245 = fmul float %24, %87 %246 = fadd float %245, %241 %247 = fmul float %25, %87 %248 = fadd float %247, %242 %249 = fmul float %26, %87 %250 = fadd float %249, %243 %251 = fmul float %27, %87 %252 = fadd float %251, %244 %253 = fmul float %28, %88 %254 = fadd float %253, %246 %255 = fmul float %29, %88 %256 = fadd float %255, %248 %257 = fmul float %30, %88 %258 = fadd float %257, %250 %259 = fmul float %31, %88 %260 = fadd float %259, %252 %261 = fmul float %32, %89 %262 = fadd float %261, %254 %263 = fmul float %33, %89 %264 = fadd float %263, %256 %265 = fmul float %34, %89 %266 = fadd float %265, %258 %267 = fmul float %35, %89 %268 = fadd float %267, %260 %269 = fsub float %133, %13 %270 = fsub float %135, %14 %271 = fsub float %137, %15 %272 = fmul float %54, %262 %273 = fmul float %55, %262 %274 = fmul float %56, %262 %275 = fmul float %57, %264 %276 = fadd float %275, %272 %277 = fmul float %58, %264 %278 = fadd float %277, %273 %279 = fmul float %59, %264 %280 = fadd float %279, %274 %281 = fmul float %60, %266 %282 = fadd float %281, %276 %283 = fmul float %61, %266 %284 = fadd float %283, %278 %285 = fmul float %62, %266 %286 = fadd float %285, %280 %287 = fmul float %63, %268 %288 = fadd float %287, %282 %289 = fmul float %64, %268 %290 = fadd float %289, %284 %291 = fmul float %65, %268 %292 = fadd float %291, %286 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %167, float %169, float %172, float %174) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %220, float %221, float %222, float %238) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %232, float %233, float %234, float %239) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %196, float %197, float %198, float %240) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %163, float %269, float %270, float %271) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %288, float %290, float %292, float %268) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %159, float %161, float %163, float %165) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[10:13], v0, s[20:23], 0 idxen ; E00C2000 80050A00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[12:15], v0, s[8:11], 0 idxen ; E00C2000 80020C00 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x30 ; C2020130 s_buffer_load_dword s5, s[0:3], 0x2a ; C202812A s_buffer_load_dword s6, s[0:3], 0x28 ; C2030128 s_buffer_load_dword s7, s[0:3], 0x34 ; C2038134 s_buffer_load_dword s8, s[0:3], 0x35 ; C2040135 s_buffer_load_dword s9, s[0:3], 0x36 ; C2048136 s_buffer_load_dword s10, s[0:3], 0x38 ; C2050138 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880 v_cndmask_b32_e32 v0, v10, v8 ; 0000110A v_cndmask_b32_e32 v10, v11, v9 ; 0014130B v_mov_b32_e32 v11, s5 ; 7E160205 s_buffer_load_dword s4, s[0:3], 0x29 ; C2020129 s_buffer_load_dword s5, s[0:3], 0x2b ; C202812B v_mac_f32_e32 v11, s6, v8 ; 3E161006 s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118 s_buffer_load_dword s11, s[0:3], 0x19 ; C2058119 s_buffer_load_dword s12, s[0:3], 0x1c ; C206011C s_buffer_load_dword s13, s[0:3], 0x1d ; C206811D s_buffer_load_dword s14, s[0:3], 0x20 ; C2070120 s_buffer_load_dword s15, s[0:3], 0x2c ; C207812C s_buffer_load_dword s16, s[0:3], 0x2d ; C208012D s_buffer_load_dword s17, s[0:3], 0x2e ; C208812E s_buffer_load_dword s18, s[0:3], 0x2f ; C209012F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s5 ; 7E100205 s_buffer_load_dword s5, s[0:3], 0x21 ; C2028121 v_mac_f32_e32 v8, s4, v9 ; 3E101204 v_mul_f32_e32 v9, s6, v5 ; 10120A06 v_mac_f32_e32 v9, s11, v6 ; 3E120C0B s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_buffer_load_dword s6, s[0:3], 0x1e ; C203011E v_mul_f32_e32 v16, s12, v5 ; 10200A0C v_mac_f32_e32 v16, s13, v6 ; 3E200C0D v_mul_f32_e32 v5, s14, v5 ; 100A0A0E s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v5, s5, v6 ; 3E0A0C05 s_buffer_load_dword s5, s[0:3], 0x44 ; C2028144 s_buffer_load_dword s12, s[0:3], 0x48 ; C2060148 s_buffer_load_dword s13, s[0:3], 0x45 ; C2068145 s_buffer_load_dword s14, s[0:3], 0x49 ; C2070149 v_mac_f32_e32 v9, s4, v7 ; 3E120E04 s_buffer_load_dword s4, s[0:3], 0x46 ; C2020146 s_buffer_load_dword s19, s[0:3], 0x4a ; C209814A s_buffer_load_dword s20, s[0:3], 0x47 ; C20A0147 v_mac_f32_e32 v16, s6, v7 ; 3E200E06 v_mac_f32_e32 v5, s11, v7 ; 3E0A0E0B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s5, v1 ; 100C0205 v_mac_f32_e32 v6, s12, v2 ; 3E0C040C v_mul_f32_e32 v7, s13, v1 ; 100E020D v_mac_f32_e32 v7, s14, v2 ; 3E0E040E s_buffer_load_dword s5, s[0:3], 0x4b ; C202814B v_mul_f32_e32 v17, s4, v1 ; 10220204 v_mac_f32_e32 v17, s19, v2 ; 3E220413 v_mul_f32_e32 v18, s20, v1 ; 10240214 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_buffer_load_dword s6, s[0:3], 0xf ; C203010F s_buffer_load_dword s11, s[0:3], 0x4c ; C205814C s_buffer_load_dword s12, s[0:3], 0x4d ; C206014D s_buffer_load_dword s13, s[0:3], 0x4e ; C206814E s_buffer_load_dword s14, s[0:3], 0x4f ; C207014F s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v18, s5, v2 ; 3E240405 s_buffer_load_dword s5, s[0:3], 0xc ; C202810C s_buffer_load_dword s19, s[0:3], 0xd ; C209810D s_buffer_load_dword s20, s[0:3], 0xe ; C20A010E v_mul_f32_e32 v19, s4, v1 ; 10260204 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 v_mac_f32_e32 v19, s6, v2 ; 3E260406 v_mac_f32_e32 v6, s11, v3 ; 3E0C060B s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108 v_mac_f32_e32 v7, s12, v3 ; 3E0E060C v_mac_f32_e32 v17, s13, v3 ; 3E22060D s_buffer_load_dword s11, s[0:3], 0x10 ; C2058110 s_buffer_load_dword s12, s[0:3], 0x14 ; C2060114 v_mac_f32_e32 v18, s14, v3 ; 3E24060E s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v19, s4, v3 ; 3E260604 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A v_mul_f32_e32 v20, s6, v1 ; 10280206 v_mac_f32_e32 v20, s5, v2 ; 3E280405 s_buffer_load_dword s21, s[0:3], 0x12 ; C20A8112 v_mac_f32_e32 v20, s11, v3 ; 3E28060B v_mac_f32_e32 v20, s12, v4 ; 3E28080C s_buffer_load_dword s12, s[0:3], 0x15 ; C2060115 v_mul_f32_e32 v21, s13, v1 ; 102A020D v_mac_f32_e32 v21, s19, v2 ; 3E2A0413 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v21, s4, v3 ; 3E2A0604 s_buffer_load_dword s22, s[0:3], 0x16 ; C20B0116 s_buffer_load_dword s23, s[0:3], 0x50 ; C20B8150 s_buffer_load_dword s24, s[0:3], 0x51 ; C20C0151 s_buffer_load_dword s25, s[0:3], 0x52 ; C20C8152 s_buffer_load_dword s26, s[0:3], 0x53 ; C20D0153 s_buffer_load_dword s27, s[0:3], 0x17 ; C20D8117 v_mac_f32_e32 v21, s12, v4 ; 3E2A080C v_mul_f32_e32 v1, s14, v1 ; 1002020E v_mac_f32_e32 v1, s20, v2 ; 3E020414 v_mac_f32_e32 v1, s21, v3 ; 3E020615 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v1, s22, v4 ; 3E020816 v_mac_f32_e32 v6, s23, v4 ; 3E0C0817 v_mac_f32_e32 v7, s24, v4 ; 3E0E0818 v_mac_f32_e32 v17, s25, v4 ; 3E220819 v_mac_f32_e32 v18, s26, v4 ; 3E24081A v_mac_f32_e32 v19, s27, v4 ; 3E26081B v_mov_b32_e32 v2, s17 ; 7E040211 v_mac_f32_e32 v2, s15, v0 ; 3E04000F v_mov_b32_e32 v0, s18 ; 7E000212 v_mac_f32_e32 v0, s16, v10 ; 3E001410 exp 15, 32, 0, 0, 0, v11, v8, v2, v0 ; F800020F 0002080B s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s6, v12 ; 10001806 v_mac_f32_e32 v0, s5, v13 ; 3E001A05 v_mul_f32_e32 v2, s13, v12 ; 1004180D v_mac_f32_e32 v2, s19, v13 ; 3E041A13 v_mul_f32_e32 v3, s14, v12 ; 1006180E v_mac_f32_e32 v3, s20, v13 ; 3E061A14 v_mac_f32_e32 v0, s11, v14 ; 3E001C0B v_mac_f32_e32 v2, s4, v14 ; 3E041C04 v_mac_f32_e32 v3, s21, v14 ; 3E061C15 v_mul_f32_e32 v4, v9, v9 ; 10081309 v_mac_f32_e32 v4, v16, v16 ; 3E082110 v_mul_f32_e32 v8, v0, v0 ; 10100100 v_mac_f32_e32 v8, v2, v2 ; 3E100502 v_mac_f32_e32 v4, v5, v5 ; 3E080B05 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x7 ; C2028107 v_mac_f32_e32 v8, v3, v3 ; 3E100703 v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_mul_f32_e32 v9, v4, v9 ; 10121304 v_mul_f32_e32 v10, v4, v16 ; 10142104 v_mul_f32_e32 v4, v4, v5 ; 10080B04 v_mul_f32_e32 v0, v8, v0 ; 10000108 v_mul_f32_e32 v2, v8, v2 ; 10040508 v_mul_f32_e32 v3, v8, v3 ; 10060708 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v5, -v20, s5, v5 ; D2820005 24140B14 exp 15, 33, 0, 0, 0, v0, v2, v3, v5 ; F800021F 05030200 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v5, v2, v4 ; 100A0902 v_mad_f32 v5, v10, v3, -v5 ; D2820005 8416070A v_mul_f32_e32 v3, v3, v9 ; 10061303 v_mad_f32 v3, v4, v0, -v3 ; D2820003 840E0104 v_mul_f32_e32 v0, v0, v10 ; 10001500 v_mad_f32 v0, v9, v2, -v0 ; D2820000 84020509 v_mul_f32_e32 v2, v15, v5 ; 10040B0F v_mul_f32_e32 v3, v15, v3 ; 1006070F v_mul_f32_e32 v0, v15, v0 ; 1000010F v_mov_b32_e32 v5, s6 ; 7E0A0206 v_mad_f32 v5, -v21, s5, v5 ; D2820005 24140B15 exp 15, 34, 0, 0, 0, v2, v3, v0, v5 ; F800022F 05000302 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_buffer_load_dword s11, s[0:3], 0x1 ; C2058101 s_buffer_load_dword s12, s[0:3], 0x2 ; C2060102 s_buffer_load_dword s13, s[0:3], 0x40 ; C2068140 s_buffer_load_dword s14, s[0:3], 0x41 ; C2070141 s_buffer_load_dword s15, s[0:3], 0x42 ; C2078142 s_buffer_load_dword s16, s[0:3], 0x39 ; C2080139 s_buffer_load_dword s17, s[0:3], 0x3a ; C208813A s_buffer_load_dword s18, s[0:3], 0x3c ; C209013C s_buffer_load_dword s19, s[0:3], 0x3d ; C209813D s_buffer_load_dword s0, s[0:3], 0x3e ; C200013E s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v0, s4 ; 7E000204 v_mad_f32 v0, -v1, s5, v0 ; D2820000 24000B01 v_subrev_f32_e32 v2, s6, v20 ; 0A042806 v_mul_f32_e32 v3, s7, v20 ; 10062807 v_mul_f32_e32 v5, s8, v20 ; 100A2808 v_mul_f32_e32 v8, s9, v20 ; 10102809 v_mac_f32_e32 v3, s10, v21 ; 3E062A0A v_mac_f32_e32 v5, s16, v21 ; 3E0A2A10 v_mac_f32_e32 v8, s17, v21 ; 3E102A11 v_subrev_f32_e32 v11, s11, v21 ; 0A162A0B v_mac_f32_e32 v3, s18, v1 ; 3E060212 v_mac_f32_e32 v5, s19, v1 ; 3E0A0213 v_mac_f32_e32 v8, s0, v1 ; 3E100200 v_subrev_f32_e32 v1, s12, v1 ; 0A02020C v_mac_f32_e32 v3, s13, v19 ; 3E06260D v_mac_f32_e32 v5, s14, v19 ; 3E0A260E v_mac_f32_e32 v8, s15, v19 ; 3E10260F exp 15, 35, 0, 0, 0, v9, v10, v4, v0 ; F800023F 00040A09 exp 15, 36, 0, 0, 0, v17, v2, v11, v1 ; F800024F 010B0211 exp 15, 37, 0, 0, 0, v3, v5, v8, v19 ; F800025F 13080503 exp 15, 12, 0, 1, 0, v6, v7, v17, v18 ; F80008CF 12110706 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 24 Code Size: 920 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[0..4] DCL CONST[7..9] DCL TEMP[0..13], LOCAL IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[1] FLT32 { 10.0000, 0.9680, 0.0300, 0.5000} IMM[2] FLT32 { 0.0001, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].x, IN[1].xxxx 1: MOV TEMP[0].y, IN[2].xxxx 2: MOV TEMP[0].z, IN[3].xxxx 3: MOV TEMP[1].x, IN[1].yyyy 4: MOV TEMP[1].y, IN[2].yyyy 5: MOV TEMP[1].z, IN[3].yyyy 6: MOV TEMP[2].x, IN[1].zzzz 7: MOV TEMP[2].y, IN[2].zzzz 8: MOV TEMP[2].z, IN[3].zzzz 9: MOV TEMP[3].xy, IN[0].xyyy 10: TEX TEMP[3].yw, TEMP[3], SAMP[1], 2D 11: MAD TEMP[3].xy, TEMP[3].wyyy, IMM[0].xxxx, IMM[0].yyyy 12: MUL TEMP[3].xy, TEMP[3].xyyy, CONST[7].xxxx 13: DP2 TEMP[4].x, TEMP[3].xyyy, TEMP[3].xyyy 14: MOV_SAT TEMP[4].x, TEMP[4].xxxx 15: ADD TEMP[4].x, IMM[0].zzzz, -TEMP[4].xxxx 16: SQRT TEMP[4].x, TEMP[4].xxxx 17: MOV TEMP[3].z, TEMP[4].xxxx 18: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[0].xyzz 19: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[1].xyzz 20: MOV TEMP[0].y, TEMP[1].xxxx 21: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[2].xyzz 22: MOV TEMP[0].z, TEMP[1].xxxx 23: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 24: RSQ TEMP[1].x, TEMP[1].xxxx 25: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 26: MOV TEMP[1].xy, IN[0].xyyy 27: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D 28: MUL TEMP[1].xyz, CONST[4].xyzz, TEMP[1].xyzz 29: LRP TEMP[2].xyz, CONST[8].xxxx, TEMP[1].xyzz, CONST[1].xyzz 30: MOV TEMP[3].x, IN[1].wwww 31: MOV TEMP[3].y, IN[2].wwww 32: MOV TEMP[3].z, IN[3].wwww 33: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 34: RSQ TEMP[4].x, TEMP[4].xxxx 35: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 36: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[3].xyzz 37: MAX TEMP[4].x, IMM[0].wwww, TEMP[4].xxxx 38: DP3 TEMP[5].x, IN[5].xyzz, IN[5].xyzz 39: MOV TEMP[5].xy, TEMP[5].xxxx 40: TEX TEMP[5].w, TEMP[5], SAMP[2], 2D 41: MUL TEMP[5].xyz, CONST[2].xyzz, TEMP[5].wwww 42: DP3 TEMP[6].x, IN[4].yzww, IN[4].yzww 43: RSQ TEMP[6].x, TEMP[6].xxxx 44: MUL TEMP[6].xyz, IN[4].yzww, TEMP[6].xxxx 45: MOV TEMP[6].xyz, -TEMP[6].xyzx 46: ADD TEMP[7].x, IMM[0].zzzz, -CONST[9].xxxx 47: ADD TEMP[8].xyz, TEMP[3].xyzz, TEMP[6].xyzz 48: DP3 TEMP[9].x, TEMP[8].xyzz, TEMP[8].xyzz 49: RSQ TEMP[9].x, TEMP[9].xxxx 50: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[9].xxxx 51: DP3 TEMP[6].x, TEMP[0].xyzz, TEMP[6].xyzz 52: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx 53: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[8].xyzz 54: MAX TEMP[3].x, IMM[0].wwww, TEMP[3].xxxx 55: MUL TEMP[9].x, TEMP[7].xxxx, TEMP[7].xxxx 56: MUL TEMP[9].x, TEMP[9].xxxx, CONST[3].wwww 57: ADD TEMP[10].x, IMM[0].zzzz, -TEMP[7].xxxx 58: MAD TEMP[10].x, TEMP[10].xxxx, IMM[1].yyyy, IMM[1].zzzz 59: LG2 TEMP[10].x, TEMP[10].xxxx 60: RCP TEMP[10].x, TEMP[10].xxxx 61: MUL TEMP[10].x, IMM[1].xxxx, TEMP[10].xxxx 62: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[10].xxxx 63: ADD TEMP[11].x, IMM[0].zzzz, -TEMP[4].xxxx 64: ADD TEMP[12].x, IMM[0].zzzz, -TEMP[6].xxxx 65: MUL TEMP[13].x, IMM[0].xxxx, TEMP[3].xxxx 66: MUL TEMP[7].x, TEMP[3].xxxx, TEMP[7].xxxx 67: MAD TEMP[7].x, TEMP[13].xxxx, TEMP[7].xxxx, IMM[1].wwww 68: ADD TEMP[3].x, IMM[0].zzzz, -TEMP[3].xxxx 69: LRP TEMP[13].x, TEMP[4].xxxx, IMM[0].zzzz, TEMP[9].xxxx 70: LRP TEMP[6].x, TEMP[6].xxxx, IMM[0].zzzz, TEMP[9].xxxx 71: MAD TEMP[6].x, TEMP[13].xxxx, TEMP[6].xxxx, IMM[2].xxxx 72: RCP TEMP[6].x, TEMP[6].xxxx 73: DP3 TEMP[8].x, TEMP[0].xyzz, TEMP[8].xyzz 74: MAX TEMP[8].x, IMM[0].wwww, TEMP[8].xxxx 75: POW TEMP[8].x, TEMP[8].xxxx, TEMP[10].xxxx 76: ADD TEMP[9].x, TEMP[10].xxxx, IMM[0].zzzz 77: MUL TEMP[9].x, TEMP[9].xxxx, CONST[3].yyyy 78: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx 79: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[8].xxxx 80: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[4].xxxx 81: MUL TEMP[6].x, TEMP[6].xxxx, CONST[3].xxxx 82: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx 83: MUL TEMP[6].xyz, TEMP[6].xxxx, TEMP[5].xyzz 84: ADD TEMP[8].xyz, IMM[0].zzzz, -TEMP[2].xyzz 85: MUL TEMP[9].x, TEMP[3].xxxx, TEMP[3].xxxx 86: MUL TEMP[10].x, TEMP[3].xxxx, TEMP[3].xxxx 87: MUL TEMP[3].x, TEMP[10].xxxx, TEMP[3].xxxx 88: MUL TEMP[3].x, TEMP[9].xxxx, TEMP[3].xxxx 89: MAD TEMP[2].xyz, TEMP[8].xyzz, TEMP[3].xxxx, TEMP[2].xyzz 90: MUL TEMP[3].x, CONST[8].xxxx, CONST[1].wwww 91: ADD TEMP[3].x, CONST[1].wwww, -TEMP[3].xxxx 92: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx 93: ADD TEMP[3].x, TEMP[7].xxxx, IMM[0].yyyy 94: MUL TEMP[8].x, TEMP[11].xxxx, TEMP[11].xxxx 95: MUL TEMP[9].x, TEMP[11].xxxx, TEMP[11].xxxx 96: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx 97: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx 98: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[8].xxxx, IMM[0].zzzz 99: ADD TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy 100: MUL TEMP[8].x, TEMP[12].xxxx, TEMP[12].xxxx 101: MUL TEMP[9].x, TEMP[12].xxxx, TEMP[12].xxxx 102: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx 103: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx 104: MAD TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx, IMM[0].zzzz 105: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[7].xxxx 106: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx 107: MUL TEMP[3].xyz, TEMP[5].xyzz, TEMP[3].xxxx 108: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz 109: MAD TEMP[0].xyz, TEMP[6].xyzz, TEMP[2].xyzz, TEMP[1].xyzz 110: MAD TEMP[1].x, IN[4].xxxx, CONST[0].zzzz, CONST[0].wwww 111: MOV_SAT TEMP[1].x, TEMP[1].xxxx 112: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 113: MOV TEMP[0].xyz, TEMP[0].xyzx 114: MOV TEMP[0].w, IMM[0].zzzz 115: MOV OUT[0], TEMP[0] 116: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %42 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %43 = load <32 x i8>, <32 x i8> addrspace(2)* %42, align 32, !tbaa !0 %44 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0 %46 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %47 = bitcast <8 x i32> addrspace(2)* %46 to <32 x i8> addrspace(2)* %48 = load <32 x i8>, <32 x i8> addrspace(2)* %47, align 32, !tbaa !0 %49 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %50 = bitcast <4 x i32> addrspace(2)* %49 to <16 x i8> addrspace(2)* %51 = load <16 x i8>, <16 x i8> addrspace(2)* %50, align 16, !tbaa !0 %52 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %53 = bitcast <8 x i32> addrspace(2)* %52 to <32 x i8> addrspace(2)* %54 = load <32 x i8>, <32 x i8> addrspace(2)* %53, align 32, !tbaa !0 %55 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %56 = bitcast <4 x i32> addrspace(2)* %55 to <16 x i8> addrspace(2)* %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 %58 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %59 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %60 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %61 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %62 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %63 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %64 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %65 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %66 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %67 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %68 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %69 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %70 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %71 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %79 = bitcast float %58 to i32 %80 = bitcast float %59 to i32 %81 = insertelement <2 x i32> undef, i32 %79, i32 0 %82 = insertelement <2 x i32> %81, i32 %80, i32 1 %83 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %82, <32 x i8> %48, <16 x i8> %51, i32 2) %84 = extractelement <4 x float> %83, i32 1 %85 = extractelement <4 x float> %83, i32 3 %86 = fmul float %85, 2.000000e+00 %87 = fadd float %86, -1.000000e+00 %88 = fmul float %84, 2.000000e+00 %89 = fadd float %88, -1.000000e+00 %90 = fmul float %87, %39 %91 = fmul float %89, %39 %92 = fmul float %90, %90 %93 = fmul float %91, %91 %94 = fadd float %92, %93 %95 = call float @llvm.AMDIL.clamp.(float %94, float 0.000000e+00, float 1.000000e+00) %96 = fsub float 1.000000e+00, %95 %97 = call float @llvm.sqrt.f32(float %96) %98 = fmul float %90, %60 %99 = fmul float %91, %64 %100 = fadd float %99, %98 %101 = fmul float %97, %68 %102 = fadd float %100, %101 %103 = fmul float %90, %61 %104 = fmul float %91, %65 %105 = fadd float %104, %103 %106 = fmul float %97, %69 %107 = fadd float %105, %106 %108 = fmul float %90, %62 %109 = fmul float %91, %66 %110 = fadd float %109, %108 %111 = fmul float %97, %70 %112 = fadd float %110, %111 %113 = fmul float %102, %102 %114 = fmul float %107, %107 %115 = fadd float %114, %113 %116 = fmul float %112, %112 %117 = fadd float %115, %116 %118 = call float @llvm.AMDGPU.rsq.clamped.f32(float %117) %119 = fmul float %102, %118 %120 = fmul float %107, %118 %121 = fmul float %112, %118 %122 = bitcast float %58 to i32 %123 = bitcast float %59 to i32 %124 = insertelement <2 x i32> undef, i32 %122, i32 0 %125 = insertelement <2 x i32> %124, i32 %123, i32 1 %126 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %125, <32 x i8> %43, <16 x i8> %45, i32 2) %127 = extractelement <4 x float> %126, i32 0 %128 = extractelement <4 x float> %126, i32 1 %129 = extractelement <4 x float> %126, i32 2 %130 = fmul float %36, %127 %131 = fmul float %37, %128 %132 = fmul float %38, %129 %133 = call float @llvm.AMDGPU.lrp(float %40, float %130, float %26) %134 = call float @llvm.AMDGPU.lrp(float %40, float %131, float %27) %135 = call float @llvm.AMDGPU.lrp(float %40, float %132, float %28) %136 = fmul float %63, %63 %137 = fmul float %67, %67 %138 = fadd float %137, %136 %139 = fmul float %71, %71 %140 = fadd float %138, %139 %141 = call float @llvm.AMDGPU.rsq.clamped.f32(float %140) %142 = fmul float %63, %141 %143 = fmul float %67, %141 %144 = fmul float %71, %141 %145 = fmul float %119, %142 %146 = fmul float %120, %143 %147 = fadd float %146, %145 %148 = fmul float %121, %144 %149 = fadd float %147, %148 %150 = call float @llvm.maxnum.f32(float %149, float 0.000000e+00) %151 = fmul float %76, %76 %152 = fmul float %77, %77 %153 = fadd float %152, %151 %154 = fmul float %78, %78 %155 = fadd float %153, %154 %156 = bitcast float %155 to i32 %157 = bitcast float %155 to i32 %158 = insertelement <2 x i32> undef, i32 %156, i32 0 %159 = insertelement <2 x i32> %158, i32 %157, i32 1 %160 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %159, <32 x i8> %54, <16 x i8> %57, i32 2) %161 = extractelement <4 x float> %160, i32 3 %162 = fmul float %30, %161 %163 = fmul float %31, %161 %164 = fmul float %32, %161 %165 = fmul float %73, %73 %166 = fmul float %74, %74 %167 = fadd float %166, %165 %168 = fmul float %75, %75 %169 = fadd float %167, %168 %170 = call float @llvm.AMDGPU.rsq.clamped.f32(float %169) %171 = fmul float %73, %170 %172 = fmul float %74, %170 %173 = fmul float %75, %170 %174 = fsub float 1.000000e+00, %41 %175 = fsub float %142, %171 %176 = fsub float %143, %172 %177 = fsub float %144, %173 %178 = fmul float %175, %175 %179 = fmul float %176, %176 %180 = fadd float %179, %178 %181 = fmul float %177, %177 %182 = fadd float %180, %181 %183 = call float @llvm.AMDGPU.rsq.clamped.f32(float %182) %184 = fmul float %175, %183 %185 = fmul float %176, %183 %186 = fmul float %177, %183 %187 = fmul float %171, %119 %188 = fsub float -0.000000e+00, %187 %189 = fmul float %172, %120 %190 = fsub float %188, %189 %191 = fmul float %173, %121 %192 = fsub float %190, %191 %193 = call float @llvm.maxnum.f32(float %192, float 0.000000e+00) %194 = fmul float %142, %184 %195 = fmul float %143, %185 %196 = fadd float %195, %194 %197 = fmul float %144, %186 %198 = fadd float %196, %197 %199 = call float @llvm.maxnum.f32(float %198, float 0.000000e+00) %200 = fmul float %174, %174 %201 = fmul float %200, %35 %202 = fsub float 1.000000e+00, %174 %203 = fmul float %202, 0x3FEEF9DB20000000 %204 = fadd float %203, 0x3F9EB851E0000000 %205 = call float @llvm.log2.f32(float %204) %206 = fdiv float 1.000000e+00, %205 %207 = fmul float %206, 1.000000e+01 %208 = fmul float %207, %207 %209 = fsub float 1.000000e+00, %150 %210 = fsub float 1.000000e+00, %193 %211 = fmul float %199, 2.000000e+00 %212 = fmul float %199, %174 %213 = fmul float %211, %212 %214 = fadd float %213, 5.000000e-01 %215 = fsub float 1.000000e+00, %199 %216 = call float @llvm.AMDGPU.lrp(float %150, float 1.000000e+00, float %201) %217 = call float @llvm.AMDGPU.lrp(float %193, float 1.000000e+00, float %201) %218 = fmul float %216, %217 %219 = fadd float %218, 0x3F1A36E2E0000000 %220 = fdiv float 1.000000e+00, %219 %221 = fmul float %119, %184 %222 = fmul float %120, %185 %223 = fadd float %222, %221 %224 = fmul float %121, %186 %225 = fadd float %223, %224 %226 = call float @llvm.maxnum.f32(float %225, float 0.000000e+00) %227 = call float @llvm.pow.f32(float %226, float %208) %228 = fadd float %208, 1.000000e+00 %229 = fmul float %228, %34 %230 = fmul float %227, %229 %231 = fmul float %220, %230 %232 = fmul float %231, %150 %233 = fmul float %232, %33 %234 = call float @llvm.maxnum.f32(float %233, float 0.000000e+00) %235 = fmul float %234, %162 %236 = fmul float %234, %163 %237 = fmul float %234, %164 %238 = fsub float 1.000000e+00, %133 %239 = fsub float 1.000000e+00, %134 %240 = fsub float 1.000000e+00, %135 %241 = fmul float %215, %215 %242 = fmul float %215, %215 %243 = fmul float %242, %215 %244 = fmul float %241, %243 %245 = fmul float %238, %244 %246 = fadd float %245, %133 %247 = fmul float %239, %244 %248 = fadd float %247, %134 %249 = fmul float %240, %244 %250 = fadd float %249, %135 %251 = fmul float %40, %29 %252 = fsub float %29, %251 %253 = fmul float %130, %252 %254 = fmul float %131, %252 %255 = fmul float %132, %252 %256 = fadd float %214, -1.000000e+00 %257 = fmul float %209, %209 %258 = fmul float %209, %209 %259 = fmul float %258, %209 %260 = fmul float %257, %259 %261 = fmul float %256, %260 %262 = fadd float %261, 1.000000e+00 %263 = fadd float %214, -1.000000e+00 %264 = fmul float %210, %210 %265 = fmul float %210, %210 %266 = fmul float %265, %210 %267 = fmul float %264, %266 %268 = fmul float %263, %267 %269 = fadd float %268, 1.000000e+00 %270 = fmul float %262, %269 %271 = fmul float %270, %150 %272 = fmul float %162, %271 %273 = fmul float %163, %271 %274 = fmul float %164, %271 %275 = fmul float %253, %272 %276 = fmul float %254, %273 %277 = fmul float %255, %274 %278 = fmul float %235, %246 %279 = fadd float %278, %275 %280 = fmul float %236, %248 %281 = fadd float %280, %276 %282 = fmul float %237, %250 %283 = fadd float %282, %277 %284 = fmul float %72, %24 %285 = fadd float %284, %25 %286 = call float @llvm.AMDIL.clamp.(float %285, float 0.000000e+00, float 1.000000e+00) %287 = fmul float %279, %286 %288 = fmul float %281, %286 %289 = fmul float %283, %286 %290 = call i32 @llvm.SI.packf16(float %287, float %288) %291 = bitcast i32 %290 to float %292 = call i32 @llvm.SI.packf16(float %289, float 1.000000e+00) %293 = bitcast i32 %292 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %291, float %293, float %291, float %293) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v7, v0, 3, 1, [m0] ; C81C0700 v_interp_p2_f32 v7, [v7], v1, 3, 1, [m0] ; C81D0701 v_interp_p1_f32 v8, v0, 0, 2, [m0] ; C8200800 v_interp_p2_f32 v8, [v8], v1, 0, 2, [m0] ; C8210801 v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900 v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901 v_interp_p1_f32 v10, v0, 2, 2, [m0] ; C8280A00 v_interp_p2_f32 v10, [v10], v1, 2, 2, [m0] ; C8290A01 v_interp_p1_f32 v11, v0, 3, 2, [m0] ; C82C0B00 v_interp_p2_f32 v11, [v11], v1, 3, 2, [m0] ; C82D0B01 v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00 v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01 v_interp_p1_f32 v13, v0, 1, 3, [m0] ; C8340D00 v_interp_p2_f32 v13, [v13], v1, 1, 3, [m0] ; C8350D01 v_interp_p1_f32 v14, v0, 2, 3, [m0] ; C8380E00 v_interp_p2_f32 v14, [v14], v1, 2, 3, [m0] ; C8390E01 v_interp_p1_f32 v15, v0, 3, 3, [m0] ; C83C0F00 v_interp_p2_f32 v15, [v15], v1, 3, 3, [m0] ; C83D0F01 v_interp_p1_f32 v16, v0, 0, 4, [m0] ; C8401000 v_interp_p2_f32 v16, [v16], v1, 0, 4, [m0] ; C8411001 v_interp_p1_f32 v17, v0, 1, 4, [m0] ; C8441100 v_interp_p2_f32 v17, [v17], v1, 1, 4, [m0] ; C8451101 v_interp_p1_f32 v18, v0, 2, 4, [m0] ; C8481200 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p2_f32 v18, [v18], v1, 2, 4, [m0] ; C8491201 v_interp_p1_f32 v19, v0, 3, 4, [m0] ; C84C1300 v_interp_p2_f32 v19, [v19], v1, 3, 4, [m0] ; C84D1301 v_interp_p1_f32 v20, v0, 0, 5, [m0] ; C8501400 v_interp_p2_f32 v20, [v20], v1, 0, 5, [m0] ; C8511401 v_interp_p1_f32 v21, v0, 1, 5, [m0] ; C8541500 v_interp_p2_f32 v21, [v21], v1, 1, 5, [m0] ; C8551501 v_interp_p1_f32 v0, v0, 2, 5, [m0] ; C8001600 v_interp_p2_f32 v0, [v0], v1, 2, 5, [m0] ; C8011601 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s32, s[0:3], 0x10 ; C2100110 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_buffer_load_dword s33, s[0:3], 0x11 ; C2108111 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_buffer_load_dword s34, s[0:3], 0x12 ; C2110112 v_mul_f32_e32 v22, v20, v20 ; 102C2914 s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508 s_load_dwordx8 s[40:47], s[6:7], 0x10 ; C0D40710 v_mac_f32_e32 v22, v21, v21 ; 3E2C2B15 v_mac_f32_e32 v22, v0, v0 ; 3E2C0100 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[12:15] ; F0800A00 00660002 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800700 00441702 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v2, s32, v23 ; 10042E20 v_mul_f32_e32 v3, s33, v24 ; 10063021 v_mul_f32_e32 v20, s34, v25 ; 10283222 v_mov_b32_e32 v23, v22 ; 7E2E0316 image_sample v21, 8, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[40:47], s[36:39] ; F0800800 012A1516 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 v_mov_b32_e32 v22, s5 ; 7E2C0205 v_mac_f32_e32 v22, s4, v16 ; 3E2C2004 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_buffer_load_dword s5, s[0:3], 0xd ; C202810D s_buffer_load_dword s9, s[0:3], 0xf ; C204810F s_buffer_load_dword s10, s[0:3], 0x7 ; C2050107 s_buffer_load_dword s11, s[0:3], 0x8 ; C2058108 s_buffer_load_dword s12, s[0:3], 0x9 ; C2060109 s_buffer_load_dword s13, s[0:3], 0xa ; C206810A s_buffer_load_dword s14, s[0:3], 0xc ; C207010C s_buffer_load_dword s15, s[0:3], 0x20 ; C2078120 s_buffer_load_dword s0, s[0:3], 0x24 ; C2000124 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mul_f32_e32 v0, s4, v0 ; 10000004 v_mul_f32_e32 v4, v4, v1 ; 10080304 v_mac_f32_e32 v4, v8, v0 ; 3E080108 v_mul_f32_e32 v5, v5, v1 ; 100A0305 v_mac_f32_e32 v5, v9, v0 ; 3E0A0109 v_mul_f32_e32 v6, v6, v1 ; 100C0306 v_mac_f32_e32 v6, v10, v0 ; 3E0C010A v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mac_f32_e32 v0, v1, v1 ; 3E000301 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v0, 1.0, v0 ; 080000F2 v_sqrt_f32_e32 v0, v0 ; 7E006700 v_mac_f32_e32 v4, v12, v0 ; 3E08010C v_mac_f32_e32 v5, v13, v0 ; 3E0A010D v_mac_f32_e32 v6, v14, v0 ; 3E0C010E v_mul_f32_e32 v0, v4, v4 ; 10000904 v_mac_f32_e32 v0, v5, v5 ; 3E000B05 v_mac_f32_e32 v0, v6, v6 ; 3E000D06 v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_mul_f32_e32 v1, v17, v17 ; 10022311 v_mac_f32_e32 v1, v18, v18 ; 3E022512 v_mac_f32_e32 v1, v19, v19 ; 3E022713 v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 v_mul_f32_e32 v8, v7, v7 ; 10100F07 v_mac_f32_e32 v8, v11, v11 ; 3E10170B v_mac_f32_e32 v8, v15, v15 ; 3E101F0F v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_mul_f32_e32 v9, v1, v17 ; 10122301 v_mul_f32_e32 v10, v1, v18 ; 10142501 v_mul_f32_e32 v1, v1, v19 ; 10022701 v_mul_f32_e32 v4, v0, v4 ; 10080900 v_mul_f32_e32 v5, v0, v5 ; 100A0B00 v_mul_f32_e32 v12, v8, v7 ; 10180F08 v_mad_f32 v7, v7, v8, -v9 ; D2820007 84261107 v_mul_f32_e32 v9, v4, v9 ; 10121304 v_mad_f32 v9, -v10, v5, -v9 ; D2820009 A4260B0A v_mad_f32 v10, v11, v8, -v10 ; D282000A 842A110B v_mul_f32_e32 v13, v7, v7 ; 101A0F07 v_mac_f32_e32 v13, v10, v10 ; 3E1A150A v_mad_f32 v14, v15, v8, -v1 ; D282000E 8406110F v_mac_f32_e32 v13, v14, v14 ; 3E1A1D0E v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D v_mul_f32_e32 v11, v8, v11 ; 10161708 v_mul_f32_e32 v16, v12, v4 ; 1020090C v_mac_f32_e32 v16, v11, v5 ; 3E200B0B v_mul_f32_e32 v7, v13, v7 ; 100E0F0D v_mul_f32_e32 v10, v13, v10 ; 1014150D v_mul_f32_e32 v12, v7, v12 ; 10181907 v_mac_f32_e32 v12, v10, v11 ; 3E18170A v_mul_f32_e32 v4, v7, v4 ; 10080907 v_mac_f32_e32 v4, v10, v5 ; 3E080B0A v_mul_f32_e32 v0, v0, v6 ; 10000D00 v_mul_f32_e32 v5, v8, v15 ; 100A1F08 v_mul_f32_e32 v6, v13, v14 ; 100C1D0D v_mad_f32 v1, -v1, v0, v9 ; D2820001 24260101 v_mac_f32_e32 v16, v5, v0 ; 3E200105 v_mac_f32_e32 v12, v6, v5 ; 3E180B06 v_mac_f32_e32 v4, v6, v0 ; 3E080106 v_sub_f32_e64 v0, 1.0, s15 ; D2080000 00001EF2 v_mul_f32_e32 v5, s6, v0 ; 100A0006 v_mad_f32 v6, -v0, s6, 1.0 ; D2820006 23C80D00 v_mul_f32_e32 v7, s7, v0 ; 100E0007 v_mad_f32 v8, -v0, s7, 1.0 ; D2820008 23C80F00 v_mul_f32_e32 v9, s8, v0 ; 10120008 v_mad_f32 v0, -v0, s8, 1.0 ; D2820000 23C81100 v_sub_f32_e64 v10, 1.0, s0 ; D208000A 000000F2 v_sub_f32_e32 v11, 1.0, v10 ; 081614F2 v_mov_b32_e32 v13, 0x3cf5c28f ; 7E1A02FF 3CF5C28F v_madmk_f32_e32 v11, v11, v13, 0x3f77ced9 ; 40161B0B 3F77CED9 v_max_f32_e32 v12, 0, v12 ; 20181880 v_sub_f32_e32 v13, 1.0, v12 ; 081A18F2 v_mul_f32_e32 v14, v13, v13 ; 101C1B0D v_mul_f32_e32 v13, v13, v14 ; 101A1D0D v_mul_f32_e32 v13, v13, v14 ; 101A1D0D v_mac_f32_e32 v5, s15, v2 ; 3E0A040F v_mad_f32 v6, -s15, v2, v6 ; D2820006 241A040F v_mac_f32_e32 v5, v13, v6 ; 3E0A0D0D v_mac_f32_e32 v7, s15, v3 ; 3E0E060F v_mad_f32 v6, -s15, v3, v8 ; D2820006 2422060F v_mac_f32_e32 v7, v13, v6 ; 3E0E0D0D v_mac_f32_e32 v9, s15, v20 ; 3E12280F v_log_f32_e32 v6, v11 ; 7E0C4F0B v_mad_f32 v0, -s15, v20, v0 ; D2820000 2402280F v_mac_f32_e32 v9, v13, v0 ; 3E12010D v_mul_f32_e32 v0, s11, v21 ; 10002A0B v_mul_f32_e32 v8, s12, v21 ; 10102A0C v_rcp_f32_e32 v6, v6 ; 7E0C5506 v_mul_f32_e32 v11, s13, v21 ; 10162A0D v_mul_f32_e32 v13, v10, v10 ; 101A150A v_mul_f32_e32 v13, s9, v13 ; 101A1A09 v_mul_f32_e32 v6, 0x41200000, v6 ; 100C0CFF 41200000 v_mad_f32 v14, v6, v6, 1.0 ; D282000E 03CA0D06 v_mul_f32_e32 v14, s5, v14 ; 101C1C05 v_max_f32_e32 v1, 0, v1 ; 20020280 v_sub_f32_e32 v15, 1.0, v1 ; 081E02F2 v_mul_f32_e32 v17, v13, v15 ; 10221F0D v_mac_f32_e32 v17, 1.0, v1 ; 3E2202F2 v_max_f32_e32 v1, 0, v16 ; 20022080 v_sub_f32_e32 v16, 1.0, v1 ; 082002F2 v_mul_f32_e32 v13, v13, v16 ; 101A210D v_mac_f32_e32 v13, 1.0, v1 ; 3E1A02F2 v_max_f32_e32 v4, 0, v4 ; 20080880 v_log_f32_e32 v4, v4 ; 7E084F04 v_madak_f32_e32 v13, v13, v17, 0x38d1b717 ; 421A230D 38D1B717 v_mul_f32_e32 v6, v6, v6 ; 100C0D06 v_rcp_f32_e32 v13, v13 ; 7E1A550D v_mul_legacy_f32_e32 v4, v6, v4 ; 0E080906 v_exp_f32_e32 v4, v4 ; 7E084B04 v_mul_f32_e32 v4, v14, v4 ; 1008090E v_mul_f32_e32 v4, v4, v13 ; 10081B04 v_mul_f32_e32 v4, v1, v4 ; 10080901 v_mul_f32_e32 v4, s14, v4 ; 1008080E v_mov_b32_e32 v6, s15 ; 7E0C020F v_mad_f32 v6, -v6, s10, s10 ; D2820006 20281506 v_mul_f32_e32 v10, v10, v12 ; 1014190A v_add_f32_e32 v12, v12, v12 ; 0618190C v_mad_f32 v10, v12, v10, 0.5 ; D282000A 03C2150C v_mul_f32_e32 v12, v16, v16 ; 10182110 v_mul_f32_e32 v13, v16, v12 ; 101A1910 v_mul_f32_e32 v12, v13, v12 ; 1018190D v_mul_f32_e32 v13, v15, v15 ; 101A1F0F v_mul_f32_e32 v14, v15, v13 ; 101C1B0F v_mul_f32_e32 v13, v14, v13 ; 101A1B0E v_add_f32_e32 v10, -1.0, v10 ; 061414F3 v_mad_f32 v12, v10, v12, 1.0 ; D282000C 03CA190A v_mad_f32 v10, v10, v13, 1.0 ; D282000A 03CA1B0A v_mul_f32_e32 v10, v10, v12 ; 1014190A v_mul_f32_e32 v2, v6, v2 ; 10040506 v_mul_f32_e32 v1, v1, v10 ; 10021501 v_mul_f32_e32 v10, v1, v0 ; 10140101 v_mul_f32_e32 v2, v10, v2 ; 1004050A v_max_f32_e32 v4, 0, v4 ; 20080880 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_mac_f32_e32 v2, v5, v0 ; 3E040105 v_mul_f32_e32 v0, v6, v3 ; 10000706 v_mul_f32_e32 v3, v1, v8 ; 10061101 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mul_f32_e32 v3, v8, v4 ; 10060908 v_mac_f32_e32 v0, v7, v3 ; 3E000707 v_mul_f32_e32 v3, v6, v20 ; 10062906 v_mul_f32_e32 v4, v11, v4 ; 1008090B v_mul_f32_e32 v1, v1, v11 ; 10021701 v_mul_f32_e32 v1, v1, v3 ; 10020701 v_mac_f32_e32 v1, v9, v4 ; 3E020909 v_add_f32_e64 v3, 0, v22 clamp ; D2060803 00022C80 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102 v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 28 Code Size: 1084 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..7] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyxx 1: MOV TEMP[0].z, IMM[0].xxxx 2: MUL TEMP[1], CONST[0], IN[0].xxxx 3: MAD TEMP[1], CONST[1], IN[0].yyyy, TEMP[1] 4: MAD TEMP[1], CONST[2], IN[0].zzzz, TEMP[1] 5: MAD TEMP[1].xyz, CONST[3], IN[0].wwww, TEMP[1] 6: MUL TEMP[2], CONST[4], IN[0].xxxx 7: MAD TEMP[2], CONST[5], IN[0].yyyy, TEMP[2] 8: MAD TEMP[2], CONST[6], IN[0].zzzz, TEMP[2] 9: MAD TEMP[2], CONST[7], IN[0].wwww, TEMP[2] 10: MOV TEMP[0].xyz, TEMP[0].xyzx 11: MOV TEMP[0].w, TEMP[1].xxxx 12: MOV TEMP[1].xy, TEMP[1].yzyy 13: MOV OUT[2], TEMP[1] 14: MOV OUT[1], TEMP[0] 15: MOV OUT[0], TEMP[2] 16: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %44 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0 %46 = add i32 %5, %7 %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %45, i32 0, i32 %46) %48 = extractelement <4 x float> %47, i32 0 %49 = extractelement <4 x float> %47, i32 1 %50 = extractelement <4 x float> %47, i32 2 %51 = extractelement <4 x float> %47, i32 3 %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = fmul float %13, %48 %59 = fmul float %14, %48 %60 = fmul float %15, %48 %61 = fmul float %16, %48 %62 = fmul float %17, %49 %63 = fadd float %62, %58 %64 = fmul float %18, %49 %65 = fadd float %64, %59 %66 = fmul float %19, %49 %67 = fadd float %66, %60 %68 = fmul float %20, %49 %69 = fadd float %68, %61 %70 = fmul float %21, %50 %71 = fadd float %70, %63 %72 = fmul float %22, %50 %73 = fadd float %72, %65 %74 = fmul float %23, %50 %75 = fadd float %74, %67 %76 = fmul float %24, %50 %77 = fadd float %76, %69 %78 = fmul float %25, %51 %79 = fadd float %78, %71 %80 = fmul float %26, %51 %81 = fadd float %80, %73 %82 = fmul float %27, %51 %83 = fadd float %82, %75 %84 = fmul float %28, %48 %85 = fmul float %29, %48 %86 = fmul float %30, %48 %87 = fmul float %31, %48 %88 = fmul float %32, %49 %89 = fadd float %88, %84 %90 = fmul float %33, %49 %91 = fadd float %90, %85 %92 = fmul float %34, %49 %93 = fadd float %92, %86 %94 = fmul float %35, %49 %95 = fadd float %94, %87 %96 = fmul float %36, %50 %97 = fadd float %96, %89 %98 = fmul float %37, %50 %99 = fadd float %98, %91 %100 = fmul float %38, %50 %101 = fadd float %100, %93 %102 = fmul float %39, %50 %103 = fadd float %102, %95 %104 = fmul float %40, %51 %105 = fadd float %104, %97 %106 = fmul float %41, %51 %107 = fadd float %106, %99 %108 = fmul float %42, %51 %109 = fadd float %108, %101 %110 = fmul float %43, %51 %111 = fadd float %110, %103 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %56, float %57, float 0.000000e+00, float %79) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %81, float %83, float %83, float %77) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %105, float %107, float %109, float %111) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108 s_buffer_load_dword s6, s[0:3], 0xc ; C203010C s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v0, s8, v1 ; 10000208 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, s4, v2 ; 3E000404 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 v_mac_f32_e32 v0, s5, v3 ; 3E000605 s_buffer_load_dword s5, s[0:3], 0xd ; C202810D s_buffer_load_dword s8, s[0:3], 0xe ; C204010E v_mac_f32_e32 v0, s6, v4 ; 3E000806 v_mov_b32_e32 v7, 0 ; 7E0E0280 exp 15, 32, 0, 0, 0, v5, v6, v7, v0 ; F800020F 00070605 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_buffer_load_dword s11, s[0:3], 0x6 ; C2058106 s_buffer_load_dword s12, s[0:3], 0x7 ; C2060107 s_buffer_load_dword s13, s[0:3], 0xa ; C206810A s_buffer_load_dword s14, s[0:3], 0xb ; C207010B s_buffer_load_dword s15, s[0:3], 0x10 ; C2078110 s_buffer_load_dword s16, s[0:3], 0x11 ; C2080111 s_buffer_load_dword s17, s[0:3], 0x12 ; C2088112 s_buffer_load_dword s18, s[0:3], 0x13 ; C2090113 s_buffer_load_dword s19, s[0:3], 0x14 ; C2098114 s_buffer_load_dword s20, s[0:3], 0x15 ; C20A0115 s_buffer_load_dword s21, s[0:3], 0x16 ; C20A8116 s_buffer_load_dword s22, s[0:3], 0x17 ; C20B0117 s_buffer_load_dword s23, s[0:3], 0x18 ; C20B8118 s_buffer_load_dword s24, s[0:3], 0x19 ; C20C0119 s_buffer_load_dword s25, s[0:3], 0x1a ; C20C811A s_buffer_load_dword s26, s[0:3], 0x1b ; C20D011B s_buffer_load_dword s27, s[0:3], 0x1c ; C20D811C s_buffer_load_dword s28, s[0:3], 0x1d ; C20E011D s_buffer_load_dword s29, s[0:3], 0x1e ; C20E811E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v0, s9, v1 ; 10000209 v_mac_f32_e32 v0, s6, v2 ; 3E000406 v_mul_f32_e32 v5, s10, v1 ; 100A020A v_mac_f32_e32 v5, s11, v2 ; 3E0A040B v_mul_f32_e32 v6, s7, v1 ; 100C0207 v_mac_f32_e32 v6, s12, v2 ; 3E0C040C v_mul_f32_e32 v7, s15, v1 ; 100E020F v_mac_f32_e32 v7, s19, v2 ; 3E0E0413 v_mul_f32_e32 v8, s16, v1 ; 10100210 v_mac_f32_e32 v8, s20, v2 ; 3E100414 v_mul_f32_e32 v9, s17, v1 ; 10120211 v_mac_f32_e32 v9, s21, v2 ; 3E120415 v_mul_f32_e32 v1, s18, v1 ; 10020212 v_mac_f32_e32 v1, s22, v2 ; 3E020416 v_mac_f32_e32 v0, s4, v3 ; 3E000604 v_mac_f32_e32 v5, s13, v3 ; 3E0A060D v_mac_f32_e32 v6, s14, v3 ; 3E0C060E v_mac_f32_e32 v7, s23, v3 ; 3E0E0617 v_mac_f32_e32 v8, s24, v3 ; 3E100618 v_mac_f32_e32 v9, s25, v3 ; 3E120619 v_mac_f32_e32 v1, s26, v3 ; 3E02061A v_mac_f32_e32 v0, s5, v4 ; 3E000805 v_mac_f32_e32 v5, s8, v4 ; 3E0A0808 v_mac_f32_e32 v7, s27, v4 ; 3E0E081B v_mac_f32_e32 v8, s28, v4 ; 3E10081C v_mac_f32_e32 v9, s29, v4 ; 3E12081D v_mac_f32_e32 v1, s0, v4 ; 3E020800 exp 15, 33, 0, 0, 0, v0, v5, v5, v6 ; F800021F 06050500 exp 15, 12, 0, 1, 0, v7, v8, v9, v1 ; F80008CF 01090807 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 12 Code Size: 328 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0] DCL CONST[2..14] DCL TEMP[0..14], LOCAL IMM[0] FLT32 { 0.1000, 0.0039, 0.0020, 0.0000} IMM[1] FLT32 { 4.0000, -1.0000, -0.0000, 0.2000} IMM[2] FLT32 { 1.1000, 1.0000, 0.1900, 3.3000} IMM[3] FLT32 { 0.3030, 0.2100, 0.2300, 0.2500} IMM[4] FLT32 { 0.5000, -0.2000, 0.1140, 0.5870} IMM[5] FLT32 { 0.2990, 1.5000, -0.5000, 0.0000} 0: MOV TEMP[0].x, IN[0].wwww 1: MOV TEMP[0].yz, IN[1].yxyy 2: MAD TEMP[1].xyz, IN[0].xyzz, CONST[2].xxxx, CONST[12].xyzz 3: MOV TEMP[2].z, TEMP[1].zzzz 4: MUL TEMP[3].xyz, TEMP[1].xyzz, IMM[0].xxxx 5: FLR TEMP[4].xyz, TEMP[3].xyzz 6: MAD TEMP[4].xyz, IMM[0].yyyy, TEMP[4].xyzz, IMM[0].zzzz 7: FRC TEMP[3].xyz, TEMP[3].xyzz 8: MOV TEMP[5].xy, TEMP[4].xyyy 9: TEX TEMP[5].w, TEMP[5], SAMP[0], 2D 10: MOV TEMP[6].x, TEMP[5].wwww 11: MOV TEMP[6].y, TEMP[4].zzzz 12: MOV TEMP[5].x, TEMP[5].wwww 13: ADD TEMP[7].x, TEMP[4].zzzz, IMM[0].yyyy 14: MOV TEMP[5].y, TEMP[7].xxxx 15: ADD TEMP[7].xy, TEMP[4].xyyy, IMM[0].wyyy 16: MOV TEMP[7].xy, TEMP[7].xyyy 17: TEX TEMP[7].w, TEMP[7], SAMP[0], 2D 18: MOV TEMP[8].x, TEMP[7].wwww 19: MOV TEMP[8].y, TEMP[4].zzzz 20: MOV TEMP[7].x, TEMP[7].wwww 21: ADD TEMP[9].x, TEMP[4].zzzz, IMM[0].yyyy 22: MOV TEMP[7].y, TEMP[9].xxxx 23: ADD TEMP[9].xy, TEMP[4].xyyy, IMM[0].ywww 24: MOV TEMP[9].xy, TEMP[9].xyyy 25: TEX TEMP[9].w, TEMP[9], SAMP[0], 2D 26: MOV TEMP[10].x, TEMP[9].wwww 27: MOV TEMP[10].y, TEMP[4].zzzz 28: MOV TEMP[9].x, TEMP[9].wwww 29: ADD TEMP[11].x, TEMP[4].zzzz, IMM[0].yyyy 30: MOV TEMP[9].y, TEMP[11].xxxx 31: ADD TEMP[11].xy, TEMP[4].xyyy, IMM[0].yyyy 32: MOV TEMP[11].xy, TEMP[11].xyyy 33: TEX TEMP[11].w, TEMP[11], SAMP[0], 2D 34: MOV TEMP[12].x, TEMP[11].wwww 35: MOV TEMP[12].y, TEMP[4].zzzz 36: MOV TEMP[11].x, TEMP[11].wwww 37: ADD TEMP[4].x, TEMP[4].zzzz, IMM[0].yyyy 38: MOV TEMP[11].y, TEMP[4].xxxx 39: MOV TEMP[4].xy, TEMP[6].xyyy 40: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D 41: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy 42: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[3].xyzz 43: MOV TEMP[5].xy, TEMP[5].xyyy 44: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 45: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy 46: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zzyy 47: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz 48: MOV TEMP[4].y, TEMP[5].xxxx 49: MOV TEMP[5].xy, TEMP[8].xyyy 50: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 51: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy 52: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zyzz 53: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz 54: MOV TEMP[4].z, TEMP[5].xxxx 55: MOV TEMP[5].xy, TEMP[7].xyyy 56: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 57: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy 58: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zyyy 59: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz 60: MOV TEMP[4].w, TEMP[5].xxxx 61: MOV TEMP[5].xy, TEMP[10].xyyy 62: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 63: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy 64: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].yzzz 65: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz 66: MOV TEMP[6].xy, TEMP[9].xyyy 67: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D 68: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy 69: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yzyy 70: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz 71: MOV TEMP[5].y, TEMP[6].xxxx 72: MOV TEMP[6].xy, TEMP[12].xyyy 73: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D 74: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy 75: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yyzz 76: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz 77: MOV TEMP[5].z, TEMP[6].xxxx 78: MOV TEMP[6].xy, TEMP[11].xyyy 79: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D 80: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy 81: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yyyy 82: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz 83: MOV TEMP[5].w, TEMP[6].xxxx 84: LRP TEMP[4], TEMP[3].xxxx, TEMP[5], TEMP[4] 85: LRP TEMP[4].xy, TEMP[3].yyyy, TEMP[4].zwww, TEMP[4].xyyy 86: MUL TEMP[5].xyz, TEMP[1].yxzz, IMM[0].xxxx 87: FLR TEMP[6].xyz, TEMP[5].xyzz 88: MAD TEMP[6].xyz, IMM[0].yyyy, TEMP[6].xyzz, IMM[0].zzzz 89: FRC TEMP[5].xyz, TEMP[5].xyzz 90: MOV TEMP[7].xy, TEMP[6].xyyy 91: TEX TEMP[7].w, TEMP[7], SAMP[0], 2D 92: MOV TEMP[8].x, TEMP[7].wwww 93: MOV TEMP[8].y, TEMP[6].zzzz 94: MOV TEMP[7].x, TEMP[7].wwww 95: ADD TEMP[9].x, TEMP[6].zzzz, IMM[0].yyyy 96: MOV TEMP[7].y, TEMP[9].xxxx 97: ADD TEMP[9].xy, TEMP[6].xyyy, IMM[0].wyyy 98: MOV TEMP[9].xy, TEMP[9].xyyy 99: TEX TEMP[9].w, TEMP[9], SAMP[0], 2D 100: MOV TEMP[10].x, TEMP[9].wwww 101: MOV TEMP[10].y, TEMP[6].zzzz 102: MOV TEMP[9].x, TEMP[9].wwww 103: ADD TEMP[11].x, TEMP[6].zzzz, IMM[0].yyyy 104: MOV TEMP[9].y, TEMP[11].xxxx 105: ADD TEMP[11].xy, TEMP[6].xyyy, IMM[0].ywww 106: MOV TEMP[11].xy, TEMP[11].xyyy 107: TEX TEMP[11].w, TEMP[11], SAMP[0], 2D 108: MOV TEMP[12].x, TEMP[11].wwww 109: MOV TEMP[12].y, TEMP[6].zzzz 110: MOV TEMP[11].x, TEMP[11].wwww 111: ADD TEMP[13].x, TEMP[6].zzzz, IMM[0].yyyy 112: MOV TEMP[11].y, TEMP[13].xxxx 113: ADD TEMP[13].xy, TEMP[6].xyyy, IMM[0].yyyy 114: MOV TEMP[13].xy, TEMP[13].xyyy 115: TEX TEMP[13].w, TEMP[13], SAMP[0], 2D 116: MOV TEMP[14].x, TEMP[13].wwww 117: MOV TEMP[14].y, TEMP[6].zzzz 118: MOV TEMP[13].x, TEMP[13].wwww 119: ADD TEMP[6].x, TEMP[6].zzzz, IMM[0].yyyy 120: MOV TEMP[13].y, TEMP[6].xxxx 121: MOV TEMP[6].xy, TEMP[8].xyyy 122: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D 123: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy 124: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[5].xyzz 125: MOV TEMP[7].xy, TEMP[7].xyyy 126: TEX TEMP[7].xyz, TEMP[7], SAMP[0], 2D 127: MAD TEMP[7].xyz, TEMP[7].xyzz, IMM[1].xxxx, IMM[1].yyyy 128: ADD TEMP[8].xyz, TEMP[5].xyzz, IMM[1].zzyy 129: DP3 TEMP[7].x, TEMP[7].xyzz, TEMP[8].xyzz 130: MOV TEMP[6].y, TEMP[7].xxxx 131: MOV TEMP[7].xy, TEMP[10].xyyy 132: TEX TEMP[7].xyz, TEMP[7], SAMP[0], 2D 133: MAD TEMP[7].xyz, TEMP[7].xyzz, IMM[1].xxxx, IMM[1].yyyy 134: ADD TEMP[8].xyz, TEMP[5].xyzz, IMM[1].zyzz 135: DP3 TEMP[7].x, TEMP[7].xyzz, TEMP[8].xyzz 136: MOV TEMP[6].z, TEMP[7].xxxx 137: MOV TEMP[7].xy, TEMP[9].xyyy 138: TEX TEMP[7].xyz, TEMP[7], SAMP[0], 2D 139: MAD TEMP[7].xyz, TEMP[7].xyzz, IMM[1].xxxx, IMM[1].yyyy 140: ADD TEMP[8].xyz, TEMP[5].xyzz, IMM[1].zyyy 141: DP3 TEMP[7].x, TEMP[7].xyzz, TEMP[8].xyzz 142: MOV TEMP[6].w, TEMP[7].xxxx 143: MOV TEMP[7].xy, TEMP[12].xyyy 144: TEX TEMP[7].xyz, TEMP[7], SAMP[0], 2D 145: MAD TEMP[7].xyz, TEMP[7].xyzz, IMM[1].xxxx, IMM[1].yyyy 146: ADD TEMP[8].xyz, TEMP[5].xyzz, IMM[1].yzzz 147: DP3 TEMP[7].x, TEMP[7].xyzz, TEMP[8].xyzz 148: MOV TEMP[8].xy, TEMP[11].xyyy 149: TEX TEMP[8].xyz, TEMP[8], SAMP[0], 2D 150: MAD TEMP[8].xyz, TEMP[8].xyzz, IMM[1].xxxx, IMM[1].yyyy 151: ADD TEMP[9].xyz, TEMP[5].xyzz, IMM[1].yzyy 152: DP3 TEMP[8].x, TEMP[8].xyzz, TEMP[9].xyzz 153: MOV TEMP[7].y, TEMP[8].xxxx 154: MOV TEMP[8].xy, TEMP[14].xyyy 155: TEX TEMP[8].xyz, TEMP[8], SAMP[0], 2D 156: MAD TEMP[8].xyz, TEMP[8].xyzz, IMM[1].xxxx, IMM[1].yyyy 157: ADD TEMP[9].xyz, TEMP[5].xyzz, IMM[1].yyzz 158: DP3 TEMP[8].x, TEMP[8].xyzz, TEMP[9].xyzz 159: MOV TEMP[7].z, TEMP[8].xxxx 160: MOV TEMP[8].xy, TEMP[13].xyyy 161: TEX TEMP[8].xyz, TEMP[8], SAMP[0], 2D 162: MAD TEMP[8].xyz, TEMP[8].xyzz, IMM[1].xxxx, IMM[1].yyyy 163: ADD TEMP[9].xyz, TEMP[5].xyzz, IMM[1].yyyy 164: DP3 TEMP[8].x, TEMP[8].xyzz, TEMP[9].xyzz 165: MOV TEMP[7].w, TEMP[8].xxxx 166: LRP TEMP[6], TEMP[5].xxxx, TEMP[7], TEMP[6] 167: LRP TEMP[6].xy, TEMP[5].yyyy, TEMP[6].zwww, TEMP[6].xyyy 168: LRP TEMP[3].x, TEMP[3].zzzz, TEMP[4].yyyy, TEMP[4].xxxx 169: LRP TEMP[4].x, TEMP[5].zzzz, TEMP[6].yyyy, TEMP[6].xxxx 170: MOV TEMP[3].y, TEMP[4].xxxx 171: ADD TEMP[2].xy, TEMP[1].xyyy, TEMP[3].xyyy 172: ADD TEMP[1].xyz, TEMP[2].xyzz, IMM[1].wwww 173: FLR TEMP[3].xyz, TEMP[1].xyzz 174: MAD TEMP[3].xyz, IMM[0].yyyy, TEMP[3].xyzz, IMM[0].zzzz 175: FRC TEMP[1].xyz, TEMP[1].xyzz 176: MOV TEMP[4].xy, TEMP[3].xyyy 177: TEX TEMP[4].w, TEMP[4], SAMP[0], 2D 178: MOV TEMP[5].x, TEMP[4].wwww 179: MOV TEMP[5].y, TEMP[3].zzzz 180: MOV TEMP[4].x, TEMP[4].wwww 181: ADD TEMP[6].x, TEMP[3].zzzz, IMM[0].yyyy 182: MOV TEMP[4].y, TEMP[6].xxxx 183: ADD TEMP[6].xy, TEMP[3].xyyy, IMM[0].wyyy 184: MOV TEMP[6].xy, TEMP[6].xyyy 185: TEX TEMP[6].w, TEMP[6], SAMP[0], 2D 186: MOV TEMP[7].x, TEMP[6].wwww 187: MOV TEMP[7].y, TEMP[3].zzzz 188: MOV TEMP[6].x, TEMP[6].wwww 189: ADD TEMP[8].x, TEMP[3].zzzz, IMM[0].yyyy 190: MOV TEMP[6].y, TEMP[8].xxxx 191: ADD TEMP[8].xy, TEMP[3].xyyy, IMM[0].ywww 192: MOV TEMP[8].xy, TEMP[8].xyyy 193: TEX TEMP[8].w, TEMP[8], SAMP[0], 2D 194: MOV TEMP[9].x, TEMP[8].wwww 195: MOV TEMP[9].y, TEMP[3].zzzz 196: MOV TEMP[8].x, TEMP[8].wwww 197: ADD TEMP[10].x, TEMP[3].zzzz, IMM[0].yyyy 198: MOV TEMP[8].y, TEMP[10].xxxx 199: ADD TEMP[10].xy, TEMP[3].xyyy, IMM[0].yyyy 200: MOV TEMP[10].xy, TEMP[10].xyyy 201: TEX TEMP[10].w, TEMP[10], SAMP[0], 2D 202: MOV TEMP[11].x, TEMP[10].wwww 203: MOV TEMP[11].y, TEMP[3].zzzz 204: MOV TEMP[10].x, TEMP[10].wwww 205: ADD TEMP[3].x, TEMP[3].zzzz, IMM[0].yyyy 206: MOV TEMP[10].y, TEMP[3].xxxx 207: MOV TEMP[3].xy, TEMP[5].xyyy 208: TEX TEMP[3].xyz, TEMP[3], SAMP[0], 2D 209: MAD TEMP[3].xyz, TEMP[3].xyzz, IMM[1].xxxx, IMM[1].yyyy 210: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[1].xyzz 211: MOV TEMP[4].xy, TEMP[4].xyyy 212: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D 213: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy 214: ADD TEMP[5].xyz, TEMP[1].xyzz, IMM[1].zzyy 215: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[5].xyzz 216: MOV TEMP[3].y, TEMP[4].xxxx 217: MOV TEMP[4].xy, TEMP[7].xyyy 218: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D 219: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy 220: ADD TEMP[5].xyz, TEMP[1].xyzz, IMM[1].zyzz 221: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[5].xyzz 222: MOV TEMP[3].z, TEMP[4].xxxx 223: MOV TEMP[4].xy, TEMP[6].xyyy 224: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D 225: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy 226: ADD TEMP[5].xyz, TEMP[1].xyzz, IMM[1].zyyy 227: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[5].xyzz 228: MOV TEMP[3].w, TEMP[4].xxxx 229: MOV TEMP[4].xy, TEMP[9].xyyy 230: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D 231: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy 232: ADD TEMP[5].xyz, TEMP[1].xyzz, IMM[1].yzzz 233: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[5].xyzz 234: MOV TEMP[5].xy, TEMP[8].xyyy 235: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 236: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy 237: ADD TEMP[6].xyz, TEMP[1].xyzz, IMM[1].yzyy 238: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz 239: MOV TEMP[4].y, TEMP[5].xxxx 240: MOV TEMP[5].xy, TEMP[11].xyyy 241: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 242: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy 243: ADD TEMP[6].xyz, TEMP[1].xyzz, IMM[1].yyzz 244: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz 245: MOV TEMP[4].z, TEMP[5].xxxx 246: MOV TEMP[5].xy, TEMP[10].xyyy 247: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 248: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy 249: ADD TEMP[6].xyz, TEMP[1].xyzz, IMM[1].yyyy 250: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz 251: MOV TEMP[4].w, TEMP[5].xxxx 252: LRP TEMP[3], TEMP[1].xxxx, TEMP[4], TEMP[3] 253: LRP TEMP[3].xy, TEMP[1].yyyy, TEMP[3].zwww, TEMP[3].xyyy 254: LRP TEMP[1].x, TEMP[1].zzzz, TEMP[3].yyyy, TEMP[3].xxxx 255: MAD TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx, IMM[2].yyyy 256: MAD TEMP[3].xyz, TEMP[1].xxxx, IMM[2].zzzz, TEMP[2].xyzz 257: MUL TEMP[3].xyz, TEMP[3].xyzz, IMM[2].wwww 258: FLR TEMP[4].xyz, TEMP[3].xyzz 259: MAD TEMP[4].xyz, IMM[0].yyyy, TEMP[4].xyzz, IMM[0].zzzz 260: FRC TEMP[3].xyz, TEMP[3].xyzz 261: MOV TEMP[5].xy, TEMP[4].xyyy 262: TEX TEMP[5].w, TEMP[5], SAMP[0], 2D 263: MOV TEMP[6].x, TEMP[5].wwww 264: MOV TEMP[6].y, TEMP[4].zzzz 265: MOV TEMP[5].x, TEMP[5].wwww 266: ADD TEMP[7].x, TEMP[4].zzzz, IMM[0].yyyy 267: MOV TEMP[5].y, TEMP[7].xxxx 268: ADD TEMP[7].xy, TEMP[4].xyyy, IMM[0].wyyy 269: MOV TEMP[7].xy, TEMP[7].xyyy 270: TEX TEMP[7].w, TEMP[7], SAMP[0], 2D 271: MOV TEMP[8].x, TEMP[7].wwww 272: MOV TEMP[8].y, TEMP[4].zzzz 273: MOV TEMP[7].x, TEMP[7].wwww 274: ADD TEMP[9].x, TEMP[4].zzzz, IMM[0].yyyy 275: MOV TEMP[7].y, TEMP[9].xxxx 276: ADD TEMP[9].xy, TEMP[4].xyyy, IMM[0].ywww 277: MOV TEMP[9].xy, TEMP[9].xyyy 278: TEX TEMP[9].w, TEMP[9], SAMP[0], 2D 279: MOV TEMP[10].x, TEMP[9].wwww 280: MOV TEMP[10].y, TEMP[4].zzzz 281: MOV TEMP[9].x, TEMP[9].wwww 282: ADD TEMP[11].x, TEMP[4].zzzz, IMM[0].yyyy 283: MOV TEMP[9].y, TEMP[11].xxxx 284: ADD TEMP[11].xy, TEMP[4].xyyy, IMM[0].yyyy 285: MOV TEMP[11].xy, TEMP[11].xyyy 286: TEX TEMP[11].w, TEMP[11], SAMP[0], 2D 287: MOV TEMP[12].x, TEMP[11].wwww 288: MOV TEMP[12].y, TEMP[4].zzzz 289: MOV TEMP[11].x, TEMP[11].wwww 290: ADD TEMP[4].x, TEMP[4].zzzz, IMM[0].yyyy 291: MOV TEMP[11].y, TEMP[4].xxxx 292: MOV TEMP[4].xy, TEMP[6].xyyy 293: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D 294: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy 295: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[3].xyzz 296: MOV TEMP[5].xy, TEMP[5].xyyy 297: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 298: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy 299: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zzyy 300: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz 301: MOV TEMP[4].y, TEMP[5].xxxx 302: MOV TEMP[5].xy, TEMP[8].xyyy 303: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 304: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy 305: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zyzz 306: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz 307: MOV TEMP[4].z, TEMP[5].xxxx 308: MOV TEMP[5].xy, TEMP[7].xyyy 309: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 310: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy 311: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zyyy 312: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz 313: MOV TEMP[4].w, TEMP[5].xxxx 314: MOV TEMP[5].xy, TEMP[10].xyyy 315: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 316: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy 317: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].yzzz 318: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz 319: MOV TEMP[6].xy, TEMP[9].xyyy 320: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D 321: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy 322: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yzyy 323: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz 324: MOV TEMP[5].y, TEMP[6].xxxx 325: MOV TEMP[6].xy, TEMP[12].xyyy 326: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D 327: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy 328: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yyzz 329: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz 330: MOV TEMP[5].z, TEMP[6].xxxx 331: MOV TEMP[6].xy, TEMP[11].xyyy 332: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D 333: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy 334: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yyyy 335: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz 336: MOV TEMP[5].w, TEMP[6].xxxx 337: LRP TEMP[4], TEMP[3].xxxx, TEMP[5], TEMP[4] 338: LRP TEMP[4].xy, TEMP[3].yyyy, TEMP[4].zwww, TEMP[4].xyyy 339: LRP TEMP[3].x, TEMP[3].zzzz, TEMP[4].yyyy, TEMP[4].xxxx 340: ABS TEMP[3].x, TEMP[3].xxxx 341: MAD TEMP[1].x, TEMP[3].xxxx, IMM[3].xxxx, TEMP[1].xxxx 342: MAD TEMP[3].xyz, TEMP[1].xxxx, IMM[3].yyyy, TEMP[2].xyzz 343: MUL TEMP[3].xyz, TEMP[3].xyzz, CONST[13].xxxx 344: FLR TEMP[4].xyz, TEMP[3].xyzz 345: MAD TEMP[4].xyz, IMM[0].yyyy, TEMP[4].xyzz, IMM[0].zzzz 346: FRC TEMP[3].xyz, TEMP[3].xyzz 347: MOV TEMP[5].xy, TEMP[4].xyyy 348: TEX TEMP[5].w, TEMP[5], SAMP[0], 2D 349: MOV TEMP[6].x, TEMP[5].wwww 350: MOV TEMP[6].y, TEMP[4].zzzz 351: MOV TEMP[5].x, TEMP[5].wwww 352: ADD TEMP[7].x, TEMP[4].zzzz, IMM[0].yyyy 353: MOV TEMP[5].y, TEMP[7].xxxx 354: ADD TEMP[7].xy, TEMP[4].xyyy, IMM[0].wyyy 355: MOV TEMP[7].xy, TEMP[7].xyyy 356: TEX TEMP[7].w, TEMP[7], SAMP[0], 2D 357: MOV TEMP[8].x, TEMP[7].wwww 358: MOV TEMP[8].y, TEMP[4].zzzz 359: MOV TEMP[7].x, TEMP[7].wwww 360: ADD TEMP[9].x, TEMP[4].zzzz, IMM[0].yyyy 361: MOV TEMP[7].y, TEMP[9].xxxx 362: ADD TEMP[9].xy, TEMP[4].xyyy, IMM[0].ywww 363: MOV TEMP[9].xy, TEMP[9].xyyy 364: TEX TEMP[9].w, TEMP[9], SAMP[0], 2D 365: MOV TEMP[10].x, TEMP[9].wwww 366: MOV TEMP[10].y, TEMP[4].zzzz 367: MOV TEMP[9].x, TEMP[9].wwww 368: ADD TEMP[11].x, TEMP[4].zzzz, IMM[0].yyyy 369: MOV TEMP[9].y, TEMP[11].xxxx 370: ADD TEMP[11].xy, TEMP[4].xyyy, IMM[0].yyyy 371: MOV TEMP[11].xy, TEMP[11].xyyy 372: TEX TEMP[11].w, TEMP[11], SAMP[0], 2D 373: MOV TEMP[12].x, TEMP[11].wwww 374: MOV TEMP[12].y, TEMP[4].zzzz 375: MOV TEMP[11].x, TEMP[11].wwww 376: ADD TEMP[4].x, TEMP[4].zzzz, IMM[0].yyyy 377: MOV TEMP[11].y, TEMP[4].xxxx 378: MOV TEMP[4].xy, TEMP[6].xyyy 379: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D 380: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy 381: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[3].xyzz 382: MOV TEMP[5].xy, TEMP[5].xyyy 383: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 384: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy 385: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zzyy 386: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz 387: MOV TEMP[4].y, TEMP[5].xxxx 388: MOV TEMP[5].xy, TEMP[8].xyyy 389: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 390: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy 391: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zyzz 392: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz 393: MOV TEMP[4].z, TEMP[5].xxxx 394: MOV TEMP[5].xy, TEMP[7].xyyy 395: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 396: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy 397: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zyyy 398: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz 399: MOV TEMP[4].w, TEMP[5].xxxx 400: MOV TEMP[5].xy, TEMP[10].xyyy 401: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 402: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy 403: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].yzzz 404: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz 405: MOV TEMP[6].xy, TEMP[9].xyyy 406: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D 407: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy 408: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yzyy 409: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz 410: MOV TEMP[5].y, TEMP[6].xxxx 411: MOV TEMP[6].xy, TEMP[12].xyyy 412: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D 413: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy 414: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yyzz 415: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz 416: MOV TEMP[5].z, TEMP[6].xxxx 417: MOV TEMP[6].xy, TEMP[11].xyyy 418: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D 419: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy 420: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yyyy 421: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz 422: MOV TEMP[5].w, TEMP[6].xxxx 423: LRP TEMP[4], TEMP[3].xxxx, TEMP[5], TEMP[4] 424: LRP TEMP[4].xy, TEMP[3].yyyy, TEMP[4].zwww, TEMP[4].xyyy 425: LRP TEMP[3].x, TEMP[3].zzzz, TEMP[4].yyyy, TEMP[4].xxxx 426: ABS TEMP[3].x, TEMP[3].xxxx 427: RCP TEMP[4].x, CONST[14].xxxx 428: MAD TEMP[1].x, TEMP[3].xxxx, TEMP[4].xxxx, TEMP[1].xxxx 429: MAD TEMP[3].xyz, TEMP[1].xxxx, IMM[3].zzzz, TEMP[2].xyzz 430: MUL TEMP[3].xyz, TEMP[3].xyzz, CONST[13].yyyy 431: FLR TEMP[4].xyz, TEMP[3].xyzz 432: MAD TEMP[4].xyz, IMM[0].yyyy, TEMP[4].xyzz, IMM[0].zzzz 433: FRC TEMP[3].xyz, TEMP[3].xyzz 434: MOV TEMP[5].xy, TEMP[4].xyyy 435: TEX TEMP[5].w, TEMP[5], SAMP[0], 2D 436: MOV TEMP[6].x, TEMP[5].wwww 437: MOV TEMP[6].y, TEMP[4].zzzz 438: MOV TEMP[5].x, TEMP[5].wwww 439: ADD TEMP[7].x, TEMP[4].zzzz, IMM[0].yyyy 440: MOV TEMP[5].y, TEMP[7].xxxx 441: ADD TEMP[7].xy, TEMP[4].xyyy, IMM[0].wyyy 442: MOV TEMP[7].xy, TEMP[7].xyyy 443: TEX TEMP[7].w, TEMP[7], SAMP[0], 2D 444: MOV TEMP[8].x, TEMP[7].wwww 445: MOV TEMP[8].y, TEMP[4].zzzz 446: MOV TEMP[7].x, TEMP[7].wwww 447: ADD TEMP[9].x, TEMP[4].zzzz, IMM[0].yyyy 448: MOV TEMP[7].y, TEMP[9].xxxx 449: ADD TEMP[9].xy, TEMP[4].xyyy, IMM[0].ywww 450: MOV TEMP[9].xy, TEMP[9].xyyy 451: TEX TEMP[9].w, TEMP[9], SAMP[0], 2D 452: MOV TEMP[10].x, TEMP[9].wwww 453: MOV TEMP[10].y, TEMP[4].zzzz 454: MOV TEMP[9].x, TEMP[9].wwww 455: ADD TEMP[11].x, TEMP[4].zzzz, IMM[0].yyyy 456: MOV TEMP[9].y, TEMP[11].xxxx 457: ADD TEMP[11].xy, TEMP[4].xyyy, IMM[0].yyyy 458: MOV TEMP[11].xy, TEMP[11].xyyy 459: TEX TEMP[11].w, TEMP[11], SAMP[0], 2D 460: MOV TEMP[12].x, TEMP[11].wwww 461: MOV TEMP[12].y, TEMP[4].zzzz 462: MOV TEMP[11].x, TEMP[11].wwww 463: ADD TEMP[4].x, TEMP[4].zzzz, IMM[0].yyyy 464: MOV TEMP[11].y, TEMP[4].xxxx 465: MOV TEMP[4].xy, TEMP[6].xyyy 466: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D 467: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy 468: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[3].xyzz 469: MOV TEMP[5].xy, TEMP[5].xyyy 470: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 471: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy 472: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zzyy 473: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz 474: MOV TEMP[4].y, TEMP[5].xxxx 475: MOV TEMP[5].xy, TEMP[8].xyyy 476: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 477: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy 478: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zyzz 479: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz 480: MOV TEMP[4].z, TEMP[5].xxxx 481: MOV TEMP[5].xy, TEMP[7].xyyy 482: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 483: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy 484: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zyyy 485: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz 486: MOV TEMP[4].w, TEMP[5].xxxx 487: MOV TEMP[5].xy, TEMP[10].xyyy 488: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 489: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy 490: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].yzzz 491: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz 492: MOV TEMP[6].xy, TEMP[9].xyyy 493: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D 494: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy 495: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yzyy 496: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz 497: MOV TEMP[5].y, TEMP[6].xxxx 498: MOV TEMP[6].xy, TEMP[12].xyyy 499: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D 500: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy 501: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yyzz 502: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz 503: MOV TEMP[5].z, TEMP[6].xxxx 504: MOV TEMP[6].xy, TEMP[11].xyyy 505: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D 506: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy 507: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yyyy 508: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz 509: MOV TEMP[5].w, TEMP[6].xxxx 510: LRP TEMP[4], TEMP[3].xxxx, TEMP[5], TEMP[4] 511: LRP TEMP[4].xy, TEMP[3].yyyy, TEMP[4].zwww, TEMP[4].xyyy 512: LRP TEMP[3].x, TEMP[3].zzzz, TEMP[4].yyyy, TEMP[4].xxxx 513: ABS TEMP[3].x, TEMP[3].xxxx 514: RCP TEMP[4].x, CONST[14].yyyy 515: MAD TEMP[1].x, TEMP[3].xxxx, TEMP[4].xxxx, TEMP[1].xxxx 516: MAD TEMP[2].xyz, TEMP[1].xxxx, IMM[3].wwww, TEMP[2].xyzz 517: MUL TEMP[2].xyz, TEMP[2].xyzz, CONST[13].zzzz 518: FLR TEMP[3].xyz, TEMP[2].xyzz 519: MAD TEMP[3].xyz, IMM[0].yyyy, TEMP[3].xyzz, IMM[0].zzzz 520: FRC TEMP[2].xyz, TEMP[2].xyzz 521: MOV TEMP[4].xy, TEMP[3].xyyy 522: TEX TEMP[4].w, TEMP[4], SAMP[0], 2D 523: MOV TEMP[5].x, TEMP[4].wwww 524: MOV TEMP[5].y, TEMP[3].zzzz 525: MOV TEMP[4].x, TEMP[4].wwww 526: ADD TEMP[6].x, TEMP[3].zzzz, IMM[0].yyyy 527: MOV TEMP[4].y, TEMP[6].xxxx 528: ADD TEMP[6].xy, TEMP[3].xyyy, IMM[0].wyyy 529: MOV TEMP[6].xy, TEMP[6].xyyy 530: TEX TEMP[6].w, TEMP[6], SAMP[0], 2D 531: MOV TEMP[7].x, TEMP[6].wwww 532: MOV TEMP[7].y, TEMP[3].zzzz 533: MOV TEMP[6].x, TEMP[6].wwww 534: ADD TEMP[8].x, TEMP[3].zzzz, IMM[0].yyyy 535: MOV TEMP[6].y, TEMP[8].xxxx 536: ADD TEMP[8].xy, TEMP[3].xyyy, IMM[0].ywww 537: MOV TEMP[8].xy, TEMP[8].xyyy 538: TEX TEMP[8].w, TEMP[8], SAMP[0], 2D 539: MOV TEMP[9].x, TEMP[8].wwww 540: MOV TEMP[9].y, TEMP[3].zzzz 541: MOV TEMP[8].x, TEMP[8].wwww 542: ADD TEMP[10].x, TEMP[3].zzzz, IMM[0].yyyy 543: MOV TEMP[8].y, TEMP[10].xxxx 544: ADD TEMP[10].xy, TEMP[3].xyyy, IMM[0].yyyy 545: MOV TEMP[10].xy, TEMP[10].xyyy 546: TEX TEMP[10].w, TEMP[10], SAMP[0], 2D 547: MOV TEMP[11].x, TEMP[10].wwww 548: MOV TEMP[11].y, TEMP[3].zzzz 549: MOV TEMP[10].x, TEMP[10].wwww 550: ADD TEMP[3].x, TEMP[3].zzzz, IMM[0].yyyy 551: MOV TEMP[10].y, TEMP[3].xxxx 552: MOV TEMP[3].xy, TEMP[5].xyyy 553: TEX TEMP[3].xyz, TEMP[3], SAMP[0], 2D 554: MAD TEMP[3].xyz, TEMP[3].xyzz, IMM[1].xxxx, IMM[1].yyyy 555: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[2].xyzz 556: MOV TEMP[4].xy, TEMP[4].xyyy 557: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D 558: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy 559: ADD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].zzyy 560: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[5].xyzz 561: MOV TEMP[3].y, TEMP[4].xxxx 562: MOV TEMP[4].xy, TEMP[7].xyyy 563: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D 564: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy 565: ADD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].zyzz 566: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[5].xyzz 567: MOV TEMP[3].z, TEMP[4].xxxx 568: MOV TEMP[4].xy, TEMP[6].xyyy 569: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D 570: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy 571: ADD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].zyyy 572: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[5].xyzz 573: MOV TEMP[3].w, TEMP[4].xxxx 574: MOV TEMP[4].xy, TEMP[9].xyyy 575: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D 576: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy 577: ADD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].yzzz 578: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[5].xyzz 579: MOV TEMP[5].xy, TEMP[8].xyyy 580: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 581: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy 582: ADD TEMP[6].xyz, TEMP[2].xyzz, IMM[1].yzyy 583: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz 584: MOV TEMP[4].y, TEMP[5].xxxx 585: MOV TEMP[5].xy, TEMP[11].xyyy 586: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 587: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy 588: ADD TEMP[6].xyz, TEMP[2].xyzz, IMM[1].yyzz 589: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz 590: MOV TEMP[4].z, TEMP[5].xxxx 591: MOV TEMP[5].xy, TEMP[10].xyyy 592: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 593: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy 594: ADD TEMP[6].xyz, TEMP[2].xyzz, IMM[1].yyyy 595: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz 596: MOV TEMP[4].w, TEMP[5].xxxx 597: LRP TEMP[3], TEMP[2].xxxx, TEMP[4], TEMP[3] 598: LRP TEMP[3].xy, TEMP[2].yyyy, TEMP[3].zwww, TEMP[3].xyyy 599: LRP TEMP[2].x, TEMP[2].zzzz, TEMP[3].yyyy, TEMP[3].xxxx 600: ABS TEMP[2].x, TEMP[2].xxxx 601: RCP TEMP[3].x, CONST[14].zzzz 602: MAD TEMP[1].x, TEMP[2].xxxx, TEMP[3].xxxx, TEMP[1].xxxx 603: ADD TEMP[1].x, TEMP[1].xxxx, -CONST[3].xxxx 604: ADD TEMP[2].xyz, TEMP[0].xyzz, -CONST[0].xyzz 605: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 606: RSQ TEMP[3].x, TEMP[3].xxxx 607: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 608: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[2].xxxx 609: DP3 TEMP[3].x, CONST[9].xyzz, CONST[9].xyzz 610: RSQ TEMP[3].x, TEMP[3].xxxx 611: MUL TEMP[3].xyz, CONST[9].xyzz, TEMP[3].xxxx 612: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[3].xyzz 613: MAD TEMP[2].x, TEMP[2].xxxx, IMM[4].xxxx, IMM[4].xxxx 614: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 615: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 616: ADD TEMP[3].x, TEMP[1].xxxx, IMM[4].yyyy 617: ABS TEMP[3].x, TEMP[3].xxxx 618: ADD TEMP[3].x, IMM[2].yyyy, -TEMP[3].xxxx 619: MOV_SAT TEMP[3].x, TEMP[3].xxxx 620: MOV_SAT TEMP[4].x, TEMP[2].xxxx 621: LRP TEMP[4], TEMP[4].xxxx, CONST[5], CONST[7] 622: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx 623: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].wwww 624: MOV_SAT TEMP[2].x, TEMP[2].xxxx 625: LRP TEMP[2], TEMP[2].xxxx, CONST[6], CONST[8] 626: MAD TEMP[2].xyz, TEMP[4], TEMP[3].xxxx, TEMP[2] 627: MUL TEMP[3].x, TEMP[2].xxxx, IMM[5].xxxx 628: MAD TEMP[3].x, TEMP[2].yyyy, IMM[4].wwww, TEMP[3].xxxx 629: MAD TEMP[3].x, TEMP[2].zzzz, IMM[4].zzzz, TEMP[3].xxxx 630: POW TEMP[4].x, CONST[10].xxxx, IMM[5].yyyy 631: LRP TEMP[0].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xxxx 632: MUL TEMP[0].xyz, TEMP[0].xyzz, CONST[10].xxxx 633: MUL TEMP[1].x, TEMP[1].xxxx, CONST[4].xxxx 634: ADD TEMP[2].xy, IN[0].xyyy, IMM[5].zzzz 635: DP2 TEMP[2].x, TEMP[2].xyyy, TEMP[2].xyyy 636: SQRT TEMP[2].x, TEMP[2].xxxx 637: RCP TEMP[3].x, CONST[11].xxxx 638: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx 639: ADD TEMP[2].x, IMM[2].yyyy, -TEMP[2].xxxx 640: MOV_SAT TEMP[2].x, TEMP[2].xxxx 641: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 642: MOV_SAT TEMP[1].x, TEMP[1].xxxx 643: MOV TEMP[0].w, TEMP[1].xxxx 644: MOV OUT[0], TEMP[0] 645: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %60 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %61 = load <8 x i32>, <8 x i32> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %63 = load <4 x i32>, <4 x i32> addrspace(2)* %62, align 16, !tbaa !0 %64 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %65 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %66 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %67 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %68 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %69 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %70 = fmul float %64, %27 %71 = fadd float %70, %51 %72 = fmul float %65, %27 %73 = fadd float %72, %52 %74 = fmul float %66, %27 %75 = fadd float %74, %53 %76 = fmul float %71, 0x3FB99999A0000000 %77 = fmul float %73, 0x3FB99999A0000000 %78 = fmul float %75, 0x3FB99999A0000000 %79 = call float @llvm.floor.f32(float %76) %80 = call float @llvm.floor.f32(float %77) %81 = call float @llvm.floor.f32(float %78) %82 = fmul float %79, 3.906250e-03 %83 = fadd float %82, 1.953125e-03 %84 = fmul float %80, 3.906250e-03 %85 = fadd float %84, 1.953125e-03 %86 = fmul float %81, 3.906250e-03 %87 = fadd float %86, 1.953125e-03 %88 = call float @llvm.floor.f32(float %76) %89 = fsub float %76, %88 %90 = call float @llvm.floor.f32(float %77) %91 = fsub float %77, %90 %92 = call float @llvm.floor.f32(float %78) %93 = fsub float %78, %92 %94 = bitcast float %83 to i32 %95 = bitcast float %85 to i32 %96 = insertelement <2 x i32> undef, i32 %94, i32 0 %97 = insertelement <2 x i32> %96, i32 %95, i32 1 %98 = bitcast <8 x i32> %61 to <32 x i8> %99 = bitcast <4 x i32> %63 to <16 x i8> %100 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %97, <32 x i8> %98, <16 x i8> %99, i32 2) %101 = extractelement <4 x float> %100, i32 3 %102 = fadd float %87, 3.906250e-03 %103 = fadd float %83, 0.000000e+00 %104 = fadd float %85, 3.906250e-03 %105 = bitcast float %103 to i32 %106 = bitcast float %104 to i32 %107 = insertelement <2 x i32> undef, i32 %105, i32 0 %108 = insertelement <2 x i32> %107, i32 %106, i32 1 %109 = bitcast <8 x i32> %61 to <32 x i8> %110 = bitcast <4 x i32> %63 to <16 x i8> %111 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %108, <32 x i8> %109, <16 x i8> %110, i32 2) %112 = extractelement <4 x float> %111, i32 3 %113 = fadd float %87, 3.906250e-03 %114 = fadd float %83, 3.906250e-03 %115 = fadd float %85, 0.000000e+00 %116 = bitcast float %114 to i32 %117 = bitcast float %115 to i32 %118 = insertelement <2 x i32> undef, i32 %116, i32 0 %119 = insertelement <2 x i32> %118, i32 %117, i32 1 %120 = bitcast <8 x i32> %61 to <32 x i8> %121 = bitcast <4 x i32> %63 to <16 x i8> %122 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %119, <32 x i8> %120, <16 x i8> %121, i32 2) %123 = extractelement <4 x float> %122, i32 3 %124 = fadd float %87, 3.906250e-03 %125 = fadd float %83, 3.906250e-03 %126 = fadd float %85, 3.906250e-03 %127 = bitcast float %125 to i32 %128 = bitcast float %126 to i32 %129 = insertelement <2 x i32> undef, i32 %127, i32 0 %130 = insertelement <2 x i32> %129, i32 %128, i32 1 %131 = bitcast <8 x i32> %61 to <32 x i8> %132 = bitcast <4 x i32> %63 to <16 x i8> %133 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %130, <32 x i8> %131, <16 x i8> %132, i32 2) %134 = extractelement <4 x float> %133, i32 3 %135 = fadd float %87, 3.906250e-03 %136 = bitcast float %101 to i32 %137 = bitcast float %87 to i32 %138 = insertelement <2 x i32> undef, i32 %136, i32 0 %139 = insertelement <2 x i32> %138, i32 %137, i32 1 %140 = bitcast <8 x i32> %61 to <32 x i8> %141 = bitcast <4 x i32> %63 to <16 x i8> %142 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %139, <32 x i8> %140, <16 x i8> %141, i32 2) %143 = extractelement <4 x float> %142, i32 0 %144 = extractelement <4 x float> %142, i32 1 %145 = extractelement <4 x float> %142, i32 2 %146 = fmul float %143, 4.000000e+00 %147 = fadd float %146, -1.000000e+00 %148 = fmul float %144, 4.000000e+00 %149 = fadd float %148, -1.000000e+00 %150 = fmul float %145, 4.000000e+00 %151 = fadd float %150, -1.000000e+00 %152 = fmul float %147, %89 %153 = fmul float %149, %91 %154 = fadd float %153, %152 %155 = fmul float %151, %93 %156 = fadd float %154, %155 %157 = bitcast float %101 to i32 %158 = bitcast float %102 to i32 %159 = insertelement <2 x i32> undef, i32 %157, i32 0 %160 = insertelement <2 x i32> %159, i32 %158, i32 1 %161 = bitcast <8 x i32> %61 to <32 x i8> %162 = bitcast <4 x i32> %63 to <16 x i8> %163 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %160, <32 x i8> %161, <16 x i8> %162, i32 2) %164 = extractelement <4 x float> %163, i32 0 %165 = extractelement <4 x float> %163, i32 1 %166 = extractelement <4 x float> %163, i32 2 %167 = fmul float %164, 4.000000e+00 %168 = fadd float %167, -1.000000e+00 %169 = fmul float %165, 4.000000e+00 %170 = fadd float %169, -1.000000e+00 %171 = fmul float %166, 4.000000e+00 %172 = fadd float %171, -1.000000e+00 %173 = fadd float %93, -1.000000e+00 %174 = fmul float %168, %89 %175 = fmul float %170, %91 %176 = fadd float %175, %174 %177 = fmul float %172, %173 %178 = fadd float %176, %177 %179 = bitcast float %112 to i32 %180 = bitcast float %87 to i32 %181 = insertelement <2 x i32> undef, i32 %179, i32 0 %182 = insertelement <2 x i32> %181, i32 %180, i32 1 %183 = bitcast <8 x i32> %61 to <32 x i8> %184 = bitcast <4 x i32> %63 to <16 x i8> %185 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %182, <32 x i8> %183, <16 x i8> %184, i32 2) %186 = extractelement <4 x float> %185, i32 0 %187 = extractelement <4 x float> %185, i32 1 %188 = extractelement <4 x float> %185, i32 2 %189 = fmul float %186, 4.000000e+00 %190 = fadd float %189, -1.000000e+00 %191 = fmul float %187, 4.000000e+00 %192 = fadd float %191, -1.000000e+00 %193 = fmul float %188, 4.000000e+00 %194 = fadd float %193, -1.000000e+00 %195 = fadd float %91, -1.000000e+00 %196 = fmul float %190, %89 %197 = fmul float %192, %195 %198 = fadd float %197, %196 %199 = fmul float %194, %93 %200 = fadd float %198, %199 %201 = bitcast float %112 to i32 %202 = bitcast float %113 to i32 %203 = insertelement <2 x i32> undef, i32 %201, i32 0 %204 = insertelement <2 x i32> %203, i32 %202, i32 1 %205 = bitcast <8 x i32> %61 to <32 x i8> %206 = bitcast <4 x i32> %63 to <16 x i8> %207 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %204, <32 x i8> %205, <16 x i8> %206, i32 2) %208 = extractelement <4 x float> %207, i32 0 %209 = extractelement <4 x float> %207, i32 1 %210 = extractelement <4 x float> %207, i32 2 %211 = fmul float %208, 4.000000e+00 %212 = fadd float %211, -1.000000e+00 %213 = fmul float %209, 4.000000e+00 %214 = fadd float %213, -1.000000e+00 %215 = fmul float %210, 4.000000e+00 %216 = fadd float %215, -1.000000e+00 %217 = fadd float %91, -1.000000e+00 %218 = fadd float %93, -1.000000e+00 %219 = fmul float %212, %89 %220 = fmul float %214, %217 %221 = fadd float %220, %219 %222 = fmul float %216, %218 %223 = fadd float %221, %222 %224 = bitcast float %123 to i32 %225 = bitcast float %87 to i32 %226 = insertelement <2 x i32> undef, i32 %224, i32 0 %227 = insertelement <2 x i32> %226, i32 %225, i32 1 %228 = bitcast <8 x i32> %61 to <32 x i8> %229 = bitcast <4 x i32> %63 to <16 x i8> %230 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %227, <32 x i8> %228, <16 x i8> %229, i32 2) %231 = extractelement <4 x float> %230, i32 0 %232 = extractelement <4 x float> %230, i32 1 %233 = extractelement <4 x float> %230, i32 2 %234 = fmul float %231, 4.000000e+00 %235 = fadd float %234, -1.000000e+00 %236 = fmul float %232, 4.000000e+00 %237 = fadd float %236, -1.000000e+00 %238 = fmul float %233, 4.000000e+00 %239 = fadd float %238, -1.000000e+00 %240 = fadd float %89, -1.000000e+00 %241 = fmul float %235, %240 %242 = fmul float %237, %91 %243 = fadd float %242, %241 %244 = fmul float %239, %93 %245 = fadd float %243, %244 %246 = bitcast float %123 to i32 %247 = bitcast float %124 to i32 %248 = insertelement <2 x i32> undef, i32 %246, i32 0 %249 = insertelement <2 x i32> %248, i32 %247, i32 1 %250 = bitcast <8 x i32> %61 to <32 x i8> %251 = bitcast <4 x i32> %63 to <16 x i8> %252 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %249, <32 x i8> %250, <16 x i8> %251, i32 2) %253 = extractelement <4 x float> %252, i32 0 %254 = extractelement <4 x float> %252, i32 1 %255 = extractelement <4 x float> %252, i32 2 %256 = fmul float %253, 4.000000e+00 %257 = fadd float %256, -1.000000e+00 %258 = fmul float %254, 4.000000e+00 %259 = fadd float %258, -1.000000e+00 %260 = fmul float %255, 4.000000e+00 %261 = fadd float %260, -1.000000e+00 %262 = fadd float %89, -1.000000e+00 %263 = fadd float %93, -1.000000e+00 %264 = fmul float %257, %262 %265 = fmul float %259, %91 %266 = fadd float %265, %264 %267 = fmul float %261, %263 %268 = fadd float %266, %267 %269 = bitcast float %134 to i32 %270 = bitcast float %87 to i32 %271 = insertelement <2 x i32> undef, i32 %269, i32 0 %272 = insertelement <2 x i32> %271, i32 %270, i32 1 %273 = bitcast <8 x i32> %61 to <32 x i8> %274 = bitcast <4 x i32> %63 to <16 x i8> %275 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %272, <32 x i8> %273, <16 x i8> %274, i32 2) %276 = extractelement <4 x float> %275, i32 0 %277 = extractelement <4 x float> %275, i32 1 %278 = extractelement <4 x float> %275, i32 2 %279 = fmul float %276, 4.000000e+00 %280 = fadd float %279, -1.000000e+00 %281 = fmul float %277, 4.000000e+00 %282 = fadd float %281, -1.000000e+00 %283 = fmul float %278, 4.000000e+00 %284 = fadd float %283, -1.000000e+00 %285 = fadd float %89, -1.000000e+00 %286 = fadd float %91, -1.000000e+00 %287 = fmul float %280, %285 %288 = fmul float %282, %286 %289 = fadd float %288, %287 %290 = fmul float %284, %93 %291 = fadd float %289, %290 %292 = bitcast float %134 to i32 %293 = bitcast float %135 to i32 %294 = insertelement <2 x i32> undef, i32 %292, i32 0 %295 = insertelement <2 x i32> %294, i32 %293, i32 1 %296 = bitcast <8 x i32> %61 to <32 x i8> %297 = bitcast <4 x i32> %63 to <16 x i8> %298 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %295, <32 x i8> %296, <16 x i8> %297, i32 2) %299 = extractelement <4 x float> %298, i32 0 %300 = extractelement <4 x float> %298, i32 1 %301 = extractelement <4 x float> %298, i32 2 %302 = fmul float %299, 4.000000e+00 %303 = fadd float %302, -1.000000e+00 %304 = fmul float %300, 4.000000e+00 %305 = fadd float %304, -1.000000e+00 %306 = fmul float %301, 4.000000e+00 %307 = fadd float %306, -1.000000e+00 %308 = fadd float %89, -1.000000e+00 %309 = fadd float %91, -1.000000e+00 %310 = fadd float %93, -1.000000e+00 %311 = fmul float %303, %308 %312 = fmul float %305, %309 %313 = fadd float %312, %311 %314 = fmul float %307, %310 %315 = fadd float %313, %314 %316 = call float @llvm.AMDGPU.lrp(float %89, float %245, float %156) %317 = call float @llvm.AMDGPU.lrp(float %89, float %268, float %178) %318 = call float @llvm.AMDGPU.lrp(float %89, float %291, float %200) %319 = call float @llvm.AMDGPU.lrp(float %89, float %315, float %223) %320 = call float @llvm.AMDGPU.lrp(float %91, float %318, float %316) %321 = call float @llvm.AMDGPU.lrp(float %91, float %319, float %317) %322 = fmul float %73, 0x3FB99999A0000000 %323 = fmul float %71, 0x3FB99999A0000000 %324 = fmul float %75, 0x3FB99999A0000000 %325 = call float @llvm.floor.f32(float %322) %326 = call float @llvm.floor.f32(float %323) %327 = call float @llvm.floor.f32(float %324) %328 = fmul float %325, 3.906250e-03 %329 = fadd float %328, 1.953125e-03 %330 = fmul float %326, 3.906250e-03 %331 = fadd float %330, 1.953125e-03 %332 = fmul float %327, 3.906250e-03 %333 = fadd float %332, 1.953125e-03 %334 = call float @llvm.floor.f32(float %322) %335 = fsub float %322, %334 %336 = call float @llvm.floor.f32(float %323) %337 = fsub float %323, %336 %338 = call float @llvm.floor.f32(float %324) %339 = fsub float %324, %338 %340 = bitcast float %329 to i32 %341 = bitcast float %331 to i32 %342 = insertelement <2 x i32> undef, i32 %340, i32 0 %343 = insertelement <2 x i32> %342, i32 %341, i32 1 %344 = bitcast <8 x i32> %61 to <32 x i8> %345 = bitcast <4 x i32> %63 to <16 x i8> %346 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %343, <32 x i8> %344, <16 x i8> %345, i32 2) %347 = extractelement <4 x float> %346, i32 3 %348 = fadd float %333, 3.906250e-03 %349 = fadd float %329, 0.000000e+00 %350 = fadd float %331, 3.906250e-03 %351 = bitcast float %349 to i32 %352 = bitcast float %350 to i32 %353 = insertelement <2 x i32> undef, i32 %351, i32 0 %354 = insertelement <2 x i32> %353, i32 %352, i32 1 %355 = bitcast <8 x i32> %61 to <32 x i8> %356 = bitcast <4 x i32> %63 to <16 x i8> %357 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %354, <32 x i8> %355, <16 x i8> %356, i32 2) %358 = extractelement <4 x float> %357, i32 3 %359 = fadd float %333, 3.906250e-03 %360 = fadd float %329, 3.906250e-03 %361 = fadd float %331, 0.000000e+00 %362 = bitcast float %360 to i32 %363 = bitcast float %361 to i32 %364 = insertelement <2 x i32> undef, i32 %362, i32 0 %365 = insertelement <2 x i32> %364, i32 %363, i32 1 %366 = bitcast <8 x i32> %61 to <32 x i8> %367 = bitcast <4 x i32> %63 to <16 x i8> %368 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %365, <32 x i8> %366, <16 x i8> %367, i32 2) %369 = extractelement <4 x float> %368, i32 3 %370 = fadd float %333, 3.906250e-03 %371 = fadd float %329, 3.906250e-03 %372 = fadd float %331, 3.906250e-03 %373 = bitcast float %371 to i32 %374 = bitcast float %372 to i32 %375 = insertelement <2 x i32> undef, i32 %373, i32 0 %376 = insertelement <2 x i32> %375, i32 %374, i32 1 %377 = bitcast <8 x i32> %61 to <32 x i8> %378 = bitcast <4 x i32> %63 to <16 x i8> %379 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %376, <32 x i8> %377, <16 x i8> %378, i32 2) %380 = extractelement <4 x float> %379, i32 3 %381 = fadd float %333, 3.906250e-03 %382 = bitcast float %347 to i32 %383 = bitcast float %333 to i32 %384 = insertelement <2 x i32> undef, i32 %382, i32 0 %385 = insertelement <2 x i32> %384, i32 %383, i32 1 %386 = bitcast <8 x i32> %61 to <32 x i8> %387 = bitcast <4 x i32> %63 to <16 x i8> %388 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %385, <32 x i8> %386, <16 x i8> %387, i32 2) %389 = extractelement <4 x float> %388, i32 0 %390 = extractelement <4 x float> %388, i32 1 %391 = extractelement <4 x float> %388, i32 2 %392 = fmul float %389, 4.000000e+00 %393 = fadd float %392, -1.000000e+00 %394 = fmul float %390, 4.000000e+00 %395 = fadd float %394, -1.000000e+00 %396 = fmul float %391, 4.000000e+00 %397 = fadd float %396, -1.000000e+00 %398 = fmul float %393, %335 %399 = fmul float %395, %337 %400 = fadd float %399, %398 %401 = fmul float %397, %339 %402 = fadd float %400, %401 %403 = bitcast float %347 to i32 %404 = bitcast float %348 to i32 %405 = insertelement <2 x i32> undef, i32 %403, i32 0 %406 = insertelement <2 x i32> %405, i32 %404, i32 1 %407 = bitcast <8 x i32> %61 to <32 x i8> %408 = bitcast <4 x i32> %63 to <16 x i8> %409 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %406, <32 x i8> %407, <16 x i8> %408, i32 2) %410 = extractelement <4 x float> %409, i32 0 %411 = extractelement <4 x float> %409, i32 1 %412 = extractelement <4 x float> %409, i32 2 %413 = fmul float %410, 4.000000e+00 %414 = fadd float %413, -1.000000e+00 %415 = fmul float %411, 4.000000e+00 %416 = fadd float %415, -1.000000e+00 %417 = fmul float %412, 4.000000e+00 %418 = fadd float %417, -1.000000e+00 %419 = fadd float %339, -1.000000e+00 %420 = fmul float %414, %335 %421 = fmul float %416, %337 %422 = fadd float %421, %420 %423 = fmul float %418, %419 %424 = fadd float %422, %423 %425 = bitcast float %358 to i32 %426 = bitcast float %333 to i32 %427 = insertelement <2 x i32> undef, i32 %425, i32 0 %428 = insertelement <2 x i32> %427, i32 %426, i32 1 %429 = bitcast <8 x i32> %61 to <32 x i8> %430 = bitcast <4 x i32> %63 to <16 x i8> %431 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %428, <32 x i8> %429, <16 x i8> %430, i32 2) %432 = extractelement <4 x float> %431, i32 0 %433 = extractelement <4 x float> %431, i32 1 %434 = extractelement <4 x float> %431, i32 2 %435 = fmul float %432, 4.000000e+00 %436 = fadd float %435, -1.000000e+00 %437 = fmul float %433, 4.000000e+00 %438 = fadd float %437, -1.000000e+00 %439 = fmul float %434, 4.000000e+00 %440 = fadd float %439, -1.000000e+00 %441 = fadd float %337, -1.000000e+00 %442 = fmul float %436, %335 %443 = fmul float %438, %441 %444 = fadd float %443, %442 %445 = fmul float %440, %339 %446 = fadd float %444, %445 %447 = bitcast float %358 to i32 %448 = bitcast float %359 to i32 %449 = insertelement <2 x i32> undef, i32 %447, i32 0 %450 = insertelement <2 x i32> %449, i32 %448, i32 1 %451 = bitcast <8 x i32> %61 to <32 x i8> %452 = bitcast <4 x i32> %63 to <16 x i8> %453 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %450, <32 x i8> %451, <16 x i8> %452, i32 2) %454 = extractelement <4 x float> %453, i32 0 %455 = extractelement <4 x float> %453, i32 1 %456 = extractelement <4 x float> %453, i32 2 %457 = fmul float %454, 4.000000e+00 %458 = fadd float %457, -1.000000e+00 %459 = fmul float %455, 4.000000e+00 %460 = fadd float %459, -1.000000e+00 %461 = fmul float %456, 4.000000e+00 %462 = fadd float %461, -1.000000e+00 %463 = fadd float %337, -1.000000e+00 %464 = fadd float %339, -1.000000e+00 %465 = fmul float %458, %335 %466 = fmul float %460, %463 %467 = fadd float %466, %465 %468 = fmul float %462, %464 %469 = fadd float %467, %468 %470 = bitcast float %369 to i32 %471 = bitcast float %333 to i32 %472 = insertelement <2 x i32> undef, i32 %470, i32 0 %473 = insertelement <2 x i32> %472, i32 %471, i32 1 %474 = bitcast <8 x i32> %61 to <32 x i8> %475 = bitcast <4 x i32> %63 to <16 x i8> %476 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %473, <32 x i8> %474, <16 x i8> %475, i32 2) %477 = extractelement <4 x float> %476, i32 0 %478 = extractelement <4 x float> %476, i32 1 %479 = extractelement <4 x float> %476, i32 2 %480 = fmul float %477, 4.000000e+00 %481 = fadd float %480, -1.000000e+00 %482 = fmul float %478, 4.000000e+00 %483 = fadd float %482, -1.000000e+00 %484 = fmul float %479, 4.000000e+00 %485 = fadd float %484, -1.000000e+00 %486 = fadd float %335, -1.000000e+00 %487 = fmul float %481, %486 %488 = fmul float %483, %337 %489 = fadd float %488, %487 %490 = fmul float %485, %339 %491 = fadd float %489, %490 %492 = bitcast float %369 to i32 %493 = bitcast float %370 to i32 %494 = insertelement <2 x i32> undef, i32 %492, i32 0 %495 = insertelement <2 x i32> %494, i32 %493, i32 1 %496 = bitcast <8 x i32> %61 to <32 x i8> %497 = bitcast <4 x i32> %63 to <16 x i8> %498 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %495, <32 x i8> %496, <16 x i8> %497, i32 2) %499 = extractelement <4 x float> %498, i32 0 %500 = extractelement <4 x float> %498, i32 1 %501 = extractelement <4 x float> %498, i32 2 %502 = fmul float %499, 4.000000e+00 %503 = fadd float %502, -1.000000e+00 %504 = fmul float %500, 4.000000e+00 %505 = fadd float %504, -1.000000e+00 %506 = fmul float %501, 4.000000e+00 %507 = fadd float %506, -1.000000e+00 %508 = fadd float %335, -1.000000e+00 %509 = fadd float %339, -1.000000e+00 %510 = fmul float %503, %508 %511 = fmul float %505, %337 %512 = fadd float %511, %510 %513 = fmul float %507, %509 %514 = fadd float %512, %513 %515 = bitcast float %380 to i32 %516 = bitcast float %333 to i32 %517 = insertelement <2 x i32> undef, i32 %515, i32 0 %518 = insertelement <2 x i32> %517, i32 %516, i32 1 %519 = bitcast <8 x i32> %61 to <32 x i8> %520 = bitcast <4 x i32> %63 to <16 x i8> %521 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %518, <32 x i8> %519, <16 x i8> %520, i32 2) %522 = extractelement <4 x float> %521, i32 0 %523 = extractelement <4 x float> %521, i32 1 %524 = extractelement <4 x float> %521, i32 2 %525 = fmul float %522, 4.000000e+00 %526 = fadd float %525, -1.000000e+00 %527 = fmul float %523, 4.000000e+00 %528 = fadd float %527, -1.000000e+00 %529 = fmul float %524, 4.000000e+00 %530 = fadd float %529, -1.000000e+00 %531 = fadd float %335, -1.000000e+00 %532 = fadd float %337, -1.000000e+00 %533 = fmul float %526, %531 %534 = fmul float %528, %532 %535 = fadd float %534, %533 %536 = fmul float %530, %339 %537 = fadd float %535, %536 %538 = bitcast float %380 to i32 %539 = bitcast float %381 to i32 %540 = insertelement <2 x i32> undef, i32 %538, i32 0 %541 = insertelement <2 x i32> %540, i32 %539, i32 1 %542 = bitcast <8 x i32> %61 to <32 x i8> %543 = bitcast <4 x i32> %63 to <16 x i8> %544 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %541, <32 x i8> %542, <16 x i8> %543, i32 2) %545 = extractelement <4 x float> %544, i32 0 %546 = extractelement <4 x float> %544, i32 1 %547 = extractelement <4 x float> %544, i32 2 %548 = fmul float %545, 4.000000e+00 %549 = fadd float %548, -1.000000e+00 %550 = fmul float %546, 4.000000e+00 %551 = fadd float %550, -1.000000e+00 %552 = fmul float %547, 4.000000e+00 %553 = fadd float %552, -1.000000e+00 %554 = fadd float %335, -1.000000e+00 %555 = fadd float %337, -1.000000e+00 %556 = fadd float %339, -1.000000e+00 %557 = fmul float %549, %554 %558 = fmul float %551, %555 %559 = fadd float %558, %557 %560 = fmul float %553, %556 %561 = fadd float %559, %560 %562 = call float @llvm.AMDGPU.lrp(float %335, float %491, float %402) %563 = call float @llvm.AMDGPU.lrp(float %335, float %514, float %424) %564 = call float @llvm.AMDGPU.lrp(float %335, float %537, float %446) %565 = call float @llvm.AMDGPU.lrp(float %335, float %561, float %469) %566 = call float @llvm.AMDGPU.lrp(float %337, float %564, float %562) %567 = call float @llvm.AMDGPU.lrp(float %337, float %565, float %563) %568 = call float @llvm.AMDGPU.lrp(float %93, float %321, float %320) %569 = call float @llvm.AMDGPU.lrp(float %339, float %567, float %566) %570 = fadd float %71, %568 %571 = fadd float %73, %569 %572 = fadd float %570, 0x3FC99999A0000000 %573 = fadd float %571, 0x3FC99999A0000000 %574 = fadd float %75, 0x3FC99999A0000000 %575 = call float @llvm.floor.f32(float %572) %576 = call float @llvm.floor.f32(float %573) %577 = call float @llvm.floor.f32(float %574) %578 = fmul float %575, 3.906250e-03 %579 = fadd float %578, 1.953125e-03 %580 = fmul float %576, 3.906250e-03 %581 = fadd float %580, 1.953125e-03 %582 = fmul float %577, 3.906250e-03 %583 = fadd float %582, 1.953125e-03 %584 = call float @llvm.floor.f32(float %572) %585 = fsub float %572, %584 %586 = call float @llvm.floor.f32(float %573) %587 = fsub float %573, %586 %588 = call float @llvm.floor.f32(float %574) %589 = fsub float %574, %588 %590 = bitcast float %579 to i32 %591 = bitcast float %581 to i32 %592 = insertelement <2 x i32> undef, i32 %590, i32 0 %593 = insertelement <2 x i32> %592, i32 %591, i32 1 %594 = bitcast <8 x i32> %61 to <32 x i8> %595 = bitcast <4 x i32> %63 to <16 x i8> %596 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %593, <32 x i8> %594, <16 x i8> %595, i32 2) %597 = extractelement <4 x float> %596, i32 3 %598 = fadd float %583, 3.906250e-03 %599 = fadd float %579, 0.000000e+00 %600 = fadd float %581, 3.906250e-03 %601 = bitcast float %599 to i32 %602 = bitcast float %600 to i32 %603 = insertelement <2 x i32> undef, i32 %601, i32 0 %604 = insertelement <2 x i32> %603, i32 %602, i32 1 %605 = bitcast <8 x i32> %61 to <32 x i8> %606 = bitcast <4 x i32> %63 to <16 x i8> %607 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %604, <32 x i8> %605, <16 x i8> %606, i32 2) %608 = extractelement <4 x float> %607, i32 3 %609 = fadd float %583, 3.906250e-03 %610 = fadd float %579, 3.906250e-03 %611 = fadd float %581, 0.000000e+00 %612 = bitcast float %610 to i32 %613 = bitcast float %611 to i32 %614 = insertelement <2 x i32> undef, i32 %612, i32 0 %615 = insertelement <2 x i32> %614, i32 %613, i32 1 %616 = bitcast <8 x i32> %61 to <32 x i8> %617 = bitcast <4 x i32> %63 to <16 x i8> %618 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %615, <32 x i8> %616, <16 x i8> %617, i32 2) %619 = extractelement <4 x float> %618, i32 3 %620 = fadd float %583, 3.906250e-03 %621 = fadd float %579, 3.906250e-03 %622 = fadd float %581, 3.906250e-03 %623 = bitcast float %621 to i32 %624 = bitcast float %622 to i32 %625 = insertelement <2 x i32> undef, i32 %623, i32 0 %626 = insertelement <2 x i32> %625, i32 %624, i32 1 %627 = bitcast <8 x i32> %61 to <32 x i8> %628 = bitcast <4 x i32> %63 to <16 x i8> %629 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %626, <32 x i8> %627, <16 x i8> %628, i32 2) %630 = extractelement <4 x float> %629, i32 3 %631 = fadd float %583, 3.906250e-03 %632 = bitcast float %597 to i32 %633 = bitcast float %583 to i32 %634 = insertelement <2 x i32> undef, i32 %632, i32 0 %635 = insertelement <2 x i32> %634, i32 %633, i32 1 %636 = bitcast <8 x i32> %61 to <32 x i8> %637 = bitcast <4 x i32> %63 to <16 x i8> %638 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %635, <32 x i8> %636, <16 x i8> %637, i32 2) %639 = extractelement <4 x float> %638, i32 0 %640 = extractelement <4 x float> %638, i32 1 %641 = extractelement <4 x float> %638, i32 2 %642 = fmul float %639, 4.000000e+00 %643 = fadd float %642, -1.000000e+00 %644 = fmul float %640, 4.000000e+00 %645 = fadd float %644, -1.000000e+00 %646 = fmul float %641, 4.000000e+00 %647 = fadd float %646, -1.000000e+00 %648 = fmul float %643, %585 %649 = fmul float %645, %587 %650 = fadd float %649, %648 %651 = fmul float %647, %589 %652 = fadd float %650, %651 %653 = bitcast float %597 to i32 %654 = bitcast float %598 to i32 %655 = insertelement <2 x i32> undef, i32 %653, i32 0 %656 = insertelement <2 x i32> %655, i32 %654, i32 1 %657 = bitcast <8 x i32> %61 to <32 x i8> %658 = bitcast <4 x i32> %63 to <16 x i8> %659 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %656, <32 x i8> %657, <16 x i8> %658, i32 2) %660 = extractelement <4 x float> %659, i32 0 %661 = extractelement <4 x float> %659, i32 1 %662 = extractelement <4 x float> %659, i32 2 %663 = fmul float %660, 4.000000e+00 %664 = fadd float %663, -1.000000e+00 %665 = fmul float %661, 4.000000e+00 %666 = fadd float %665, -1.000000e+00 %667 = fmul float %662, 4.000000e+00 %668 = fadd float %667, -1.000000e+00 %669 = fadd float %589, -1.000000e+00 %670 = fmul float %664, %585 %671 = fmul float %666, %587 %672 = fadd float %671, %670 %673 = fmul float %668, %669 %674 = fadd float %672, %673 %675 = bitcast float %608 to i32 %676 = bitcast float %583 to i32 %677 = insertelement <2 x i32> undef, i32 %675, i32 0 %678 = insertelement <2 x i32> %677, i32 %676, i32 1 %679 = bitcast <8 x i32> %61 to <32 x i8> %680 = bitcast <4 x i32> %63 to <16 x i8> %681 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %678, <32 x i8> %679, <16 x i8> %680, i32 2) %682 = extractelement <4 x float> %681, i32 0 %683 = extractelement <4 x float> %681, i32 1 %684 = extractelement <4 x float> %681, i32 2 %685 = fmul float %682, 4.000000e+00 %686 = fadd float %685, -1.000000e+00 %687 = fmul float %683, 4.000000e+00 %688 = fadd float %687, -1.000000e+00 %689 = fmul float %684, 4.000000e+00 %690 = fadd float %689, -1.000000e+00 %691 = fadd float %587, -1.000000e+00 %692 = fmul float %686, %585 %693 = fmul float %688, %691 %694 = fadd float %693, %692 %695 = fmul float %690, %589 %696 = fadd float %694, %695 %697 = bitcast float %608 to i32 %698 = bitcast float %609 to i32 %699 = insertelement <2 x i32> undef, i32 %697, i32 0 %700 = insertelement <2 x i32> %699, i32 %698, i32 1 %701 = bitcast <8 x i32> %61 to <32 x i8> %702 = bitcast <4 x i32> %63 to <16 x i8> %703 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %700, <32 x i8> %701, <16 x i8> %702, i32 2) %704 = extractelement <4 x float> %703, i32 0 %705 = extractelement <4 x float> %703, i32 1 %706 = extractelement <4 x float> %703, i32 2 %707 = fmul float %704, 4.000000e+00 %708 = fadd float %707, -1.000000e+00 %709 = fmul float %705, 4.000000e+00 %710 = fadd float %709, -1.000000e+00 %711 = fmul float %706, 4.000000e+00 %712 = fadd float %711, -1.000000e+00 %713 = fadd float %587, -1.000000e+00 %714 = fadd float %589, -1.000000e+00 %715 = fmul float %708, %585 %716 = fmul float %710, %713 %717 = fadd float %716, %715 %718 = fmul float %712, %714 %719 = fadd float %717, %718 %720 = bitcast float %619 to i32 %721 = bitcast float %583 to i32 %722 = insertelement <2 x i32> undef, i32 %720, i32 0 %723 = insertelement <2 x i32> %722, i32 %721, i32 1 %724 = bitcast <8 x i32> %61 to <32 x i8> %725 = bitcast <4 x i32> %63 to <16 x i8> %726 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %723, <32 x i8> %724, <16 x i8> %725, i32 2) %727 = extractelement <4 x float> %726, i32 0 %728 = extractelement <4 x float> %726, i32 1 %729 = extractelement <4 x float> %726, i32 2 %730 = fmul float %727, 4.000000e+00 %731 = fadd float %730, -1.000000e+00 %732 = fmul float %728, 4.000000e+00 %733 = fadd float %732, -1.000000e+00 %734 = fmul float %729, 4.000000e+00 %735 = fadd float %734, -1.000000e+00 %736 = fadd float %585, -1.000000e+00 %737 = fmul float %731, %736 %738 = fmul float %733, %587 %739 = fadd float %738, %737 %740 = fmul float %735, %589 %741 = fadd float %739, %740 %742 = bitcast float %619 to i32 %743 = bitcast float %620 to i32 %744 = insertelement <2 x i32> undef, i32 %742, i32 0 %745 = insertelement <2 x i32> %744, i32 %743, i32 1 %746 = bitcast <8 x i32> %61 to <32 x i8> %747 = bitcast <4 x i32> %63 to <16 x i8> %748 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %745, <32 x i8> %746, <16 x i8> %747, i32 2) %749 = extractelement <4 x float> %748, i32 0 %750 = extractelement <4 x float> %748, i32 1 %751 = extractelement <4 x float> %748, i32 2 %752 = fmul float %749, 4.000000e+00 %753 = fadd float %752, -1.000000e+00 %754 = fmul float %750, 4.000000e+00 %755 = fadd float %754, -1.000000e+00 %756 = fmul float %751, 4.000000e+00 %757 = fadd float %756, -1.000000e+00 %758 = fadd float %585, -1.000000e+00 %759 = fadd float %589, -1.000000e+00 %760 = fmul float %753, %758 %761 = fmul float %755, %587 %762 = fadd float %761, %760 %763 = fmul float %757, %759 %764 = fadd float %762, %763 %765 = bitcast float %630 to i32 %766 = bitcast float %583 to i32 %767 = insertelement <2 x i32> undef, i32 %765, i32 0 %768 = insertelement <2 x i32> %767, i32 %766, i32 1 %769 = bitcast <8 x i32> %61 to <32 x i8> %770 = bitcast <4 x i32> %63 to <16 x i8> %771 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %768, <32 x i8> %769, <16 x i8> %770, i32 2) %772 = extractelement <4 x float> %771, i32 0 %773 = extractelement <4 x float> %771, i32 1 %774 = extractelement <4 x float> %771, i32 2 %775 = fmul float %772, 4.000000e+00 %776 = fadd float %775, -1.000000e+00 %777 = fmul float %773, 4.000000e+00 %778 = fadd float %777, -1.000000e+00 %779 = fmul float %774, 4.000000e+00 %780 = fadd float %779, -1.000000e+00 %781 = fadd float %585, -1.000000e+00 %782 = fadd float %587, -1.000000e+00 %783 = fmul float %776, %781 %784 = fmul float %778, %782 %785 = fadd float %784, %783 %786 = fmul float %780, %589 %787 = fadd float %785, %786 %788 = bitcast float %630 to i32 %789 = bitcast float %631 to i32 %790 = insertelement <2 x i32> undef, i32 %788, i32 0 %791 = insertelement <2 x i32> %790, i32 %789, i32 1 %792 = bitcast <8 x i32> %61 to <32 x i8> %793 = bitcast <4 x i32> %63 to <16 x i8> %794 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %791, <32 x i8> %792, <16 x i8> %793, i32 2) %795 = extractelement <4 x float> %794, i32 0 %796 = extractelement <4 x float> %794, i32 1 %797 = extractelement <4 x float> %794, i32 2 %798 = fmul float %795, 4.000000e+00 %799 = fadd float %798, -1.000000e+00 %800 = fmul float %796, 4.000000e+00 %801 = fadd float %800, -1.000000e+00 %802 = fmul float %797, 4.000000e+00 %803 = fadd float %802, -1.000000e+00 %804 = fadd float %585, -1.000000e+00 %805 = fadd float %587, -1.000000e+00 %806 = fadd float %589, -1.000000e+00 %807 = fmul float %799, %804 %808 = fmul float %801, %805 %809 = fadd float %808, %807 %810 = fmul float %803, %806 %811 = fadd float %809, %810 %812 = call float @llvm.AMDGPU.lrp(float %585, float %741, float %652) %813 = call float @llvm.AMDGPU.lrp(float %585, float %764, float %674) %814 = call float @llvm.AMDGPU.lrp(float %585, float %787, float %696) %815 = call float @llvm.AMDGPU.lrp(float %585, float %811, float %719) %816 = call float @llvm.AMDGPU.lrp(float %587, float %814, float %812) %817 = call float @llvm.AMDGPU.lrp(float %587, float %815, float %813) %818 = call float @llvm.AMDGPU.lrp(float %589, float %817, float %816) %819 = fmul float %818, 0x3FF19999A0000000 %820 = fadd float %819, 1.000000e+00 %821 = fmul float %820, 0x3FC851EB80000000 %822 = fadd float %821, %570 %823 = fmul float %820, 0x3FC851EB80000000 %824 = fadd float %823, %571 %825 = fmul float %820, 0x3FC851EB80000000 %826 = fadd float %825, %75 %827 = fmul float %822, 0x400A666660000000 %828 = fmul float %824, 0x400A666660000000 %829 = fmul float %826, 0x400A666660000000 %830 = call float @llvm.floor.f32(float %827) %831 = call float @llvm.floor.f32(float %828) %832 = call float @llvm.floor.f32(float %829) %833 = fmul float %830, 3.906250e-03 %834 = fadd float %833, 1.953125e-03 %835 = fmul float %831, 3.906250e-03 %836 = fadd float %835, 1.953125e-03 %837 = fmul float %832, 3.906250e-03 %838 = fadd float %837, 1.953125e-03 %839 = call float @llvm.floor.f32(float %827) %840 = fsub float %827, %839 %841 = call float @llvm.floor.f32(float %828) %842 = fsub float %828, %841 %843 = call float @llvm.floor.f32(float %829) %844 = fsub float %829, %843 %845 = bitcast float %834 to i32 %846 = bitcast float %836 to i32 %847 = insertelement <2 x i32> undef, i32 %845, i32 0 %848 = insertelement <2 x i32> %847, i32 %846, i32 1 %849 = bitcast <8 x i32> %61 to <32 x i8> %850 = bitcast <4 x i32> %63 to <16 x i8> %851 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %848, <32 x i8> %849, <16 x i8> %850, i32 2) %852 = extractelement <4 x float> %851, i32 3 %853 = fadd float %838, 3.906250e-03 %854 = fadd float %834, 0.000000e+00 %855 = fadd float %836, 3.906250e-03 %856 = bitcast float %854 to i32 %857 = bitcast float %855 to i32 %858 = insertelement <2 x i32> undef, i32 %856, i32 0 %859 = insertelement <2 x i32> %858, i32 %857, i32 1 %860 = bitcast <8 x i32> %61 to <32 x i8> %861 = bitcast <4 x i32> %63 to <16 x i8> %862 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %859, <32 x i8> %860, <16 x i8> %861, i32 2) %863 = extractelement <4 x float> %862, i32 3 %864 = fadd float %838, 3.906250e-03 %865 = fadd float %834, 3.906250e-03 %866 = fadd float %836, 0.000000e+00 %867 = bitcast float %865 to i32 %868 = bitcast float %866 to i32 %869 = insertelement <2 x i32> undef, i32 %867, i32 0 %870 = insertelement <2 x i32> %869, i32 %868, i32 1 %871 = bitcast <8 x i32> %61 to <32 x i8> %872 = bitcast <4 x i32> %63 to <16 x i8> %873 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %870, <32 x i8> %871, <16 x i8> %872, i32 2) %874 = extractelement <4 x float> %873, i32 3 %875 = fadd float %838, 3.906250e-03 %876 = fadd float %834, 3.906250e-03 %877 = fadd float %836, 3.906250e-03 %878 = bitcast float %876 to i32 %879 = bitcast float %877 to i32 %880 = insertelement <2 x i32> undef, i32 %878, i32 0 %881 = insertelement <2 x i32> %880, i32 %879, i32 1 %882 = bitcast <8 x i32> %61 to <32 x i8> %883 = bitcast <4 x i32> %63 to <16 x i8> %884 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %881, <32 x i8> %882, <16 x i8> %883, i32 2) %885 = extractelement <4 x float> %884, i32 3 %886 = fadd float %838, 3.906250e-03 %887 = bitcast float %852 to i32 %888 = bitcast float %838 to i32 %889 = insertelement <2 x i32> undef, i32 %887, i32 0 %890 = insertelement <2 x i32> %889, i32 %888, i32 1 %891 = bitcast <8 x i32> %61 to <32 x i8> %892 = bitcast <4 x i32> %63 to <16 x i8> %893 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %890, <32 x i8> %891, <16 x i8> %892, i32 2) %894 = extractelement <4 x float> %893, i32 0 %895 = extractelement <4 x float> %893, i32 1 %896 = extractelement <4 x float> %893, i32 2 %897 = fmul float %894, 4.000000e+00 %898 = fadd float %897, -1.000000e+00 %899 = fmul float %895, 4.000000e+00 %900 = fadd float %899, -1.000000e+00 %901 = fmul float %896, 4.000000e+00 %902 = fadd float %901, -1.000000e+00 %903 = fmul float %898, %840 %904 = fmul float %900, %842 %905 = fadd float %904, %903 %906 = fmul float %902, %844 %907 = fadd float %905, %906 %908 = bitcast float %852 to i32 %909 = bitcast float %853 to i32 %910 = insertelement <2 x i32> undef, i32 %908, i32 0 %911 = insertelement <2 x i32> %910, i32 %909, i32 1 %912 = bitcast <8 x i32> %61 to <32 x i8> %913 = bitcast <4 x i32> %63 to <16 x i8> %914 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %911, <32 x i8> %912, <16 x i8> %913, i32 2) %915 = extractelement <4 x float> %914, i32 0 %916 = extractelement <4 x float> %914, i32 1 %917 = extractelement <4 x float> %914, i32 2 %918 = fmul float %915, 4.000000e+00 %919 = fadd float %918, -1.000000e+00 %920 = fmul float %916, 4.000000e+00 %921 = fadd float %920, -1.000000e+00 %922 = fmul float %917, 4.000000e+00 %923 = fadd float %922, -1.000000e+00 %924 = fadd float %844, -1.000000e+00 %925 = fmul float %919, %840 %926 = fmul float %921, %842 %927 = fadd float %926, %925 %928 = fmul float %923, %924 %929 = fadd float %927, %928 %930 = bitcast float %863 to i32 %931 = bitcast float %838 to i32 %932 = insertelement <2 x i32> undef, i32 %930, i32 0 %933 = insertelement <2 x i32> %932, i32 %931, i32 1 %934 = bitcast <8 x i32> %61 to <32 x i8> %935 = bitcast <4 x i32> %63 to <16 x i8> %936 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %933, <32 x i8> %934, <16 x i8> %935, i32 2) %937 = extractelement <4 x float> %936, i32 0 %938 = extractelement <4 x float> %936, i32 1 %939 = extractelement <4 x float> %936, i32 2 %940 = fmul float %937, 4.000000e+00 %941 = fadd float %940, -1.000000e+00 %942 = fmul float %938, 4.000000e+00 %943 = fadd float %942, -1.000000e+00 %944 = fmul float %939, 4.000000e+00 %945 = fadd float %944, -1.000000e+00 %946 = fadd float %842, -1.000000e+00 %947 = fmul float %941, %840 %948 = fmul float %943, %946 %949 = fadd float %948, %947 %950 = fmul float %945, %844 %951 = fadd float %949, %950 %952 = bitcast float %863 to i32 %953 = bitcast float %864 to i32 %954 = insertelement <2 x i32> undef, i32 %952, i32 0 %955 = insertelement <2 x i32> %954, i32 %953, i32 1 %956 = bitcast <8 x i32> %61 to <32 x i8> %957 = bitcast <4 x i32> %63 to <16 x i8> %958 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %955, <32 x i8> %956, <16 x i8> %957, i32 2) %959 = extractelement <4 x float> %958, i32 0 %960 = extractelement <4 x float> %958, i32 1 %961 = extractelement <4 x float> %958, i32 2 %962 = fmul float %959, 4.000000e+00 %963 = fadd float %962, -1.000000e+00 %964 = fmul float %960, 4.000000e+00 %965 = fadd float %964, -1.000000e+00 %966 = fmul float %961, 4.000000e+00 %967 = fadd float %966, -1.000000e+00 %968 = fadd float %842, -1.000000e+00 %969 = fadd float %844, -1.000000e+00 %970 = fmul float %963, %840 %971 = fmul float %965, %968 %972 = fadd float %971, %970 %973 = fmul float %967, %969 %974 = fadd float %972, %973 %975 = bitcast float %874 to i32 %976 = bitcast float %838 to i32 %977 = insertelement <2 x i32> undef, i32 %975, i32 0 %978 = insertelement <2 x i32> %977, i32 %976, i32 1 %979 = bitcast <8 x i32> %61 to <32 x i8> %980 = bitcast <4 x i32> %63 to <16 x i8> %981 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %978, <32 x i8> %979, <16 x i8> %980, i32 2) %982 = extractelement <4 x float> %981, i32 0 %983 = extractelement <4 x float> %981, i32 1 %984 = extractelement <4 x float> %981, i32 2 %985 = fmul float %982, 4.000000e+00 %986 = fadd float %985, -1.000000e+00 %987 = fmul float %983, 4.000000e+00 %988 = fadd float %987, -1.000000e+00 %989 = fmul float %984, 4.000000e+00 %990 = fadd float %989, -1.000000e+00 %991 = fadd float %840, -1.000000e+00 %992 = fmul float %986, %991 %993 = fmul float %988, %842 %994 = fadd float %993, %992 %995 = fmul float %990, %844 %996 = fadd float %994, %995 %997 = bitcast float %874 to i32 %998 = bitcast float %875 to i32 %999 = insertelement <2 x i32> undef, i32 %997, i32 0 %1000 = insertelement <2 x i32> %999, i32 %998, i32 1 %1001 = bitcast <8 x i32> %61 to <32 x i8> %1002 = bitcast <4 x i32> %63 to <16 x i8> %1003 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1000, <32 x i8> %1001, <16 x i8> %1002, i32 2) %1004 = extractelement <4 x float> %1003, i32 0 %1005 = extractelement <4 x float> %1003, i32 1 %1006 = extractelement <4 x float> %1003, i32 2 %1007 = fmul float %1004, 4.000000e+00 %1008 = fadd float %1007, -1.000000e+00 %1009 = fmul float %1005, 4.000000e+00 %1010 = fadd float %1009, -1.000000e+00 %1011 = fmul float %1006, 4.000000e+00 %1012 = fadd float %1011, -1.000000e+00 %1013 = fadd float %840, -1.000000e+00 %1014 = fadd float %844, -1.000000e+00 %1015 = fmul float %1008, %1013 %1016 = fmul float %1010, %842 %1017 = fadd float %1016, %1015 %1018 = fmul float %1012, %1014 %1019 = fadd float %1017, %1018 %1020 = bitcast float %885 to i32 %1021 = bitcast float %838 to i32 %1022 = insertelement <2 x i32> undef, i32 %1020, i32 0 %1023 = insertelement <2 x i32> %1022, i32 %1021, i32 1 %1024 = bitcast <8 x i32> %61 to <32 x i8> %1025 = bitcast <4 x i32> %63 to <16 x i8> %1026 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1023, <32 x i8> %1024, <16 x i8> %1025, i32 2) %1027 = extractelement <4 x float> %1026, i32 0 %1028 = extractelement <4 x float> %1026, i32 1 %1029 = extractelement <4 x float> %1026, i32 2 %1030 = fmul float %1027, 4.000000e+00 %1031 = fadd float %1030, -1.000000e+00 %1032 = fmul float %1028, 4.000000e+00 %1033 = fadd float %1032, -1.000000e+00 %1034 = fmul float %1029, 4.000000e+00 %1035 = fadd float %1034, -1.000000e+00 %1036 = fadd float %840, -1.000000e+00 %1037 = fadd float %842, -1.000000e+00 %1038 = fmul float %1031, %1036 %1039 = fmul float %1033, %1037 %1040 = fadd float %1039, %1038 %1041 = fmul float %1035, %844 %1042 = fadd float %1040, %1041 %1043 = bitcast float %885 to i32 %1044 = bitcast float %886 to i32 %1045 = insertelement <2 x i32> undef, i32 %1043, i32 0 %1046 = insertelement <2 x i32> %1045, i32 %1044, i32 1 %1047 = bitcast <8 x i32> %61 to <32 x i8> %1048 = bitcast <4 x i32> %63 to <16 x i8> %1049 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1046, <32 x i8> %1047, <16 x i8> %1048, i32 2) %1050 = extractelement <4 x float> %1049, i32 0 %1051 = extractelement <4 x float> %1049, i32 1 %1052 = extractelement <4 x float> %1049, i32 2 %1053 = fmul float %1050, 4.000000e+00 %1054 = fadd float %1053, -1.000000e+00 %1055 = fmul float %1051, 4.000000e+00 %1056 = fadd float %1055, -1.000000e+00 %1057 = fmul float %1052, 4.000000e+00 %1058 = fadd float %1057, -1.000000e+00 %1059 = fadd float %840, -1.000000e+00 %1060 = fadd float %842, -1.000000e+00 %1061 = fadd float %844, -1.000000e+00 %1062 = fmul float %1054, %1059 %1063 = fmul float %1056, %1060 %1064 = fadd float %1063, %1062 %1065 = fmul float %1058, %1061 %1066 = fadd float %1064, %1065 %1067 = call float @llvm.AMDGPU.lrp(float %840, float %996, float %907) %1068 = call float @llvm.AMDGPU.lrp(float %840, float %1019, float %929) %1069 = call float @llvm.AMDGPU.lrp(float %840, float %1042, float %951) %1070 = call float @llvm.AMDGPU.lrp(float %840, float %1066, float %974) %1071 = call float @llvm.AMDGPU.lrp(float %842, float %1069, float %1067) %1072 = call float @llvm.AMDGPU.lrp(float %842, float %1070, float %1068) %1073 = call float @llvm.AMDGPU.lrp(float %844, float %1072, float %1071) %1074 = call float @llvm.fabs.f32(float %1073) %1075 = fmul float %1074, 0x3FD364D940000000 %1076 = fadd float %1075, %820 %1077 = fmul float %1076, 0x3FCAE147A0000000 %1078 = fadd float %1077, %570 %1079 = fmul float %1076, 0x3FCAE147A0000000 %1080 = fadd float %1079, %571 %1081 = fmul float %1076, 0x3FCAE147A0000000 %1082 = fadd float %1081, %75 %1083 = fmul float %1078, %54 %1084 = fmul float %1080, %54 %1085 = fmul float %1082, %54 %1086 = call float @llvm.floor.f32(float %1083) %1087 = call float @llvm.floor.f32(float %1084) %1088 = call float @llvm.floor.f32(float %1085) %1089 = fmul float %1086, 3.906250e-03 %1090 = fadd float %1089, 1.953125e-03 %1091 = fmul float %1087, 3.906250e-03 %1092 = fadd float %1091, 1.953125e-03 %1093 = fmul float %1088, 3.906250e-03 %1094 = fadd float %1093, 1.953125e-03 %1095 = call float @llvm.floor.f32(float %1083) %1096 = fsub float %1083, %1095 %1097 = call float @llvm.floor.f32(float %1084) %1098 = fsub float %1084, %1097 %1099 = call float @llvm.floor.f32(float %1085) %1100 = fsub float %1085, %1099 %1101 = bitcast float %1090 to i32 %1102 = bitcast float %1092 to i32 %1103 = insertelement <2 x i32> undef, i32 %1101, i32 0 %1104 = insertelement <2 x i32> %1103, i32 %1102, i32 1 %1105 = bitcast <8 x i32> %61 to <32 x i8> %1106 = bitcast <4 x i32> %63 to <16 x i8> %1107 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1104, <32 x i8> %1105, <16 x i8> %1106, i32 2) %1108 = extractelement <4 x float> %1107, i32 3 %1109 = fadd float %1094, 3.906250e-03 %1110 = fadd float %1090, 0.000000e+00 %1111 = fadd float %1092, 3.906250e-03 %1112 = bitcast float %1110 to i32 %1113 = bitcast float %1111 to i32 %1114 = insertelement <2 x i32> undef, i32 %1112, i32 0 %1115 = insertelement <2 x i32> %1114, i32 %1113, i32 1 %1116 = bitcast <8 x i32> %61 to <32 x i8> %1117 = bitcast <4 x i32> %63 to <16 x i8> %1118 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1115, <32 x i8> %1116, <16 x i8> %1117, i32 2) %1119 = extractelement <4 x float> %1118, i32 3 %1120 = fadd float %1094, 3.906250e-03 %1121 = fadd float %1090, 3.906250e-03 %1122 = fadd float %1092, 0.000000e+00 %1123 = bitcast float %1121 to i32 %1124 = bitcast float %1122 to i32 %1125 = insertelement <2 x i32> undef, i32 %1123, i32 0 %1126 = insertelement <2 x i32> %1125, i32 %1124, i32 1 %1127 = bitcast <8 x i32> %61 to <32 x i8> %1128 = bitcast <4 x i32> %63 to <16 x i8> %1129 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1126, <32 x i8> %1127, <16 x i8> %1128, i32 2) %1130 = extractelement <4 x float> %1129, i32 3 %1131 = fadd float %1094, 3.906250e-03 %1132 = fadd float %1090, 3.906250e-03 %1133 = fadd float %1092, 3.906250e-03 %1134 = bitcast float %1132 to i32 %1135 = bitcast float %1133 to i32 %1136 = insertelement <2 x i32> undef, i32 %1134, i32 0 %1137 = insertelement <2 x i32> %1136, i32 %1135, i32 1 %1138 = bitcast <8 x i32> %61 to <32 x i8> %1139 = bitcast <4 x i32> %63 to <16 x i8> %1140 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1137, <32 x i8> %1138, <16 x i8> %1139, i32 2) %1141 = extractelement <4 x float> %1140, i32 3 %1142 = fadd float %1094, 3.906250e-03 %1143 = bitcast float %1108 to i32 %1144 = bitcast float %1094 to i32 %1145 = insertelement <2 x i32> undef, i32 %1143, i32 0 %1146 = insertelement <2 x i32> %1145, i32 %1144, i32 1 %1147 = bitcast <8 x i32> %61 to <32 x i8> %1148 = bitcast <4 x i32> %63 to <16 x i8> %1149 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1146, <32 x i8> %1147, <16 x i8> %1148, i32 2) %1150 = extractelement <4 x float> %1149, i32 0 %1151 = extractelement <4 x float> %1149, i32 1 %1152 = extractelement <4 x float> %1149, i32 2 %1153 = fmul float %1150, 4.000000e+00 %1154 = fadd float %1153, -1.000000e+00 %1155 = fmul float %1151, 4.000000e+00 %1156 = fadd float %1155, -1.000000e+00 %1157 = fmul float %1152, 4.000000e+00 %1158 = fadd float %1157, -1.000000e+00 %1159 = fmul float %1154, %1096 %1160 = fmul float %1156, %1098 %1161 = fadd float %1160, %1159 %1162 = fmul float %1158, %1100 %1163 = fadd float %1161, %1162 %1164 = bitcast float %1108 to i32 %1165 = bitcast float %1109 to i32 %1166 = insertelement <2 x i32> undef, i32 %1164, i32 0 %1167 = insertelement <2 x i32> %1166, i32 %1165, i32 1 %1168 = bitcast <8 x i32> %61 to <32 x i8> %1169 = bitcast <4 x i32> %63 to <16 x i8> %1170 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1167, <32 x i8> %1168, <16 x i8> %1169, i32 2) %1171 = extractelement <4 x float> %1170, i32 0 %1172 = extractelement <4 x float> %1170, i32 1 %1173 = extractelement <4 x float> %1170, i32 2 %1174 = fmul float %1171, 4.000000e+00 %1175 = fadd float %1174, -1.000000e+00 %1176 = fmul float %1172, 4.000000e+00 %1177 = fadd float %1176, -1.000000e+00 %1178 = fmul float %1173, 4.000000e+00 %1179 = fadd float %1178, -1.000000e+00 %1180 = fadd float %1100, -1.000000e+00 %1181 = fmul float %1175, %1096 %1182 = fmul float %1177, %1098 %1183 = fadd float %1182, %1181 %1184 = fmul float %1179, %1180 %1185 = fadd float %1183, %1184 %1186 = bitcast float %1119 to i32 %1187 = bitcast float %1094 to i32 %1188 = insertelement <2 x i32> undef, i32 %1186, i32 0 %1189 = insertelement <2 x i32> %1188, i32 %1187, i32 1 %1190 = bitcast <8 x i32> %61 to <32 x i8> %1191 = bitcast <4 x i32> %63 to <16 x i8> %1192 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1189, <32 x i8> %1190, <16 x i8> %1191, i32 2) %1193 = extractelement <4 x float> %1192, i32 0 %1194 = extractelement <4 x float> %1192, i32 1 %1195 = extractelement <4 x float> %1192, i32 2 %1196 = fmul float %1193, 4.000000e+00 %1197 = fadd float %1196, -1.000000e+00 %1198 = fmul float %1194, 4.000000e+00 %1199 = fadd float %1198, -1.000000e+00 %1200 = fmul float %1195, 4.000000e+00 %1201 = fadd float %1200, -1.000000e+00 %1202 = fadd float %1098, -1.000000e+00 %1203 = fmul float %1197, %1096 %1204 = fmul float %1199, %1202 %1205 = fadd float %1204, %1203 %1206 = fmul float %1201, %1100 %1207 = fadd float %1205, %1206 %1208 = bitcast float %1119 to i32 %1209 = bitcast float %1120 to i32 %1210 = insertelement <2 x i32> undef, i32 %1208, i32 0 %1211 = insertelement <2 x i32> %1210, i32 %1209, i32 1 %1212 = bitcast <8 x i32> %61 to <32 x i8> %1213 = bitcast <4 x i32> %63 to <16 x i8> %1214 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1211, <32 x i8> %1212, <16 x i8> %1213, i32 2) %1215 = extractelement <4 x float> %1214, i32 0 %1216 = extractelement <4 x float> %1214, i32 1 %1217 = extractelement <4 x float> %1214, i32 2 %1218 = fmul float %1215, 4.000000e+00 %1219 = fadd float %1218, -1.000000e+00 %1220 = fmul float %1216, 4.000000e+00 %1221 = fadd float %1220, -1.000000e+00 %1222 = fmul float %1217, 4.000000e+00 %1223 = fadd float %1222, -1.000000e+00 %1224 = fadd float %1098, -1.000000e+00 %1225 = fadd float %1100, -1.000000e+00 %1226 = fmul float %1219, %1096 %1227 = fmul float %1221, %1224 %1228 = fadd float %1227, %1226 %1229 = fmul float %1223, %1225 %1230 = fadd float %1228, %1229 %1231 = bitcast float %1130 to i32 %1232 = bitcast float %1094 to i32 %1233 = insertelement <2 x i32> undef, i32 %1231, i32 0 %1234 = insertelement <2 x i32> %1233, i32 %1232, i32 1 %1235 = bitcast <8 x i32> %61 to <32 x i8> %1236 = bitcast <4 x i32> %63 to <16 x i8> %1237 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1234, <32 x i8> %1235, <16 x i8> %1236, i32 2) %1238 = extractelement <4 x float> %1237, i32 0 %1239 = extractelement <4 x float> %1237, i32 1 %1240 = extractelement <4 x float> %1237, i32 2 %1241 = fmul float %1238, 4.000000e+00 %1242 = fadd float %1241, -1.000000e+00 %1243 = fmul float %1239, 4.000000e+00 %1244 = fadd float %1243, -1.000000e+00 %1245 = fmul float %1240, 4.000000e+00 %1246 = fadd float %1245, -1.000000e+00 %1247 = fadd float %1096, -1.000000e+00 %1248 = fmul float %1242, %1247 %1249 = fmul float %1244, %1098 %1250 = fadd float %1249, %1248 %1251 = fmul float %1246, %1100 %1252 = fadd float %1250, %1251 %1253 = bitcast float %1130 to i32 %1254 = bitcast float %1131 to i32 %1255 = insertelement <2 x i32> undef, i32 %1253, i32 0 %1256 = insertelement <2 x i32> %1255, i32 %1254, i32 1 %1257 = bitcast <8 x i32> %61 to <32 x i8> %1258 = bitcast <4 x i32> %63 to <16 x i8> %1259 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1256, <32 x i8> %1257, <16 x i8> %1258, i32 2) %1260 = extractelement <4 x float> %1259, i32 0 %1261 = extractelement <4 x float> %1259, i32 1 %1262 = extractelement <4 x float> %1259, i32 2 %1263 = fmul float %1260, 4.000000e+00 %1264 = fadd float %1263, -1.000000e+00 %1265 = fmul float %1261, 4.000000e+00 %1266 = fadd float %1265, -1.000000e+00 %1267 = fmul float %1262, 4.000000e+00 %1268 = fadd float %1267, -1.000000e+00 %1269 = fadd float %1096, -1.000000e+00 %1270 = fadd float %1100, -1.000000e+00 %1271 = fmul float %1264, %1269 %1272 = fmul float %1266, %1098 %1273 = fadd float %1272, %1271 %1274 = fmul float %1268, %1270 %1275 = fadd float %1273, %1274 %1276 = bitcast float %1141 to i32 %1277 = bitcast float %1094 to i32 %1278 = insertelement <2 x i32> undef, i32 %1276, i32 0 %1279 = insertelement <2 x i32> %1278, i32 %1277, i32 1 %1280 = bitcast <8 x i32> %61 to <32 x i8> %1281 = bitcast <4 x i32> %63 to <16 x i8> %1282 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1279, <32 x i8> %1280, <16 x i8> %1281, i32 2) %1283 = extractelement <4 x float> %1282, i32 0 %1284 = extractelement <4 x float> %1282, i32 1 %1285 = extractelement <4 x float> %1282, i32 2 %1286 = fmul float %1283, 4.000000e+00 %1287 = fadd float %1286, -1.000000e+00 %1288 = fmul float %1284, 4.000000e+00 %1289 = fadd float %1288, -1.000000e+00 %1290 = fmul float %1285, 4.000000e+00 %1291 = fadd float %1290, -1.000000e+00 %1292 = fadd float %1096, -1.000000e+00 %1293 = fadd float %1098, -1.000000e+00 %1294 = fmul float %1287, %1292 %1295 = fmul float %1289, %1293 %1296 = fadd float %1295, %1294 %1297 = fmul float %1291, %1100 %1298 = fadd float %1296, %1297 %1299 = bitcast float %1141 to i32 %1300 = bitcast float %1142 to i32 %1301 = insertelement <2 x i32> undef, i32 %1299, i32 0 %1302 = insertelement <2 x i32> %1301, i32 %1300, i32 1 %1303 = bitcast <8 x i32> %61 to <32 x i8> %1304 = bitcast <4 x i32> %63 to <16 x i8> %1305 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1302, <32 x i8> %1303, <16 x i8> %1304, i32 2) %1306 = extractelement <4 x float> %1305, i32 0 %1307 = extractelement <4 x float> %1305, i32 1 %1308 = extractelement <4 x float> %1305, i32 2 %1309 = fmul float %1306, 4.000000e+00 %1310 = fadd float %1309, -1.000000e+00 %1311 = fmul float %1307, 4.000000e+00 %1312 = fadd float %1311, -1.000000e+00 %1313 = fmul float %1308, 4.000000e+00 %1314 = fadd float %1313, -1.000000e+00 %1315 = fadd float %1096, -1.000000e+00 %1316 = fadd float %1098, -1.000000e+00 %1317 = fadd float %1100, -1.000000e+00 %1318 = fmul float %1310, %1315 %1319 = fmul float %1312, %1316 %1320 = fadd float %1319, %1318 %1321 = fmul float %1314, %1317 %1322 = fadd float %1320, %1321 %1323 = call float @llvm.AMDGPU.lrp(float %1096, float %1252, float %1163) %1324 = call float @llvm.AMDGPU.lrp(float %1096, float %1275, float %1185) %1325 = call float @llvm.AMDGPU.lrp(float %1096, float %1298, float %1207) %1326 = call float @llvm.AMDGPU.lrp(float %1096, float %1322, float %1230) %1327 = call float @llvm.AMDGPU.lrp(float %1098, float %1325, float %1323) %1328 = call float @llvm.AMDGPU.lrp(float %1098, float %1326, float %1324) %1329 = call float @llvm.AMDGPU.lrp(float %1100, float %1328, float %1327) %1330 = call float @llvm.fabs.f32(float %1329) %1331 = fdiv float 1.000000e+00, %57 %1332 = fmul float %1330, %1331 %1333 = fadd float %1332, %1076 %1334 = fmul float %1333, 0x3FCD70A3E0000000 %1335 = fadd float %1334, %570 %1336 = fmul float %1333, 0x3FCD70A3E0000000 %1337 = fadd float %1336, %571 %1338 = fmul float %1333, 0x3FCD70A3E0000000 %1339 = fadd float %1338, %75 %1340 = fmul float %1335, %55 %1341 = fmul float %1337, %55 %1342 = fmul float %1339, %55 %1343 = call float @llvm.floor.f32(float %1340) %1344 = call float @llvm.floor.f32(float %1341) %1345 = call float @llvm.floor.f32(float %1342) %1346 = fmul float %1343, 3.906250e-03 %1347 = fadd float %1346, 1.953125e-03 %1348 = fmul float %1344, 3.906250e-03 %1349 = fadd float %1348, 1.953125e-03 %1350 = fmul float %1345, 3.906250e-03 %1351 = fadd float %1350, 1.953125e-03 %1352 = call float @llvm.floor.f32(float %1340) %1353 = fsub float %1340, %1352 %1354 = call float @llvm.floor.f32(float %1341) %1355 = fsub float %1341, %1354 %1356 = call float @llvm.floor.f32(float %1342) %1357 = fsub float %1342, %1356 %1358 = bitcast float %1347 to i32 %1359 = bitcast float %1349 to i32 %1360 = insertelement <2 x i32> undef, i32 %1358, i32 0 %1361 = insertelement <2 x i32> %1360, i32 %1359, i32 1 %1362 = bitcast <8 x i32> %61 to <32 x i8> %1363 = bitcast <4 x i32> %63 to <16 x i8> %1364 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1361, <32 x i8> %1362, <16 x i8> %1363, i32 2) %1365 = extractelement <4 x float> %1364, i32 3 %1366 = fadd float %1351, 3.906250e-03 %1367 = fadd float %1347, 0.000000e+00 %1368 = fadd float %1349, 3.906250e-03 %1369 = bitcast float %1367 to i32 %1370 = bitcast float %1368 to i32 %1371 = insertelement <2 x i32> undef, i32 %1369, i32 0 %1372 = insertelement <2 x i32> %1371, i32 %1370, i32 1 %1373 = bitcast <8 x i32> %61 to <32 x i8> %1374 = bitcast <4 x i32> %63 to <16 x i8> %1375 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1372, <32 x i8> %1373, <16 x i8> %1374, i32 2) %1376 = extractelement <4 x float> %1375, i32 3 %1377 = fadd float %1351, 3.906250e-03 %1378 = fadd float %1347, 3.906250e-03 %1379 = fadd float %1349, 0.000000e+00 %1380 = bitcast float %1378 to i32 %1381 = bitcast float %1379 to i32 %1382 = insertelement <2 x i32> undef, i32 %1380, i32 0 %1383 = insertelement <2 x i32> %1382, i32 %1381, i32 1 %1384 = bitcast <8 x i32> %61 to <32 x i8> %1385 = bitcast <4 x i32> %63 to <16 x i8> %1386 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1383, <32 x i8> %1384, <16 x i8> %1385, i32 2) %1387 = extractelement <4 x float> %1386, i32 3 %1388 = fadd float %1351, 3.906250e-03 %1389 = fadd float %1347, 3.906250e-03 %1390 = fadd float %1349, 3.906250e-03 %1391 = bitcast float %1389 to i32 %1392 = bitcast float %1390 to i32 %1393 = insertelement <2 x i32> undef, i32 %1391, i32 0 %1394 = insertelement <2 x i32> %1393, i32 %1392, i32 1 %1395 = bitcast <8 x i32> %61 to <32 x i8> %1396 = bitcast <4 x i32> %63 to <16 x i8> %1397 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1394, <32 x i8> %1395, <16 x i8> %1396, i32 2) %1398 = extractelement <4 x float> %1397, i32 3 %1399 = fadd float %1351, 3.906250e-03 %1400 = bitcast float %1365 to i32 %1401 = bitcast float %1351 to i32 %1402 = insertelement <2 x i32> undef, i32 %1400, i32 0 %1403 = insertelement <2 x i32> %1402, i32 %1401, i32 1 %1404 = bitcast <8 x i32> %61 to <32 x i8> %1405 = bitcast <4 x i32> %63 to <16 x i8> %1406 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1403, <32 x i8> %1404, <16 x i8> %1405, i32 2) %1407 = extractelement <4 x float> %1406, i32 0 %1408 = extractelement <4 x float> %1406, i32 1 %1409 = extractelement <4 x float> %1406, i32 2 %1410 = fmul float %1407, 4.000000e+00 %1411 = fadd float %1410, -1.000000e+00 %1412 = fmul float %1408, 4.000000e+00 %1413 = fadd float %1412, -1.000000e+00 %1414 = fmul float %1409, 4.000000e+00 %1415 = fadd float %1414, -1.000000e+00 %1416 = fmul float %1411, %1353 %1417 = fmul float %1413, %1355 %1418 = fadd float %1417, %1416 %1419 = fmul float %1415, %1357 %1420 = fadd float %1418, %1419 %1421 = bitcast float %1365 to i32 %1422 = bitcast float %1366 to i32 %1423 = insertelement <2 x i32> undef, i32 %1421, i32 0 %1424 = insertelement <2 x i32> %1423, i32 %1422, i32 1 %1425 = bitcast <8 x i32> %61 to <32 x i8> %1426 = bitcast <4 x i32> %63 to <16 x i8> %1427 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1424, <32 x i8> %1425, <16 x i8> %1426, i32 2) %1428 = extractelement <4 x float> %1427, i32 0 %1429 = extractelement <4 x float> %1427, i32 1 %1430 = extractelement <4 x float> %1427, i32 2 %1431 = fmul float %1428, 4.000000e+00 %1432 = fadd float %1431, -1.000000e+00 %1433 = fmul float %1429, 4.000000e+00 %1434 = fadd float %1433, -1.000000e+00 %1435 = fmul float %1430, 4.000000e+00 %1436 = fadd float %1435, -1.000000e+00 %1437 = fadd float %1357, -1.000000e+00 %1438 = fmul float %1432, %1353 %1439 = fmul float %1434, %1355 %1440 = fadd float %1439, %1438 %1441 = fmul float %1436, %1437 %1442 = fadd float %1440, %1441 %1443 = bitcast float %1376 to i32 %1444 = bitcast float %1351 to i32 %1445 = insertelement <2 x i32> undef, i32 %1443, i32 0 %1446 = insertelement <2 x i32> %1445, i32 %1444, i32 1 %1447 = bitcast <8 x i32> %61 to <32 x i8> %1448 = bitcast <4 x i32> %63 to <16 x i8> %1449 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1446, <32 x i8> %1447, <16 x i8> %1448, i32 2) %1450 = extractelement <4 x float> %1449, i32 0 %1451 = extractelement <4 x float> %1449, i32 1 %1452 = extractelement <4 x float> %1449, i32 2 %1453 = fmul float %1450, 4.000000e+00 %1454 = fadd float %1453, -1.000000e+00 %1455 = fmul float %1451, 4.000000e+00 %1456 = fadd float %1455, -1.000000e+00 %1457 = fmul float %1452, 4.000000e+00 %1458 = fadd float %1457, -1.000000e+00 %1459 = fadd float %1355, -1.000000e+00 %1460 = fmul float %1454, %1353 %1461 = fmul float %1456, %1459 %1462 = fadd float %1461, %1460 %1463 = fmul float %1458, %1357 %1464 = fadd float %1462, %1463 %1465 = bitcast float %1376 to i32 %1466 = bitcast float %1377 to i32 %1467 = insertelement <2 x i32> undef, i32 %1465, i32 0 %1468 = insertelement <2 x i32> %1467, i32 %1466, i32 1 %1469 = bitcast <8 x i32> %61 to <32 x i8> %1470 = bitcast <4 x i32> %63 to <16 x i8> %1471 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1468, <32 x i8> %1469, <16 x i8> %1470, i32 2) %1472 = extractelement <4 x float> %1471, i32 0 %1473 = extractelement <4 x float> %1471, i32 1 %1474 = extractelement <4 x float> %1471, i32 2 %1475 = fmul float %1472, 4.000000e+00 %1476 = fadd float %1475, -1.000000e+00 %1477 = fmul float %1473, 4.000000e+00 %1478 = fadd float %1477, -1.000000e+00 %1479 = fmul float %1474, 4.000000e+00 %1480 = fadd float %1479, -1.000000e+00 %1481 = fadd float %1355, -1.000000e+00 %1482 = fadd float %1357, -1.000000e+00 %1483 = fmul float %1476, %1353 %1484 = fmul float %1478, %1481 %1485 = fadd float %1484, %1483 %1486 = fmul float %1480, %1482 %1487 = fadd float %1485, %1486 %1488 = bitcast float %1387 to i32 %1489 = bitcast float %1351 to i32 %1490 = insertelement <2 x i32> undef, i32 %1488, i32 0 %1491 = insertelement <2 x i32> %1490, i32 %1489, i32 1 %1492 = bitcast <8 x i32> %61 to <32 x i8> %1493 = bitcast <4 x i32> %63 to <16 x i8> %1494 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1491, <32 x i8> %1492, <16 x i8> %1493, i32 2) %1495 = extractelement <4 x float> %1494, i32 0 %1496 = extractelement <4 x float> %1494, i32 1 %1497 = extractelement <4 x float> %1494, i32 2 %1498 = fmul float %1495, 4.000000e+00 %1499 = fadd float %1498, -1.000000e+00 %1500 = fmul float %1496, 4.000000e+00 %1501 = fadd float %1500, -1.000000e+00 %1502 = fmul float %1497, 4.000000e+00 %1503 = fadd float %1502, -1.000000e+00 %1504 = fadd float %1353, -1.000000e+00 %1505 = fmul float %1499, %1504 %1506 = fmul float %1501, %1355 %1507 = fadd float %1506, %1505 %1508 = fmul float %1503, %1357 %1509 = fadd float %1507, %1508 %1510 = bitcast float %1387 to i32 %1511 = bitcast float %1388 to i32 %1512 = insertelement <2 x i32> undef, i32 %1510, i32 0 %1513 = insertelement <2 x i32> %1512, i32 %1511, i32 1 %1514 = bitcast <8 x i32> %61 to <32 x i8> %1515 = bitcast <4 x i32> %63 to <16 x i8> %1516 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1513, <32 x i8> %1514, <16 x i8> %1515, i32 2) %1517 = extractelement <4 x float> %1516, i32 0 %1518 = extractelement <4 x float> %1516, i32 1 %1519 = extractelement <4 x float> %1516, i32 2 %1520 = fmul float %1517, 4.000000e+00 %1521 = fadd float %1520, -1.000000e+00 %1522 = fmul float %1518, 4.000000e+00 %1523 = fadd float %1522, -1.000000e+00 %1524 = fmul float %1519, 4.000000e+00 %1525 = fadd float %1524, -1.000000e+00 %1526 = fadd float %1353, -1.000000e+00 %1527 = fadd float %1357, -1.000000e+00 %1528 = fmul float %1521, %1526 %1529 = fmul float %1523, %1355 %1530 = fadd float %1529, %1528 %1531 = fmul float %1525, %1527 %1532 = fadd float %1530, %1531 %1533 = bitcast float %1398 to i32 %1534 = bitcast float %1351 to i32 %1535 = insertelement <2 x i32> undef, i32 %1533, i32 0 %1536 = insertelement <2 x i32> %1535, i32 %1534, i32 1 %1537 = bitcast <8 x i32> %61 to <32 x i8> %1538 = bitcast <4 x i32> %63 to <16 x i8> %1539 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1536, <32 x i8> %1537, <16 x i8> %1538, i32 2) %1540 = extractelement <4 x float> %1539, i32 0 %1541 = extractelement <4 x float> %1539, i32 1 %1542 = extractelement <4 x float> %1539, i32 2 %1543 = fmul float %1540, 4.000000e+00 %1544 = fadd float %1543, -1.000000e+00 %1545 = fmul float %1541, 4.000000e+00 %1546 = fadd float %1545, -1.000000e+00 %1547 = fmul float %1542, 4.000000e+00 %1548 = fadd float %1547, -1.000000e+00 %1549 = fadd float %1353, -1.000000e+00 %1550 = fadd float %1355, -1.000000e+00 %1551 = fmul float %1544, %1549 %1552 = fmul float %1546, %1550 %1553 = fadd float %1552, %1551 %1554 = fmul float %1548, %1357 %1555 = fadd float %1553, %1554 %1556 = bitcast float %1398 to i32 %1557 = bitcast float %1399 to i32 %1558 = insertelement <2 x i32> undef, i32 %1556, i32 0 %1559 = insertelement <2 x i32> %1558, i32 %1557, i32 1 %1560 = bitcast <8 x i32> %61 to <32 x i8> %1561 = bitcast <4 x i32> %63 to <16 x i8> %1562 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1559, <32 x i8> %1560, <16 x i8> %1561, i32 2) %1563 = extractelement <4 x float> %1562, i32 0 %1564 = extractelement <4 x float> %1562, i32 1 %1565 = extractelement <4 x float> %1562, i32 2 %1566 = fmul float %1563, 4.000000e+00 %1567 = fadd float %1566, -1.000000e+00 %1568 = fmul float %1564, 4.000000e+00 %1569 = fadd float %1568, -1.000000e+00 %1570 = fmul float %1565, 4.000000e+00 %1571 = fadd float %1570, -1.000000e+00 %1572 = fadd float %1353, -1.000000e+00 %1573 = fadd float %1355, -1.000000e+00 %1574 = fadd float %1357, -1.000000e+00 %1575 = fmul float %1567, %1572 %1576 = fmul float %1569, %1573 %1577 = fadd float %1576, %1575 %1578 = fmul float %1571, %1574 %1579 = fadd float %1577, %1578 %1580 = call float @llvm.AMDGPU.lrp(float %1353, float %1509, float %1420) %1581 = call float @llvm.AMDGPU.lrp(float %1353, float %1532, float %1442) %1582 = call float @llvm.AMDGPU.lrp(float %1353, float %1555, float %1464) %1583 = call float @llvm.AMDGPU.lrp(float %1353, float %1579, float %1487) %1584 = call float @llvm.AMDGPU.lrp(float %1355, float %1582, float %1580) %1585 = call float @llvm.AMDGPU.lrp(float %1355, float %1583, float %1581) %1586 = call float @llvm.AMDGPU.lrp(float %1357, float %1585, float %1584) %1587 = call float @llvm.fabs.f32(float %1586) %1588 = fdiv float 1.000000e+00, %58 %1589 = fmul float %1587, %1588 %1590 = fadd float %1589, %1333 %1591 = fmul float %1590, 2.500000e-01 %1592 = fadd float %1591, %570 %1593 = fmul float %1590, 2.500000e-01 %1594 = fadd float %1593, %571 %1595 = fmul float %1590, 2.500000e-01 %1596 = fadd float %1595, %75 %1597 = fmul float %1592, %56 %1598 = fmul float %1594, %56 %1599 = fmul float %1596, %56 %1600 = call float @llvm.floor.f32(float %1597) %1601 = call float @llvm.floor.f32(float %1598) %1602 = call float @llvm.floor.f32(float %1599) %1603 = fmul float %1600, 3.906250e-03 %1604 = fadd float %1603, 1.953125e-03 %1605 = fmul float %1601, 3.906250e-03 %1606 = fadd float %1605, 1.953125e-03 %1607 = fmul float %1602, 3.906250e-03 %1608 = fadd float %1607, 1.953125e-03 %1609 = call float @llvm.floor.f32(float %1597) %1610 = fsub float %1597, %1609 %1611 = call float @llvm.floor.f32(float %1598) %1612 = fsub float %1598, %1611 %1613 = call float @llvm.floor.f32(float %1599) %1614 = fsub float %1599, %1613 %1615 = bitcast float %1604 to i32 %1616 = bitcast float %1606 to i32 %1617 = insertelement <2 x i32> undef, i32 %1615, i32 0 %1618 = insertelement <2 x i32> %1617, i32 %1616, i32 1 %1619 = bitcast <8 x i32> %61 to <32 x i8> %1620 = bitcast <4 x i32> %63 to <16 x i8> %1621 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1618, <32 x i8> %1619, <16 x i8> %1620, i32 2) %1622 = extractelement <4 x float> %1621, i32 3 %1623 = fadd float %1608, 3.906250e-03 %1624 = fadd float %1604, 0.000000e+00 %1625 = fadd float %1606, 3.906250e-03 %1626 = bitcast float %1624 to i32 %1627 = bitcast float %1625 to i32 %1628 = insertelement <2 x i32> undef, i32 %1626, i32 0 %1629 = insertelement <2 x i32> %1628, i32 %1627, i32 1 %1630 = bitcast <8 x i32> %61 to <32 x i8> %1631 = bitcast <4 x i32> %63 to <16 x i8> %1632 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1629, <32 x i8> %1630, <16 x i8> %1631, i32 2) %1633 = extractelement <4 x float> %1632, i32 3 %1634 = fadd float %1608, 3.906250e-03 %1635 = fadd float %1604, 3.906250e-03 %1636 = fadd float %1606, 0.000000e+00 %1637 = bitcast float %1635 to i32 %1638 = bitcast float %1636 to i32 %1639 = insertelement <2 x i32> undef, i32 %1637, i32 0 %1640 = insertelement <2 x i32> %1639, i32 %1638, i32 1 %1641 = bitcast <8 x i32> %61 to <32 x i8> %1642 = bitcast <4 x i32> %63 to <16 x i8> %1643 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1640, <32 x i8> %1641, <16 x i8> %1642, i32 2) %1644 = extractelement <4 x float> %1643, i32 3 %1645 = fadd float %1608, 3.906250e-03 %1646 = fadd float %1604, 3.906250e-03 %1647 = fadd float %1606, 3.906250e-03 %1648 = bitcast float %1646 to i32 %1649 = bitcast float %1647 to i32 %1650 = insertelement <2 x i32> undef, i32 %1648, i32 0 %1651 = insertelement <2 x i32> %1650, i32 %1649, i32 1 %1652 = bitcast <8 x i32> %61 to <32 x i8> %1653 = bitcast <4 x i32> %63 to <16 x i8> %1654 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1651, <32 x i8> %1652, <16 x i8> %1653, i32 2) %1655 = extractelement <4 x float> %1654, i32 3 %1656 = fadd float %1608, 3.906250e-03 %1657 = bitcast float %1622 to i32 %1658 = bitcast float %1608 to i32 %1659 = insertelement <2 x i32> undef, i32 %1657, i32 0 %1660 = insertelement <2 x i32> %1659, i32 %1658, i32 1 %1661 = bitcast <8 x i32> %61 to <32 x i8> %1662 = bitcast <4 x i32> %63 to <16 x i8> %1663 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1660, <32 x i8> %1661, <16 x i8> %1662, i32 2) %1664 = extractelement <4 x float> %1663, i32 0 %1665 = extractelement <4 x float> %1663, i32 1 %1666 = extractelement <4 x float> %1663, i32 2 %1667 = fmul float %1664, 4.000000e+00 %1668 = fadd float %1667, -1.000000e+00 %1669 = fmul float %1665, 4.000000e+00 %1670 = fadd float %1669, -1.000000e+00 %1671 = fmul float %1666, 4.000000e+00 %1672 = fadd float %1671, -1.000000e+00 %1673 = fmul float %1668, %1610 %1674 = fmul float %1670, %1612 %1675 = fadd float %1674, %1673 %1676 = fmul float %1672, %1614 %1677 = fadd float %1675, %1676 %1678 = bitcast float %1622 to i32 %1679 = bitcast float %1623 to i32 %1680 = insertelement <2 x i32> undef, i32 %1678, i32 0 %1681 = insertelement <2 x i32> %1680, i32 %1679, i32 1 %1682 = bitcast <8 x i32> %61 to <32 x i8> %1683 = bitcast <4 x i32> %63 to <16 x i8> %1684 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1681, <32 x i8> %1682, <16 x i8> %1683, i32 2) %1685 = extractelement <4 x float> %1684, i32 0 %1686 = extractelement <4 x float> %1684, i32 1 %1687 = extractelement <4 x float> %1684, i32 2 %1688 = fmul float %1685, 4.000000e+00 %1689 = fadd float %1688, -1.000000e+00 %1690 = fmul float %1686, 4.000000e+00 %1691 = fadd float %1690, -1.000000e+00 %1692 = fmul float %1687, 4.000000e+00 %1693 = fadd float %1692, -1.000000e+00 %1694 = fadd float %1614, -1.000000e+00 %1695 = fmul float %1689, %1610 %1696 = fmul float %1691, %1612 %1697 = fadd float %1696, %1695 %1698 = fmul float %1693, %1694 %1699 = fadd float %1697, %1698 %1700 = bitcast float %1633 to i32 %1701 = bitcast float %1608 to i32 %1702 = insertelement <2 x i32> undef, i32 %1700, i32 0 %1703 = insertelement <2 x i32> %1702, i32 %1701, i32 1 %1704 = bitcast <8 x i32> %61 to <32 x i8> %1705 = bitcast <4 x i32> %63 to <16 x i8> %1706 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1703, <32 x i8> %1704, <16 x i8> %1705, i32 2) %1707 = extractelement <4 x float> %1706, i32 0 %1708 = extractelement <4 x float> %1706, i32 1 %1709 = extractelement <4 x float> %1706, i32 2 %1710 = fmul float %1707, 4.000000e+00 %1711 = fadd float %1710, -1.000000e+00 %1712 = fmul float %1708, 4.000000e+00 %1713 = fadd float %1712, -1.000000e+00 %1714 = fmul float %1709, 4.000000e+00 %1715 = fadd float %1714, -1.000000e+00 %1716 = fadd float %1612, -1.000000e+00 %1717 = fmul float %1711, %1610 %1718 = fmul float %1713, %1716 %1719 = fadd float %1718, %1717 %1720 = fmul float %1715, %1614 %1721 = fadd float %1719, %1720 %1722 = bitcast float %1633 to i32 %1723 = bitcast float %1634 to i32 %1724 = insertelement <2 x i32> undef, i32 %1722, i32 0 %1725 = insertelement <2 x i32> %1724, i32 %1723, i32 1 %1726 = bitcast <8 x i32> %61 to <32 x i8> %1727 = bitcast <4 x i32> %63 to <16 x i8> %1728 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1725, <32 x i8> %1726, <16 x i8> %1727, i32 2) %1729 = extractelement <4 x float> %1728, i32 0 %1730 = extractelement <4 x float> %1728, i32 1 %1731 = extractelement <4 x float> %1728, i32 2 %1732 = fmul float %1729, 4.000000e+00 %1733 = fadd float %1732, -1.000000e+00 %1734 = fmul float %1730, 4.000000e+00 %1735 = fadd float %1734, -1.000000e+00 %1736 = fmul float %1731, 4.000000e+00 %1737 = fadd float %1736, -1.000000e+00 %1738 = fadd float %1612, -1.000000e+00 %1739 = fadd float %1614, -1.000000e+00 %1740 = fmul float %1733, %1610 %1741 = fmul float %1735, %1738 %1742 = fadd float %1741, %1740 %1743 = fmul float %1737, %1739 %1744 = fadd float %1742, %1743 %1745 = bitcast float %1644 to i32 %1746 = bitcast float %1608 to i32 %1747 = insertelement <2 x i32> undef, i32 %1745, i32 0 %1748 = insertelement <2 x i32> %1747, i32 %1746, i32 1 %1749 = bitcast <8 x i32> %61 to <32 x i8> %1750 = bitcast <4 x i32> %63 to <16 x i8> %1751 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1748, <32 x i8> %1749, <16 x i8> %1750, i32 2) %1752 = extractelement <4 x float> %1751, i32 0 %1753 = extractelement <4 x float> %1751, i32 1 %1754 = extractelement <4 x float> %1751, i32 2 %1755 = fmul float %1752, 4.000000e+00 %1756 = fadd float %1755, -1.000000e+00 %1757 = fmul float %1753, 4.000000e+00 %1758 = fadd float %1757, -1.000000e+00 %1759 = fmul float %1754, 4.000000e+00 %1760 = fadd float %1759, -1.000000e+00 %1761 = fadd float %1610, -1.000000e+00 %1762 = fmul float %1756, %1761 %1763 = fmul float %1758, %1612 %1764 = fadd float %1763, %1762 %1765 = fmul float %1760, %1614 %1766 = fadd float %1764, %1765 %1767 = bitcast float %1644 to i32 %1768 = bitcast float %1645 to i32 %1769 = insertelement <2 x i32> undef, i32 %1767, i32 0 %1770 = insertelement <2 x i32> %1769, i32 %1768, i32 1 %1771 = bitcast <8 x i32> %61 to <32 x i8> %1772 = bitcast <4 x i32> %63 to <16 x i8> %1773 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1770, <32 x i8> %1771, <16 x i8> %1772, i32 2) %1774 = extractelement <4 x float> %1773, i32 0 %1775 = extractelement <4 x float> %1773, i32 1 %1776 = extractelement <4 x float> %1773, i32 2 %1777 = fmul float %1774, 4.000000e+00 %1778 = fadd float %1777, -1.000000e+00 %1779 = fmul float %1775, 4.000000e+00 %1780 = fadd float %1779, -1.000000e+00 %1781 = fmul float %1776, 4.000000e+00 %1782 = fadd float %1781, -1.000000e+00 %1783 = fadd float %1610, -1.000000e+00 %1784 = fadd float %1614, -1.000000e+00 %1785 = fmul float %1778, %1783 %1786 = fmul float %1780, %1612 %1787 = fadd float %1786, %1785 %1788 = fmul float %1782, %1784 %1789 = fadd float %1787, %1788 %1790 = bitcast float %1655 to i32 %1791 = bitcast float %1608 to i32 %1792 = insertelement <2 x i32> undef, i32 %1790, i32 0 %1793 = insertelement <2 x i32> %1792, i32 %1791, i32 1 %1794 = bitcast <8 x i32> %61 to <32 x i8> %1795 = bitcast <4 x i32> %63 to <16 x i8> %1796 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1793, <32 x i8> %1794, <16 x i8> %1795, i32 2) %1797 = extractelement <4 x float> %1796, i32 0 %1798 = extractelement <4 x float> %1796, i32 1 %1799 = extractelement <4 x float> %1796, i32 2 %1800 = fmul float %1797, 4.000000e+00 %1801 = fadd float %1800, -1.000000e+00 %1802 = fmul float %1798, 4.000000e+00 %1803 = fadd float %1802, -1.000000e+00 %1804 = fmul float %1799, 4.000000e+00 %1805 = fadd float %1804, -1.000000e+00 %1806 = fadd float %1610, -1.000000e+00 %1807 = fadd float %1612, -1.000000e+00 %1808 = fmul float %1801, %1806 %1809 = fmul float %1803, %1807 %1810 = fadd float %1809, %1808 %1811 = fmul float %1805, %1614 %1812 = fadd float %1810, %1811 %1813 = bitcast float %1655 to i32 %1814 = bitcast float %1656 to i32 %1815 = insertelement <2 x i32> undef, i32 %1813, i32 0 %1816 = insertelement <2 x i32> %1815, i32 %1814, i32 1 %1817 = bitcast <8 x i32> %61 to <32 x i8> %1818 = bitcast <4 x i32> %63 to <16 x i8> %1819 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1816, <32 x i8> %1817, <16 x i8> %1818, i32 2) %1820 = extractelement <4 x float> %1819, i32 0 %1821 = extractelement <4 x float> %1819, i32 1 %1822 = extractelement <4 x float> %1819, i32 2 %1823 = fmul float %1820, 4.000000e+00 %1824 = fadd float %1823, -1.000000e+00 %1825 = fmul float %1821, 4.000000e+00 %1826 = fadd float %1825, -1.000000e+00 %1827 = fmul float %1822, 4.000000e+00 %1828 = fadd float %1827, -1.000000e+00 %1829 = fadd float %1610, -1.000000e+00 %1830 = fadd float %1612, -1.000000e+00 %1831 = fadd float %1614, -1.000000e+00 %1832 = fmul float %1824, %1829 %1833 = fmul float %1826, %1830 %1834 = fadd float %1833, %1832 %1835 = fmul float %1828, %1831 %1836 = fadd float %1834, %1835 %1837 = call float @llvm.AMDGPU.lrp(float %1610, float %1766, float %1677) %1838 = call float @llvm.AMDGPU.lrp(float %1610, float %1789, float %1699) %1839 = call float @llvm.AMDGPU.lrp(float %1610, float %1812, float %1721) %1840 = call float @llvm.AMDGPU.lrp(float %1610, float %1836, float %1744) %1841 = call float @llvm.AMDGPU.lrp(float %1612, float %1839, float %1837) %1842 = call float @llvm.AMDGPU.lrp(float %1612, float %1840, float %1838) %1843 = call float @llvm.AMDGPU.lrp(float %1614, float %1842, float %1841) %1844 = call float @llvm.fabs.f32(float %1843) %1845 = fdiv float 1.000000e+00, %59 %1846 = fmul float %1844, %1845 %1847 = fadd float %1846, %1590 %1848 = fsub float %1847, %28 %1849 = fsub float %67, %24 %1850 = fsub float %68, %25 %1851 = fsub float %69, %26 %1852 = fmul float %1849, %1849 %1853 = fmul float %1850, %1850 %1854 = fadd float %1853, %1852 %1855 = fmul float %1851, %1851 %1856 = fadd float %1854, %1855 %1857 = call float @llvm.AMDGPU.rsq.clamped.f32(float %1856) %1858 = fmul float %1849, %1857 %1859 = fmul float %1850, %1857 %1860 = fmul float %1851, %1857 %1861 = fmul float %1858, 0x3FF19999A0000000 %1862 = fmul float %1859, 0x3FF19999A0000000 %1863 = fmul float %1860, 0x3FF19999A0000000 %1864 = fmul float %46, %46 %1865 = fmul float %47, %47 %1866 = fadd float %1865, %1864 %1867 = fmul float %48, %48 %1868 = fadd float %1866, %1867 %1869 = call float @llvm.AMDGPU.rsq.clamped.f32(float %1868) %1870 = fmul float %46, %1869 %1871 = fmul float %47, %1869 %1872 = fmul float %48, %1869 %1873 = fmul float %1861, %1870 %1874 = fmul float %1862, %1871 %1875 = fadd float %1874, %1873 %1876 = fmul float %1863, %1872 %1877 = fadd float %1875, %1876 %1878 = fmul float %1877, 5.000000e-01 %1879 = fadd float %1878, 5.000000e-01 %1880 = fmul float %1879, %1879 %1881 = fmul float %1880, %1880 %1882 = fadd float %1848, 0xBFC99999A0000000 %1883 = call float @llvm.fabs.f32(float %1882) %1884 = fsub float 1.000000e+00, %1883 %1885 = call float @llvm.AMDIL.clamp.(float %1884, float 0.000000e+00, float 1.000000e+00) %1886 = call float @llvm.AMDIL.clamp.(float %1881, float 0.000000e+00, float 1.000000e+00) %1887 = call float @llvm.AMDGPU.lrp(float %1886, float %30, float %38) %1888 = call float @llvm.AMDGPU.lrp(float %1886, float %31, float %39) %1889 = call float @llvm.AMDGPU.lrp(float %1886, float %32, float %40) %1890 = call float @llvm.AMDGPU.lrp(float %1886, float %33, float %41) %1891 = fmul float %1885, %1885 %1892 = call float @llvm.maxnum.f32(float %1891, float 0.000000e+00) %1893 = call float @llvm.AMDIL.clamp.(float %1881, float 0.000000e+00, float 1.000000e+00) %1894 = call float @llvm.AMDGPU.lrp(float %1893, float %34, float %42) %1895 = call float @llvm.AMDGPU.lrp(float %1893, float %35, float %43) %1896 = call float @llvm.AMDGPU.lrp(float %1893, float %36, float %44) %1897 = call float @llvm.AMDGPU.lrp(float %1893, float %37, float %45) %1898 = fmul float %1887, %1892 %1899 = fadd float %1898, %1894 %1900 = fmul float %1888, %1892 %1901 = fadd float %1900, %1895 %1902 = fmul float %1889, %1892 %1903 = fadd float %1902, %1896 %1904 = fmul float %1899, 0x3FD322D0E0000000 %1905 = fmul float %1901, 0x3FE2C8B440000000 %1906 = fadd float %1905, %1904 %1907 = fmul float %1903, 0x3FBD2F1AA0000000 %1908 = fadd float %1907, %1906 %1909 = call float @llvm.pow.f32(float %49, float 1.500000e+00) %1910 = call float @llvm.AMDGPU.lrp(float %1909, float %1899, float %1908) %1911 = call float @llvm.AMDGPU.lrp(float %1909, float %1901, float %1908) %1912 = call float @llvm.AMDGPU.lrp(float %1909, float %1903, float %1908) %1913 = fmul float %1910, %49 %1914 = fmul float %1911, %49 %1915 = fmul float %1912, %49 %1916 = fmul float %1848, %29 %1917 = fadd float %64, -5.000000e-01 %1918 = fadd float %65, -5.000000e-01 %1919 = fmul float %1917, %1917 %1920 = fmul float %1918, %1918 %1921 = fadd float %1919, %1920 %1922 = call float @llvm.sqrt.f32(float %1921) %1923 = fdiv float 1.000000e+00, %50 %1924 = fmul float %1922, %1923 %1925 = fsub float 1.000000e+00, %1924 %1926 = call float @llvm.AMDIL.clamp.(float %1925, float 0.000000e+00, float 1.000000e+00) %1927 = fmul float %1916, %1926 %1928 = call float @llvm.AMDIL.clamp.(float %1927, float 0.000000e+00, float 1.000000e+00) %1929 = call i32 @llvm.SI.packf16(float %1913, float %1914) %1930 = bitcast i32 %1929 to float %1931 = call i32 @llvm.SI.packf16(float %1915, float %1928) %1932 = bitcast i32 %1931 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %1930, float %1932, float %1930, float %1932) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v6, v0, 2, 0, [m0] ; C8180200 v_interp_p2_f32 v6, [v6], v1, 2, 0, [m0] ; C8190201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x32 ; C2040132 s_buffer_load_dword s11, s[0:3], 0x34 ; C2058134 v_interp_p1_f32 v0, v0, 1, 1, [m0] ; C8000500 v_interp_p2_f32 v0, [v0], v1, 1, 1, [m0] ; C8010501 s_buffer_load_dword s10, s[0:3], 0x35 ; C2050135 s_buffer_load_dword s9, s[0:3], 0x36 ; C2048136 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x30 ; C2068130 s_buffer_load_dword s14, s[0:3], 0x31 ; C2070131 s_buffer_load_dword s24, s[0:3], 0x38 ; C20C0138 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s8 ; 7E020208 s_buffer_load_dword s8, s[0:3], 0xc ; C204010C v_mac_f32_e32 v1, s12, v6 ; 3E020C0C v_mov_b32_e32 v6, s13 ; 7E0C020D v_mac_f32_e32 v6, s12, v2 ; 3E0C040C v_mov_b32_e32 v7, s14 ; 7E0E020E v_mac_f32_e32 v7, s12, v3 ; 3E0E060C v_mov_b32_e32 v10, 0x3dcccccd ; 7E1402FF 3DCCCCCD v_mul_f32_e32 v8, v10, v6 ; 10100D0A v_floor_f32_e32 v11, v8 ; 7E164908 v_mul_f32_e32 v8, v10, v7 ; 10100F0A v_floor_f32_e32 v12, v8 ; 7E184908 v_mul_f32_e32 v8, v10, v1 ; 1010030A v_floor_f32_e32 v13, v8 ; 7E1A4908 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 v_mov_b32_e32 v9, 0x3b000000 ; 7E1202FF 3B000000 v_mov_b32_e32 v8, 0x3b800000 ; 7E1002FF 3B800000 v_mad_f32 v14, v8, v11, v9 ; D282000E 04261708 v_mad_f32 v15, v8, v12, v9 ; D282000F 04261908 v_add_f32_e32 v17, 0, v14 ; 06221C80 v_add_f32_e32 v18, v8, v15 ; 06241F08 v_add_f32_e32 v20, v8, v14 ; 06281D08 v_add_f32_e32 v21, 0, v15 ; 062A1E80 v_mad_f32 v24, v8, v13, v9 ; D2820018 04261B08 v_mov_b32_e32 v25, v20 ; 7E320314 v_mov_b32_e32 v26, v21 ; 7E340315 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v23, 8, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[16:23], s[12:15] ; F0800800 0064170E v_mov_b32_e32 v26, v18 ; 7E340312 v_add_f32_e32 v27, v8, v24 ; 06363108 image_sample v28, 8, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[16:23], s[12:15] ; F0800800 00641C11 image_sample v30, 8, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[16:23], s[12:15] ; F0800800 00641E14 s_waitcnt vmcnt(2) ; BF8C0772 v_mov_b32_e32 v31, v23 ; 7E3E0317 v_mov_b32_e32 v32, v24 ; 7E400318 image_sample v25, 8, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[16:23], s[12:15] ; F0800800 00641919 v_mov_b32_e32 v32, v27 ; 7E40031B v_mov_b32_e32 v29, v24 ; 7E3A0318 image_sample v[33:35], 7, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[16:23], s[12:15] ; F0800700 00642117 image_sample v[36:38], 7, 0, 0, 0, 0, 0, 0, 0, v[31:32], s[16:23], s[12:15] ; F0800700 0064241F s_waitcnt vmcnt(4) ; BF8C0774 image_sample v[39:41], 7, 0, 0, 0, 0, 0, 0, 0, v[28:29], s[16:23], s[12:15] ; F0800700 0064271C v_mov_b32_e32 v29, v27 ; 7E3A031B v_mov_b32_e32 v31, v24 ; 7E3E0318 image_sample v[42:44], 7, 0, 0, 0, 0, 0, 0, 0, v[28:29], s[16:23], s[12:15] ; F0800700 00642A1C s_waitcnt vmcnt(5) ; BF8C0775 image_sample v[45:47], 7, 0, 0, 0, 0, 0, 0, 0, v[30:31], s[16:23], s[12:15] ; F0800700 00642D1E v_mov_b32_e32 v31, v27 ; 7E3E031B v_mov_b32_e32 v26, v24 ; 7E340318 image_sample v[28:30], 7, 0, 0, 0, 0, 0, 0, 0, v[30:31], s[16:23], s[12:15] ; F0800700 00641C1E s_waitcnt vmcnt(6) ; BF8C0776 image_sample v[48:50], 7, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[16:23], s[12:15] ; F0800700 00643019 v_mov_b32_e32 v26, v27 ; 7E34031B v_mov_b32_e32 v16, v14 ; 7E20030E image_sample v[51:53], 7, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[16:23], s[12:15] ; F0800700 00643319 image_sample v23, 8, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[16:23], s[12:15] ; F0800800 0064170F v_mov_b32_e32 v22, v20 ; 7E2C0314 v_mov_b32_e32 v19, v17 ; 7E260311 image_sample v14, 8, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[16:23], s[12:15] ; F0800800 00640E15 image_sample v16, 8, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[16:23], s[12:15] ; F0800800 00641012 v_mov_b32_e32 v19, v20 ; 7E260314 s_waitcnt vmcnt(10) ; BF8C077A v_mad_f32 v20, 4.0, v33, -1.0 ; D2820014 03CE42F6 v_mad_f32 v21, 4.0, v34, -1.0 ; D2820015 03CE44F6 v_mad_f32 v22, 4.0, v35, -1.0 ; D2820016 03CE46F6 s_waitcnt vmcnt(9) ; BF8C0779 v_mad_f32 v25, 4.0, v36, -1.0 ; D2820019 03CE48F6 v_mad_f32 v26, 4.0, v37, -1.0 ; D282001A 03CE4AF6 v_mad_f32 v31, 4.0, v38, -1.0 ; D282001F 03CE4CF6 s_waitcnt vmcnt(8) ; BF8C0778 v_mad_f32 v32, 4.0, v39, -1.0 ; D2820020 03CE4EF6 v_mad_f32 v33, 4.0, v40, -1.0 ; D2820021 03CE50F6 v_mad_f32 v34, 4.0, v41, -1.0 ; D2820022 03CE52F6 s_waitcnt vmcnt(7) ; BF8C0777 v_mad_f32 v35, 4.0, v42, -1.0 ; D2820023 03CE54F6 v_mad_f32 v36, 4.0, v43, -1.0 ; D2820024 03CE56F6 v_mad_f32 v37, 4.0, v44, -1.0 ; D2820025 03CE58F6 s_waitcnt vmcnt(6) ; BF8C0776 v_mad_f32 v38, 4.0, v45, -1.0 ; D2820026 03CE5AF6 v_mad_f32 v39, 4.0, v46, -1.0 ; D2820027 03CE5CF6 v_mad_f32 v40, 4.0, v47, -1.0 ; D2820028 03CE5EF6 s_waitcnt vmcnt(5) ; BF8C0775 v_mad_f32 v28, 4.0, v28, -1.0 ; D282001C 03CE38F6 v_mad_f32 v29, 4.0, v29, -1.0 ; D282001D 03CE3AF6 v_mad_f32 v30, 4.0, v30, -1.0 ; D282001E 03CE3CF6 s_waitcnt vmcnt(4) ; BF8C0774 v_mad_f32 v41, 4.0, v48, -1.0 ; D2820029 03CE60F6 v_mad_f32 v42, 4.0, v49, -1.0 ; D282002A 03CE62F6 v_mad_f32 v43, 4.0, v50, -1.0 ; D282002B 03CE64F6 image_sample v18, 8, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[16:23], s[12:15] ; F0800800 00641212 s_waitcnt vmcnt(4) ; BF8C0774 v_mad_f32 v44, 4.0, v51, -1.0 ; D282002C 03CE66F6 v_mad_f32 v45, 4.0, v52, -1.0 ; D282002D 03CE68F6 v_mad_f32 v46, 4.0, v53, -1.0 ; D282002E 03CE6AF6 v_mov_b32_e32 v15, v24 ; 7E1E0318 v_mov_b32_e32 v17, v24 ; 7E220318 v_mov_b32_e32 v19, v24 ; 7E260318 s_waitcnt vmcnt(3) ; BF8C0773 v_mov_b32_e32 v47, v23 ; 7E5E0317 v_mov_b32_e32 v48, v24 ; 7E600318 image_sample v[48:50], 7, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[16:23], s[12:15] ; F0800700 00643017 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v23, 4.0, v48, -1.0 ; D2820017 03CE60F6 v_mad_f32 v24, 4.0, v49, -1.0 ; D2820018 03CE62F6 v_mad_f32 v49, 4.0, v50, -1.0 ; D2820031 03CE64F6 v_mov_b32_e32 v48, v27 ; 7E60031B image_sample v[50:52], 7, 0, 0, 0, 0, 0, 0, 0, v[47:48], s[16:23], s[12:15] ; F0800700 0064322F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v47, 4.0, v50, -1.0 ; D282002F 03CE64F6 v_mad_f32 v48, 4.0, v51, -1.0 ; D2820030 03CE66F6 v_mad_f32 v50, 4.0, v52, -1.0 ; D2820032 03CE68F6 image_sample v[51:53], 7, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[16:23], s[12:15] ; F0800700 0064330E s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v51, 4.0, v51, -1.0 ; D2820033 03CE66F6 v_mad_f32 v52, 4.0, v52, -1.0 ; D2820034 03CE68F6 v_mad_f32 v53, 4.0, v53, -1.0 ; D2820035 03CE6AF6 v_mov_b32_e32 v15, v27 ; 7E1E031B image_sample v[54:56], 7, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[16:23], s[12:15] ; F0800700 0064360E s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v14, 4.0, v54, -1.0 ; D282000E 03CE6CF6 v_mad_f32 v15, 4.0, v55, -1.0 ; D282000F 03CE6EF6 v_mad_f32 v54, 4.0, v56, -1.0 ; D2820036 03CE70F6 image_sample v[55:57], 7, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[16:23], s[12:15] ; F0800700 00643710 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v55, 4.0, v55, -1.0 ; D2820037 03CE6EF6 v_mad_f32 v56, 4.0, v56, -1.0 ; D2820038 03CE70F6 v_mad_f32 v57, 4.0, v57, -1.0 ; D2820039 03CE72F6 v_mov_b32_e32 v17, v27 ; 7E22031B image_sample v[58:60], 7, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[16:23], s[12:15] ; F0800700 00643A10 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, 4.0, v58, -1.0 ; D2820010 03CE74F6 v_mad_f32 v17, 4.0, v59, -1.0 ; D2820011 03CE76F6 v_mad_f32 v58, 4.0, v60, -1.0 ; D282003A 03CE78F6 image_sample v[59:61], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[16:23], s[12:15] ; F0800700 00643B12 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v59, 4.0, v59, -1.0 ; D282003B 03CE76F6 v_mad_f32 v60, 4.0, v60, -1.0 ; D282003C 03CE78F6 v_mad_f32 v61, 4.0, v61, -1.0 ; D282003D 03CE7AF6 v_mov_b32_e32 v19, v27 ; 7E26031B image_sample v[62:64], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[16:23], s[12:15] ; F0800700 00643E12 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v18, 4.0, v62, -1.0 ; D2820012 03CE7CF6 v_mad_f32 v19, 4.0, v63, -1.0 ; D2820013 03CE7EF6 v_mad_f32 v27, 4.0, v64, -1.0 ; D282001B 03CE80F6 v_mad_f32 v11, v6, v10, -v11 ; D282000B 842E1506 v_mad_f32 v12, v7, v10, -v12 ; D282000C 84321507 v_mad_f32 v10, v1, v10, -v13 ; D282000A 84361501 v_mul_f32_e32 v13, v11, v25 ; 101A330B v_mac_f32_e32 v13, v12, v26 ; 3E1A350C v_add_f32_e32 v25, -1.0, v10 ; 063214F3 v_mac_f32_e32 v13, v25, v31 ; 3E1A3F19 v_mul_f32_e32 v26, v11, v35 ; 1034470B v_add_f32_e32 v31, -1.0, v12 ; 063E18F3 v_mac_f32_e32 v26, v31, v36 ; 3E34491F v_mac_f32_e32 v26, v25, v37 ; 3E344B19 v_add_f32_e32 v35, -1.0, v11 ; 064616F3 v_mul_f32_e32 v28, v35, v28 ; 10383923 v_mac_f32_e32 v28, v12, v29 ; 3E383B0C v_mac_f32_e32 v28, v25, v30 ; 3E383D19 v_mul_f32_e32 v29, v35, v44 ; 103A5923 v_mac_f32_e32 v29, v31, v45 ; 3E3A5B1F v_mac_f32_e32 v29, v25, v46 ; 3E3A5D19 v_mul_f32_e32 v30, v12, v47 ; 103C5F0C v_mac_f32_e32 v30, v11, v48 ; 3E3C610B v_mac_f32_e32 v30, v25, v50 ; 3E3C6519 v_mul_f32_e32 v14, v12, v14 ; 101C1D0C v_mac_f32_e32 v14, v35, v15 ; 3E1C1F23 v_mac_f32_e32 v14, v25, v54 ; 3E1C6D19 v_mul_f32_e32 v15, v31, v16 ; 101E211F v_mac_f32_e32 v15, v11, v17 ; 3E1E230B v_mac_f32_e32 v15, v25, v58 ; 3E1E7519 v_mul_f32_e32 v16, v31, v18 ; 1020251F v_mac_f32_e32 v16, v35, v19 ; 3E202723 v_mac_f32_e32 v16, v25, v27 ; 3E203719 v_mul_f32_e32 v17, v11, v20 ; 1022290B v_mac_f32_e32 v17, v12, v21 ; 3E222B0C v_mac_f32_e32 v17, v10, v22 ; 3E222D0A v_mul_f32_e32 v18, v11, v32 ; 1024410B v_mac_f32_e32 v18, v31, v33 ; 3E24431F v_mac_f32_e32 v18, v10, v34 ; 3E24450A v_mul_f32_e32 v19, v35, v38 ; 10264D23 v_mac_f32_e32 v19, v12, v39 ; 3E264F0C v_mac_f32_e32 v19, v10, v40 ; 3E26510A v_mul_f32_e32 v20, v12, v51 ; 1028670C v_mac_f32_e32 v20, v35, v52 ; 3E286923 v_mul_f32_e32 v21, v31, v59 ; 102A771F v_mac_f32_e32 v21, v35, v60 ; 3E2A7923 v_mul_f32_e32 v22, v35, v41 ; 102C5323 v_mac_f32_e32 v22, v31, v42 ; 3E2C551F v_mac_f32_e32 v22, v10, v43 ; 3E2C570A v_sub_f32_e32 v25, 1.0, v11 ; 083216F2 v_mul_f32_e32 v17, v17, v25 ; 10223311 v_mac_f32_e32 v17, v19, v11 ; 3E221713 v_mul_f32_e32 v13, v13, v25 ; 101A330D v_mac_f32_e32 v13, v28, v11 ; 3E1A171C v_mul_f32_e32 v18, v18, v25 ; 10243312 v_mac_f32_e32 v18, v22, v11 ; 3E241716 v_mul_f32_e32 v19, v26, v25 ; 1026331A v_mac_f32_e32 v19, v29, v11 ; 3E26171D v_sub_f32_e32 v22, 1.0, v12 ; 082C18F2 v_mul_f32_e32 v17, v17, v22 ; 10222D11 v_mac_f32_e32 v17, v18, v12 ; 3E221912 v_mul_f32_e32 v13, v13, v22 ; 101A2D0D v_mac_f32_e32 v13, v19, v12 ; 3E1A1913 v_mul_f32_e32 v18, v12, v23 ; 10242F0C v_mac_f32_e32 v18, v11, v24 ; 3E24310B v_mac_f32_e32 v18, v10, v49 ; 3E24630A v_mac_f32_e32 v20, v10, v53 ; 3E286B0A v_mul_f32_e32 v19, v31, v55 ; 10266F1F v_mac_f32_e32 v19, v11, v56 ; 3E26710B v_mac_f32_e32 v19, v10, v57 ; 3E26730A v_mac_f32_e32 v21, v10, v61 ; 3E2A7B0A v_mul_f32_e32 v18, v18, v22 ; 10242D12 v_mac_f32_e32 v18, v19, v12 ; 3E241913 v_mul_f32_e32 v19, v30, v22 ; 10262D1E v_mac_f32_e32 v19, v15, v12 ; 3E26190F v_mul_f32_e32 v15, v20, v22 ; 101E2D14 v_mac_f32_e32 v15, v21, v12 ; 3E1E1915 v_mul_f32_e32 v14, v14, v22 ; 101C2D0E v_mac_f32_e32 v14, v16, v12 ; 3E1C1910 v_mul_f32_e32 v12, v18, v25 ; 10183312 v_mac_f32_e32 v12, v15, v11 ; 3E18170F v_mul_f32_e32 v15, v19, v25 ; 101E3313 v_mac_f32_e32 v15, v14, v11 ; 3E1E170E v_sub_f32_e32 v11, 1.0, v10 ; 081614F2 v_mac_f32_e32 v6, v17, v11 ; 3E0C1711 v_mac_f32_e32 v7, v12, v11 ; 3E0E170C v_mac_f32_e32 v6, v13, v10 ; 3E0C150D v_mac_f32_e32 v7, v15, v10 ; 3E0E150F v_mov_b32_e32 v10, 0x3e4ccccd ; 7E1402FF 3E4CCCCD v_add_f32_e32 v11, v10, v6 ; 06160D0A v_add_f32_e32 v12, v10, v7 ; 06180F0A v_add_f32_e32 v10, v10, v1 ; 0614030A v_floor_f32_e32 v13, v11 ; 7E1A490B v_floor_f32_e32 v14, v12 ; 7E1C490C v_mad_f32 v15, v8, v13, v9 ; D282000F 04261B08 v_mad_f32 v16, v8, v14, v9 ; D2820010 04261D08 v_floor_f32_e32 v17, v10 ; 7E22490A v_mad_f32 v19, v8, v17, v9 ; D2820013 04262308 image_sample v18, 8, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[16:23], s[12:15] ; F0800800 0064120F v_add_f32_e32 v20, 0, v15 ; 06281E80 v_add_f32_e32 v21, v8, v16 ; 062A2108 v_add_f32_e32 v22, v8, v15 ; 062C1F08 v_add_f32_e32 v23, 0, v16 ; 062E2080 image_sample v15, 8, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[16:23], s[12:15] ; F0800800 00640F14 image_sample v24, 8, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[16:23], s[12:15] ; F0800800 00641816 v_mov_b32_e32 v23, v21 ; 7E2E0315 image_sample v20, 8, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[16:23], s[12:15] ; F0800800 00641416 v_mov_b32_e32 v16, v19 ; 7E200313 v_mov_b32_e32 v25, v19 ; 7E320313 v_mov_b32_e32 v21, v19 ; 7E2A0313 v_add_f32_e32 v22, v8, v19 ; 062C2708 s_waitcnt vmcnt(3) ; BF8C0773 v_mov_b32_e32 v26, v18 ; 7E340312 v_mov_b32_e32 v27, v19 ; 7E360313 image_sample v[27:29], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[16:23], s[12:15] ; F0800700 00641B12 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v18, 4.0, v27, -1.0 ; D2820012 03CE36F6 v_mad_f32 v19, 4.0, v28, -1.0 ; D2820013 03CE38F6 v_mad_f32 v23, 4.0, v29, -1.0 ; D2820017 03CE3AF6 v_mov_b32_e32 v27, v22 ; 7E360316 image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[16:23], s[12:15] ; F0800700 00641A1A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v26, 4.0, v26, -1.0 ; D282001A 03CE34F6 v_mad_f32 v27, 4.0, v27, -1.0 ; D282001B 03CE36F6 v_mad_f32 v28, 4.0, v28, -1.0 ; D282001C 03CE38F6 image_sample v[29:31], 7, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[16:23], s[12:15] ; F0800700 00641D0F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v29, 4.0, v29, -1.0 ; D282001D 03CE3AF6 v_mad_f32 v30, 4.0, v30, -1.0 ; D282001E 03CE3CF6 v_mad_f32 v31, 4.0, v31, -1.0 ; D282001F 03CE3EF6 v_mov_b32_e32 v16, v22 ; 7E200316 image_sample v[32:34], 7, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[16:23], s[12:15] ; F0800700 0064200F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v15, 4.0, v32, -1.0 ; D282000F 03CE40F6 v_mad_f32 v16, 4.0, v33, -1.0 ; D2820010 03CE42F6 v_mad_f32 v32, 4.0, v34, -1.0 ; D2820020 03CE44F6 image_sample v[33:35], 7, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[16:23], s[12:15] ; F0800700 00642118 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v33, 4.0, v33, -1.0 ; D2820021 03CE42F6 v_mad_f32 v34, 4.0, v34, -1.0 ; D2820022 03CE44F6 v_mad_f32 v35, 4.0, v35, -1.0 ; D2820023 03CE46F6 v_mov_b32_e32 v25, v22 ; 7E320316 image_sample v[36:38], 7, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[16:23], s[12:15] ; F0800700 00642418 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v24, 4.0, v36, -1.0 ; D2820018 03CE48F6 v_mad_f32 v25, 4.0, v37, -1.0 ; D2820019 03CE4AF6 v_mad_f32 v36, 4.0, v38, -1.0 ; D2820024 03CE4CF6 image_sample v[37:39], 7, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[16:23], s[12:15] ; F0800700 00642514 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v37, 4.0, v37, -1.0 ; D2820025 03CE4AF6 v_mad_f32 v38, 4.0, v38, -1.0 ; D2820026 03CE4CF6 v_mad_f32 v39, 4.0, v39, -1.0 ; D2820027 03CE4EF6 v_mov_b32_e32 v21, v22 ; 7E2A0316 image_sample v[20:22], 7, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[16:23], s[12:15] ; F0800700 00641414 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, 4.0, v20, -1.0 ; D2820014 03CE28F6 v_mad_f32 v21, 4.0, v21, -1.0 ; D2820015 03CE2AF6 v_mad_f32 v22, 4.0, v22, -1.0 ; D2820016 03CE2CF6 v_subrev_f32_e32 v11, v13, v11 ; 0A16170D v_subrev_f32_e32 v12, v14, v12 ; 0A18190E v_mul_f32_e32 v13, v11, v29 ; 101A3B0B v_add_f32_e32 v14, -1.0, v12 ; 061C18F3 v_mac_f32_e32 v13, v14, v30 ; 3E1A3D0E v_mul_f32_e32 v15, v11, v15 ; 101E1F0B v_mac_f32_e32 v15, v14, v16 ; 3E1E210E v_add_f32_e32 v16, -1.0, v11 ; 062016F3 v_mul_f32_e32 v29, v16, v37 ; 103A4B10 v_mac_f32_e32 v29, v14, v38 ; 3E3A4D0E v_mul_f32_e32 v20, v16, v20 ; 10282910 v_mac_f32_e32 v20, v14, v21 ; 3E282B0E v_subrev_f32_e32 v10, v17, v10 ; 0A141511 v_mul_f32_e32 v14, v11, v26 ; 101C350B v_mac_f32_e32 v14, v12, v27 ; 3E1C370C v_add_f32_e32 v17, -1.0, v10 ; 062214F3 v_mac_f32_e32 v14, v17, v28 ; 3E1C3911 v_mac_f32_e32 v15, v17, v32 ; 3E1E4111 v_mul_f32_e32 v21, v16, v24 ; 102A3110 v_mac_f32_e32 v21, v12, v25 ; 3E2A330C v_mac_f32_e32 v21, v17, v36 ; 3E2A4911 v_mac_f32_e32 v20, v17, v22 ; 3E282D11 v_mul_f32_e32 v17, v11, v18 ; 1022250B v_mac_f32_e32 v17, v12, v19 ; 3E22270C v_mac_f32_e32 v17, v10, v23 ; 3E222F0A v_mac_f32_e32 v13, v10, v31 ; 3E1A3F0A v_mul_f32_e32 v16, v16, v33 ; 10204310 v_mac_f32_e32 v16, v12, v34 ; 3E20450C v_mac_f32_e32 v16, v10, v35 ; 3E20470A v_mac_f32_e32 v29, v10, v39 ; 3E3A4F0A v_sub_f32_e32 v18, 1.0, v11 ; 082416F2 v_mul_f32_e32 v17, v17, v18 ; 10222511 v_mac_f32_e32 v17, v16, v11 ; 3E221710 v_mul_f32_e32 v14, v14, v18 ; 101C250E v_mac_f32_e32 v14, v21, v11 ; 3E1C1715 v_mul_f32_e32 v13, v13, v18 ; 101A250D v_mac_f32_e32 v13, v29, v11 ; 3E1A171D v_mul_f32_e32 v15, v15, v18 ; 101E250F v_mac_f32_e32 v15, v20, v11 ; 3E1E1714 v_sub_f32_e32 v11, 1.0, v12 ; 081618F2 v_mul_f32_e32 v16, v17, v11 ; 10201711 v_mac_f32_e32 v16, v13, v12 ; 3E20190D v_mul_f32_e32 v11, v14, v11 ; 1016170E v_mac_f32_e32 v11, v15, v12 ; 3E16190F v_sub_f32_e32 v12, 1.0, v10 ; 081814F2 v_mul_f32_e32 v12, v16, v12 ; 10181910 v_mac_f32_e32 v12, v11, v10 ; 3E18150B v_mov_b32_e32 v10, 0x3f8ccccd ; 7E1402FF 3F8CCCCD v_mad_f32 v11, v12, v10, 1.0 ; D282000B 03CA150C v_mov_b32_e32 v12, 0x3e428f5c ; 7E1802FF 3E428F5C v_mad_f32 v13, v12, v11, v6 ; D282000D 041A170C v_mad_f32 v14, v12, v11, v7 ; D282000E 041E170C v_mad_f32 v12, v12, v11, v1 ; D282000C 0406170C v_mov_b32_e32 v15, 0x40533333 ; 7E1E02FF 40533333 v_mul_f32_e32 v16, v15, v13 ; 10201B0F v_floor_f32_e32 v16, v16 ; 7E204910 v_mul_f32_e32 v17, v15, v14 ; 10221D0F v_floor_f32_e32 v17, v17 ; 7E224911 v_mul_f32_e32 v18, v15, v12 ; 1024190F v_floor_f32_e32 v18, v18 ; 7E244912 v_mad_f32 v19, v8, v16, v9 ; D2820013 04262108 v_mad_f32 v20, v8, v17, v9 ; D2820014 04262308 v_mad_f32 v22, v8, v18, v9 ; D2820016 04262508 image_sample v21, 8, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[16:23], s[12:15] ; F0800800 00641513 v_add_f32_e32 v23, 0, v19 ; 062E2680 v_add_f32_e32 v24, v8, v20 ; 06302908 v_add_f32_e32 v25, v8, v19 ; 06322708 v_add_f32_e32 v26, 0, v20 ; 06342880 image_sample v19, 8, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[16:23], s[12:15] ; F0800800 00641317 image_sample v27, 8, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[16:23], s[12:15] ; F0800800 00641B19 v_mov_b32_e32 v26, v24 ; 7E340318 v_mad_f32 v13, v13, v15, -v16 ; D282000D 84421F0D v_mad_f32 v14, v14, v15, -v17 ; D282000E 84461F0E image_sample v16, 8, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[16:23], s[12:15] ; F0800800 00641019 v_mad_f32 v12, v12, v15, -v18 ; D282000C 844A1F0C v_mov_b32_e32 v20, v22 ; 7E280316 v_mov_b32_e32 v28, v22 ; 7E380316 v_mov_b32_e32 v17, v22 ; 7E220316 v_add_f32_e32 v15, v8, v22 ; 061E2D08 s_waitcnt vmcnt(3) ; BF8C0773 v_mov_b32_e32 v23, v21 ; 7E2E0315 v_mov_b32_e32 v24, v22 ; 7E300316 image_sample v[24:26], 7, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[16:23], s[12:15] ; F0800700 00641815 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v18, 4.0, v24, -1.0 ; D2820012 03CE30F6 v_mad_f32 v21, 4.0, v25, -1.0 ; D2820015 03CE32F6 v_mad_f32 v22, 4.0, v26, -1.0 ; D2820016 03CE34F6 v_mov_b32_e32 v24, v15 ; 7E30030F image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[16:23], s[12:15] ; F0800700 00641717 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v23, 4.0, v23, -1.0 ; D2820017 03CE2EF6 v_mad_f32 v24, 4.0, v24, -1.0 ; D2820018 03CE30F6 v_mad_f32 v25, 4.0, v25, -1.0 ; D2820019 03CE32F6 image_sample v[29:31], 7, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[16:23], s[12:15] ; F0800700 00641D13 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v26, 4.0, v29, -1.0 ; D282001A 03CE3AF6 v_mad_f32 v29, 4.0, v30, -1.0 ; D282001D 03CE3CF6 v_mad_f32 v30, 4.0, v31, -1.0 ; D282001E 03CE3EF6 v_mov_b32_e32 v20, v15 ; 7E28030F image_sample v[31:33], 7, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[16:23], s[12:15] ; F0800700 00641F13 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v19, 4.0, v31, -1.0 ; D2820013 03CE3EF6 v_mad_f32 v20, 4.0, v32, -1.0 ; D2820014 03CE40F6 v_mad_f32 v31, 4.0, v33, -1.0 ; D282001F 03CE42F6 image_sample v[32:34], 7, 0, 0, 0, 0, 0, 0, 0, v[27:28], s[16:23], s[12:15] ; F0800700 0064201B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v32, 4.0, v32, -1.0 ; D2820020 03CE40F6 v_mad_f32 v33, 4.0, v33, -1.0 ; D2820021 03CE42F6 v_mad_f32 v34, 4.0, v34, -1.0 ; D2820022 03CE44F6 v_mov_b32_e32 v28, v15 ; 7E38030F image_sample v[35:37], 7, 0, 0, 0, 0, 0, 0, 0, v[27:28], s[16:23], s[12:15] ; F0800700 0064231B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v27, 4.0, v35, -1.0 ; D282001B 03CE46F6 v_mad_f32 v28, 4.0, v36, -1.0 ; D282001C 03CE48F6 v_mad_f32 v35, 4.0, v37, -1.0 ; D2820023 03CE4AF6 image_sample v[36:38], 7, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[16:23], s[12:15] ; F0800700 00642410 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v36, 4.0, v36, -1.0 ; D2820024 03CE48F6 v_mad_f32 v37, 4.0, v37, -1.0 ; D2820025 03CE4AF6 v_mad_f32 v38, 4.0, v38, -1.0 ; D2820026 03CE4CF6 v_mov_b32_e32 v17, v15 ; 7E22030F image_sample v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[16:23], s[12:15] ; F0800700 00640F10 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v15, 4.0, v15, -1.0 ; D282000F 03CE1EF6 v_mad_f32 v16, 4.0, v16, -1.0 ; D2820010 03CE20F6 v_mad_f32 v17, 4.0, v17, -1.0 ; D2820011 03CE22F6 v_mul_f32_e32 v26, v13, v26 ; 1034350D v_add_f32_e32 v39, -1.0, v14 ; 064E1CF3 v_mac_f32_e32 v26, v39, v29 ; 3E343B27 v_mul_f32_e32 v19, v13, v19 ; 1026270D v_mac_f32_e32 v19, v39, v20 ; 3E262927 v_add_f32_e32 v20, -1.0, v13 ; 06281AF3 v_mul_f32_e32 v29, v20, v36 ; 103A4914 v_mac_f32_e32 v29, v39, v37 ; 3E3A4B27 v_mul_f32_e32 v15, v20, v15 ; 101E1F14 v_mac_f32_e32 v15, v39, v16 ; 3E1E2127 v_mul_f32_e32 v16, v13, v23 ; 10202F0D v_mac_f32_e32 v16, v14, v24 ; 3E20310E v_add_f32_e32 v23, -1.0, v12 ; 062E18F3 v_mac_f32_e32 v16, v23, v25 ; 3E203317 v_mac_f32_e32 v19, v23, v31 ; 3E263F17 v_mul_f32_e32 v24, v20, v27 ; 10303714 v_mac_f32_e32 v24, v14, v28 ; 3E30390E v_mac_f32_e32 v24, v23, v35 ; 3E304717 v_mac_f32_e32 v15, v23, v17 ; 3E1E2317 v_mul_f32_e32 v17, v13, v18 ; 1022250D v_mac_f32_e32 v17, v14, v21 ; 3E222B0E v_mac_f32_e32 v17, v12, v22 ; 3E222D0C v_mac_f32_e32 v26, v12, v30 ; 3E343D0C v_mul_f32_e32 v18, v20, v32 ; 10244114 v_mac_f32_e32 v18, v14, v33 ; 3E24430E v_mac_f32_e32 v18, v12, v34 ; 3E24450C v_mac_f32_e32 v29, v12, v38 ; 3E3A4D0C v_sub_f32_e32 v20, 1.0, v13 ; 08281AF2 v_mul_f32_e32 v17, v17, v20 ; 10222911 v_mac_f32_e32 v17, v18, v13 ; 3E221B12 v_mul_f32_e32 v16, v16, v20 ; 10202910 v_mac_f32_e32 v16, v24, v13 ; 3E201B18 v_mul_f32_e32 v18, v26, v20 ; 1024291A v_mac_f32_e32 v18, v29, v13 ; 3E241B1D v_mul_f32_e32 v19, v19, v20 ; 10262913 v_mac_f32_e32 v19, v15, v13 ; 3E261B0F v_sub_f32_e32 v13, 1.0, v14 ; 081A1CF2 v_mul_f32_e32 v15, v17, v13 ; 101E1B11 v_mac_f32_e32 v15, v18, v14 ; 3E1E1D12 v_mul_f32_e32 v13, v16, v13 ; 101A1B10 v_mac_f32_e32 v13, v19, v14 ; 3E1A1D13 v_sub_f32_e32 v14, 1.0, v12 ; 081C18F2 v_mul_f32_e32 v14, v15, v14 ; 101C1D0F v_mac_f32_e32 v14, v13, v12 ; 3E1C190D v_mov_b32_e32 v12, 0x3e9b26ca ; 7E1802FF 3E9B26CA v_mad_f32 v11, |v14|, v12, v11 ; D282010B 042E190E v_mov_b32_e32 v12, 0x3e570a3d ; 7E1802FF 3E570A3D v_mad_f32 v13, v12, v11, v6 ; D282000D 041A170C v_mad_f32 v14, v12, v11, v7 ; D282000E 041E170C v_mad_f32 v12, v12, v11, v1 ; D282000C 0406170C v_mul_f32_e32 v15, s11, v13 ; 101E1A0B v_floor_f32_e32 v15, v15 ; 7E1E490F v_mul_f32_e32 v16, s11, v14 ; 10201C0B v_floor_f32_e32 v16, v16 ; 7E204910 v_mul_f32_e32 v17, s11, v12 ; 1022180B v_floor_f32_e32 v17, v17 ; 7E224911 v_mad_f32 v18, v8, v15, v9 ; D2820012 04261F08 v_mad_f32 v19, v8, v16, v9 ; D2820013 04262108 v_mad_f32 v21, v8, v17, v9 ; D2820015 04262308 image_sample v20, 8, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[16:23], s[12:15] ; F0800800 00641412 v_add_f32_e32 v22, 0, v18 ; 062C2480 v_add_f32_e32 v23, v8, v19 ; 062E2708 v_add_f32_e32 v24, v8, v18 ; 06302508 v_add_f32_e32 v25, 0, v19 ; 06322680 image_sample v18, 8, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[16:23], s[12:15] ; F0800800 00641216 image_sample v26, 8, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[16:23], s[12:15] ; F0800800 00641A18 v_mov_b32_e32 v25, v23 ; 7E320317 image_sample v22, 8, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[16:23], s[12:15] ; F0800800 00641618 v_mov_b32_e32 v19, v21 ; 7E260315 v_mov_b32_e32 v27, v21 ; 7E360315 v_mov_b32_e32 v23, v21 ; 7E2E0315 v_add_f32_e32 v24, v8, v21 ; 06302B08 s_waitcnt vmcnt(3) ; BF8C0773 v_mov_b32_e32 v28, v20 ; 7E380314 v_mov_b32_e32 v29, v21 ; 7E3A0315 image_sample v[29:31], 7, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[16:23], s[12:15] ; F0800700 00641D14 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, 4.0, v29, -1.0 ; D2820014 03CE3AF6 v_mad_f32 v21, 4.0, v30, -1.0 ; D2820015 03CE3CF6 v_mad_f32 v25, 4.0, v31, -1.0 ; D2820019 03CE3EF6 v_mov_b32_e32 v29, v24 ; 7E3A0318 image_sample v[28:30], 7, 0, 0, 0, 0, 0, 0, 0, v[28:29], s[16:23], s[12:15] ; F0800700 00641C1C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v28, 4.0, v28, -1.0 ; D282001C 03CE38F6 v_mad_f32 v29, 4.0, v29, -1.0 ; D282001D 03CE3AF6 v_mad_f32 v30, 4.0, v30, -1.0 ; D282001E 03CE3CF6 image_sample v[31:33], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[16:23], s[12:15] ; F0800700 00641F12 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v31, 4.0, v31, -1.0 ; D282001F 03CE3EF6 v_mad_f32 v32, 4.0, v32, -1.0 ; D2820020 03CE40F6 v_mad_f32 v33, 4.0, v33, -1.0 ; D2820021 03CE42F6 v_mov_b32_e32 v19, v24 ; 7E260318 image_sample v[34:36], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[16:23], s[12:15] ; F0800700 00642212 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v18, 4.0, v34, -1.0 ; D2820012 03CE44F6 v_mad_f32 v19, 4.0, v35, -1.0 ; D2820013 03CE46F6 v_mad_f32 v34, 4.0, v36, -1.0 ; D2820022 03CE48F6 image_sample v[35:37], 7, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[16:23], s[12:15] ; F0800700 0064231A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v35, 4.0, v35, -1.0 ; D2820023 03CE46F6 v_mad_f32 v36, 4.0, v36, -1.0 ; D2820024 03CE48F6 v_mad_f32 v37, 4.0, v37, -1.0 ; D2820025 03CE4AF6 v_mov_b32_e32 v27, v24 ; 7E360318 image_sample v[38:40], 7, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[16:23], s[12:15] ; F0800700 0064261A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v26, 4.0, v38, -1.0 ; D282001A 03CE4CF6 v_mad_f32 v27, 4.0, v39, -1.0 ; D282001B 03CE4EF6 v_mad_f32 v38, 4.0, v40, -1.0 ; D2820026 03CE50F6 image_sample v[39:41], 7, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[16:23], s[12:15] ; F0800700 00642716 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v39, 4.0, v39, -1.0 ; D2820027 03CE4EF6 v_mad_f32 v40, 4.0, v40, -1.0 ; D2820028 03CE50F6 v_mad_f32 v41, 4.0, v41, -1.0 ; D2820029 03CE52F6 v_mov_b32_e32 v23, v24 ; 7E2E0318 image_sample v[22:24], 7, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[16:23], s[12:15] ; F0800700 00641616 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v22, 4.0, v22, -1.0 ; D2820016 03CE2CF6 v_mad_f32 v23, 4.0, v23, -1.0 ; D2820017 03CE2EF6 v_mad_f32 v24, 4.0, v24, -1.0 ; D2820018 03CE30F6 v_mad_f32 v13, v13, s11, -v15 ; D282000D 843C170D v_mad_f32 v14, v14, s11, -v16 ; D282000E 8440170E v_mul_f32_e32 v15, v13, v31 ; 101E3F0D v_add_f32_e32 v16, -1.0, v14 ; 06201CF3 v_mac_f32_e32 v15, v16, v32 ; 3E1E4110 v_mul_f32_e32 v18, v13, v18 ; 1024250D v_mac_f32_e32 v18, v16, v19 ; 3E242710 v_add_f32_e32 v19, -1.0, v13 ; 06261AF3 v_mul_f32_e32 v31, v19, v39 ; 103E4F13 v_mac_f32_e32 v31, v16, v40 ; 3E3E5110 v_mul_f32_e32 v22, v19, v22 ; 102C2D13 v_mac_f32_e32 v22, v16, v23 ; 3E2C2F10 v_mad_f32 v12, v12, s11, -v17 ; D282000C 8444170C v_mul_f32_e32 v16, v13, v28 ; 1020390D v_mac_f32_e32 v16, v14, v29 ; 3E203B0E v_add_f32_e32 v17, -1.0, v12 ; 062218F3 v_mac_f32_e32 v16, v17, v30 ; 3E203D11 v_mac_f32_e32 v18, v17, v34 ; 3E244511 v_mul_f32_e32 v23, v19, v26 ; 102E3513 v_mac_f32_e32 v23, v14, v27 ; 3E2E370E v_mac_f32_e32 v23, v17, v38 ; 3E2E4D11 v_mac_f32_e32 v22, v17, v24 ; 3E2C3111 v_mul_f32_e32 v17, v13, v20 ; 1022290D v_mac_f32_e32 v17, v14, v21 ; 3E222B0E v_mac_f32_e32 v17, v12, v25 ; 3E22330C v_mac_f32_e32 v15, v12, v33 ; 3E1E430C v_mul_f32_e32 v19, v19, v35 ; 10264713 v_mac_f32_e32 v19, v14, v36 ; 3E26490E v_mac_f32_e32 v19, v12, v37 ; 3E264B0C v_mac_f32_e32 v31, v12, v41 ; 3E3E530C v_sub_f32_e32 v20, 1.0, v13 ; 08281AF2 v_mul_f32_e32 v17, v17, v20 ; 10222911 v_mac_f32_e32 v17, v19, v13 ; 3E221B13 v_mul_f32_e32 v16, v16, v20 ; 10202910 v_mac_f32_e32 v16, v23, v13 ; 3E201B17 v_mul_f32_e32 v15, v15, v20 ; 101E290F v_mac_f32_e32 v15, v31, v13 ; 3E1E1B1F v_mul_f32_e32 v18, v18, v20 ; 10242912 v_mac_f32_e32 v18, v22, v13 ; 3E241B16 v_sub_f32_e32 v13, 1.0, v14 ; 081A1CF2 v_mul_f32_e32 v17, v17, v13 ; 10221B11 v_mac_f32_e32 v17, v15, v14 ; 3E221D0F v_mul_f32_e32 v13, v16, v13 ; 101A1B10 v_mac_f32_e32 v13, v18, v14 ; 3E1A1D12 v_rcp_f32_e32 v14, s24 ; 7E1C5418 v_sub_f32_e32 v15, 1.0, v12 ; 081E18F2 v_mul_f32_e32 v15, v17, v15 ; 101E1F11 v_mac_f32_e32 v15, v13, v12 ; 3E1E190D v_mad_f32 v11, |v15|, v14, v11 ; D282010B 042E1D0F v_mov_b32_e32 v12, 0x3e6b851f ; 7E1802FF 3E6B851F v_mad_f32 v13, v12, v11, v6 ; D282000D 041A170C v_mad_f32 v14, v12, v11, v7 ; D282000E 041E170C v_mad_f32 v12, v12, v11, v1 ; D282000C 0406170C v_mul_f32_e32 v15, s10, v13 ; 101E1A0A v_floor_f32_e32 v15, v15 ; 7E1E490F v_mul_f32_e32 v16, s10, v14 ; 10201C0A v_floor_f32_e32 v16, v16 ; 7E204910 v_mul_f32_e32 v17, s10, v12 ; 1022180A v_floor_f32_e32 v17, v17 ; 7E224911 v_mad_f32 v18, v8, v15, v9 ; D2820012 04261F08 v_mad_f32 v19, v8, v16, v9 ; D2820013 04262108 v_mad_f32 v21, v8, v17, v9 ; D2820015 04262308 image_sample v20, 8, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[16:23], s[12:15] ; F0800800 00641412 v_add_f32_e32 v22, 0, v18 ; 062C2480 v_add_f32_e32 v23, v8, v19 ; 062E2708 v_add_f32_e32 v24, v8, v18 ; 06302508 v_add_f32_e32 v25, 0, v19 ; 06322680 image_sample v18, 8, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[16:23], s[12:15] ; F0800800 00641216 image_sample v26, 8, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[16:23], s[12:15] ; F0800800 00641A18 v_mov_b32_e32 v25, v23 ; 7E320317 image_sample v22, 8, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[16:23], s[12:15] ; F0800800 00641618 v_mov_b32_e32 v19, v21 ; 7E260315 v_mov_b32_e32 v27, v21 ; 7E360315 v_mov_b32_e32 v23, v21 ; 7E2E0315 v_add_f32_e32 v24, v8, v21 ; 06302B08 s_waitcnt vmcnt(3) ; BF8C0773 v_mov_b32_e32 v28, v20 ; 7E380314 v_mov_b32_e32 v29, v21 ; 7E3A0315 image_sample v[29:31], 7, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[16:23], s[12:15] ; F0800700 00641D14 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, 4.0, v29, -1.0 ; D2820014 03CE3AF6 v_mad_f32 v21, 4.0, v30, -1.0 ; D2820015 03CE3CF6 v_mad_f32 v25, 4.0, v31, -1.0 ; D2820019 03CE3EF6 v_mov_b32_e32 v29, v24 ; 7E3A0318 image_sample v[28:30], 7, 0, 0, 0, 0, 0, 0, 0, v[28:29], s[16:23], s[12:15] ; F0800700 00641C1C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v28, 4.0, v28, -1.0 ; D282001C 03CE38F6 v_mad_f32 v29, 4.0, v29, -1.0 ; D282001D 03CE3AF6 v_mad_f32 v30, 4.0, v30, -1.0 ; D282001E 03CE3CF6 image_sample v[31:33], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[16:23], s[12:15] ; F0800700 00641F12 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v31, 4.0, v31, -1.0 ; D282001F 03CE3EF6 v_mad_f32 v32, 4.0, v32, -1.0 ; D2820020 03CE40F6 v_mad_f32 v33, 4.0, v33, -1.0 ; D2820021 03CE42F6 v_mov_b32_e32 v19, v24 ; 7E260318 image_sample v[34:36], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[16:23], s[12:15] ; F0800700 00642212 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v18, 4.0, v34, -1.0 ; D2820012 03CE44F6 v_mad_f32 v19, 4.0, v35, -1.0 ; D2820013 03CE46F6 v_mad_f32 v34, 4.0, v36, -1.0 ; D2820022 03CE48F6 image_sample v[35:37], 7, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[16:23], s[12:15] ; F0800700 0064231A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v35, 4.0, v35, -1.0 ; D2820023 03CE46F6 v_mad_f32 v36, 4.0, v36, -1.0 ; D2820024 03CE48F6 v_mad_f32 v37, 4.0, v37, -1.0 ; D2820025 03CE4AF6 v_mov_b32_e32 v27, v24 ; 7E360318 image_sample v[38:40], 7, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[16:23], s[12:15] ; F0800700 0064261A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v26, 4.0, v38, -1.0 ; D282001A 03CE4CF6 v_mad_f32 v27, 4.0, v39, -1.0 ; D282001B 03CE4EF6 v_mad_f32 v38, 4.0, v40, -1.0 ; D2820026 03CE50F6 image_sample v[39:41], 7, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[16:23], s[12:15] ; F0800700 00642716 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v39, 4.0, v39, -1.0 ; D2820027 03CE4EF6 v_mad_f32 v40, 4.0, v40, -1.0 ; D2820028 03CE50F6 v_mad_f32 v41, 4.0, v41, -1.0 ; D2820029 03CE52F6 v_mov_b32_e32 v23, v24 ; 7E2E0318 image_sample v[22:24], 7, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[16:23], s[12:15] ; F0800700 00641616 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v22, 4.0, v22, -1.0 ; D2820016 03CE2CF6 v_mad_f32 v23, 4.0, v23, -1.0 ; D2820017 03CE2EF6 v_mad_f32 v24, 4.0, v24, -1.0 ; D2820018 03CE30F6 v_mad_f32 v13, v13, s10, -v15 ; D282000D 843C150D v_mad_f32 v14, v14, s10, -v16 ; D282000E 8440150E v_mul_f32_e32 v15, v13, v31 ; 101E3F0D v_add_f32_e32 v16, -1.0, v14 ; 06201CF3 v_mac_f32_e32 v15, v16, v32 ; 3E1E4110 v_mul_f32_e32 v18, v13, v18 ; 1024250D v_mac_f32_e32 v18, v16, v19 ; 3E242710 v_add_f32_e32 v19, -1.0, v13 ; 06261AF3 v_mul_f32_e32 v31, v19, v39 ; 103E4F13 v_mac_f32_e32 v31, v16, v40 ; 3E3E5110 v_mul_f32_e32 v22, v19, v22 ; 102C2D13 v_mac_f32_e32 v22, v16, v23 ; 3E2C2F10 v_mad_f32 v12, v12, s10, -v17 ; D282000C 8444150C v_mul_f32_e32 v16, v13, v28 ; 1020390D v_mac_f32_e32 v16, v14, v29 ; 3E203B0E v_add_f32_e32 v17, -1.0, v12 ; 062218F3 v_mac_f32_e32 v16, v17, v30 ; 3E203D11 v_mac_f32_e32 v18, v17, v34 ; 3E244511 v_mul_f32_e32 v23, v19, v26 ; 102E3513 v_mac_f32_e32 v23, v14, v27 ; 3E2E370E v_mac_f32_e32 v23, v17, v38 ; 3E2E4D11 v_mac_f32_e32 v22, v17, v24 ; 3E2C3111 v_mul_f32_e32 v17, v13, v20 ; 1022290D v_mac_f32_e32 v17, v14, v21 ; 3E222B0E v_mac_f32_e32 v17, v12, v25 ; 3E22330C v_mac_f32_e32 v15, v12, v33 ; 3E1E430C v_mul_f32_e32 v19, v19, v35 ; 10264713 v_mac_f32_e32 v19, v14, v36 ; 3E26490E v_mac_f32_e32 v19, v12, v37 ; 3E264B0C v_mac_f32_e32 v31, v12, v41 ; 3E3E530C v_sub_f32_e32 v20, 1.0, v13 ; 08281AF2 v_mul_f32_e32 v17, v17, v20 ; 10222911 v_mac_f32_e32 v17, v19, v13 ; 3E221B13 v_mul_f32_e32 v16, v16, v20 ; 10202910 v_mac_f32_e32 v16, v23, v13 ; 3E201B17 v_mul_f32_e32 v15, v15, v20 ; 101E290F s_buffer_load_dword s4, s[0:3], 0x39 ; C2020139 v_mac_f32_e32 v15, v31, v13 ; 3E1E1B1F v_mul_f32_e32 v18, v18, v20 ; 10242912 v_mac_f32_e32 v18, v22, v13 ; 3E241B16 v_sub_f32_e32 v13, 1.0, v14 ; 081A1CF2 v_mul_f32_e32 v17, v17, v13 ; 10221B11 v_mac_f32_e32 v17, v15, v14 ; 3E221D0F v_mul_f32_e32 v13, v16, v13 ; 101A1B10 v_mac_f32_e32 v13, v18, v14 ; 3E1A1D12 s_buffer_load_dword s5, s[0:3], 0x3a ; C202813A s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v14, s4 ; 7E1C5404 v_sub_f32_e32 v15, 1.0, v12 ; 081E18F2 v_mul_f32_e32 v15, v17, v15 ; 101E1F11 v_mac_f32_e32 v15, v13, v12 ; 3E1E190D v_mad_f32 v11, |v15|, v14, v11 ; D282010B 042E1D0F v_mov_b32_e32 v12, 0x3e800000 ; 7E1802FF 3E800000 v_mac_f32_e32 v6, v12, v11 ; 3E0C170C v_mac_f32_e32 v7, v12, v11 ; 3E0E170C v_mac_f32_e32 v1, v12, v11 ; 3E02170C v_mul_f32_e32 v12, s9, v6 ; 10180C09 v_floor_f32_e32 v12, v12 ; 7E18490C v_mul_f32_e32 v13, s9, v7 ; 101A0E09 v_floor_f32_e32 v13, v13 ; 7E1A490D v_mul_f32_e32 v14, s9, v1 ; 101C0209 v_floor_f32_e32 v14, v14 ; 7E1C490E v_mad_f32 v15, v8, v12, v9 ; D282000F 04261908 v_mad_f32 v16, v8, v13, v9 ; D2820010 04261B08 v_mad_f32 v18, v8, v14, v9 ; D2820012 04261D08 image_sample v17, 8, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[16:23], s[12:15] ; F0800800 0064110F v_add_f32_e32 v19, 0, v15 ; 06261E80 v_add_f32_e32 v20, v8, v16 ; 06282108 v_add_f32_e32 v21, v8, v15 ; 062A1F08 v_add_f32_e32 v22, 0, v16 ; 062C2080 image_sample v15, 8, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[16:23], s[12:15] ; F0800800 00640F13 image_sample v23, 8, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[16:23], s[12:15] ; F0800800 00641715 v_mov_b32_e32 v22, v20 ; 7E2C0314 image_sample v19, 8, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[16:23], s[12:15] ; F0800800 00641315 v_add_f32_e32 v8, v8, v18 ; 06102508 v_mov_b32_e32 v16, v18 ; 7E200312 v_mov_b32_e32 v24, v18 ; 7E300312 v_mov_b32_e32 v20, v18 ; 7E280312 s_waitcnt vmcnt(3) ; BF8C0773 v_mov_b32_e32 v21, v17 ; 7E2A0311 v_mov_b32_e32 v22, v18 ; 7E2C0312 image_sample v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[16:23], s[12:15] ; F0800700 00641911 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v9, 4.0, v25, -1.0 ; D2820009 03CE32F6 v_mad_f32 v17, 4.0, v26, -1.0 ; D2820011 03CE34F6 v_mad_f32 v18, 4.0, v27, -1.0 ; D2820012 03CE36F6 v_mov_b32_e32 v22, v8 ; 7E2C0308 image_sample v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[16:23], s[12:15] ; F0800700 00641915 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v21, 4.0, v25, -1.0 ; D2820015 03CE32F6 v_mad_f32 v22, 4.0, v26, -1.0 ; D2820016 03CE34F6 v_mad_f32 v25, 4.0, v27, -1.0 ; D2820019 03CE36F6 image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[16:23], s[12:15] ; F0800700 00641A0F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v26, 4.0, v26, -1.0 ; D282001A 03CE34F6 v_mad_f32 v27, 4.0, v27, -1.0 ; D282001B 03CE36F6 v_mad_f32 v28, 4.0, v28, -1.0 ; D282001C 03CE38F6 v_mov_b32_e32 v16, v8 ; 7E200308 image_sample v[29:31], 7, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[16:23], s[12:15] ; F0800700 00641D0F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v15, 4.0, v29, -1.0 ; D282000F 03CE3AF6 v_mad_f32 v16, 4.0, v30, -1.0 ; D2820010 03CE3CF6 v_mad_f32 v29, 4.0, v31, -1.0 ; D282001D 03CE3EF6 image_sample v[30:32], 7, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[16:23], s[12:15] ; F0800700 00641E17 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v30, 4.0, v30, -1.0 ; D282001E 03CE3CF6 v_mad_f32 v31, 4.0, v31, -1.0 ; D282001F 03CE3EF6 v_mad_f32 v32, 4.0, v32, -1.0 ; D2820020 03CE40F6 v_mov_b32_e32 v24, v8 ; 7E300308 image_sample v[33:35], 7, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[16:23], s[12:15] ; F0800700 00642117 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v23, 4.0, v33, -1.0 ; D2820017 03CE42F6 v_mad_f32 v24, 4.0, v34, -1.0 ; D2820018 03CE44F6 v_mad_f32 v33, 4.0, v35, -1.0 ; D2820021 03CE46F6 image_sample v[34:36], 7, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[16:23], s[12:15] ; F0800700 00642213 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v34, 4.0, v34, -1.0 ; D2820022 03CE44F6 v_mad_f32 v35, 4.0, v35, -1.0 ; D2820023 03CE46F6 v_mad_f32 v36, 4.0, v36, -1.0 ; D2820024 03CE48F6 v_mov_b32_e32 v20, v8 ; 7E280308 image_sample v[37:39], 7, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[16:23], s[12:15] ; F0800700 00642513 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v8, 4.0, v37, -1.0 ; D2820008 03CE4AF6 v_mad_f32 v19, 4.0, v38, -1.0 ; D2820013 03CE4CF6 v_mad_f32 v20, 4.0, v39, -1.0 ; D2820014 03CE4EF6 v_mad_f32 v6, v6, s9, -v12 ; D2820006 84301306 v_mad_f32 v7, v7, s9, -v13 ; D2820007 84341307 v_mul_f32_e32 v12, v6, v26 ; 10183506 v_add_f32_e32 v13, -1.0, v7 ; 061A0EF3 v_mac_f32_e32 v12, v13, v27 ; 3E18370D v_mul_f32_e32 v15, v6, v15 ; 101E1F06 v_mac_f32_e32 v15, v13, v16 ; 3E1E210D v_add_f32_e32 v16, -1.0, v6 ; 06200CF3 v_mul_f32_e32 v26, v16, v34 ; 10344510 v_mac_f32_e32 v26, v13, v35 ; 3E34470D v_mul_f32_e32 v8, v16, v8 ; 10101110 v_mac_f32_e32 v8, v13, v19 ; 3E10270D v_mad_f32 v1, v1, s9, -v14 ; D2820001 84381301 v_mul_f32_e32 v13, v6, v21 ; 101A2B06 v_mac_f32_e32 v13, v7, v22 ; 3E1A2D07 v_add_f32_e32 v14, -1.0, v1 ; 061C02F3 v_mac_f32_e32 v13, v14, v25 ; 3E1A330E v_mac_f32_e32 v15, v14, v29 ; 3E1E3B0E v_mul_f32_e32 v19, v16, v23 ; 10262F10 v_mac_f32_e32 v19, v7, v24 ; 3E263107 v_mac_f32_e32 v19, v14, v33 ; 3E26430E v_mac_f32_e32 v8, v14, v20 ; 3E10290E v_mul_f32_e32 v9, v6, v9 ; 10121306 v_mac_f32_e32 v9, v7, v17 ; 3E122307 v_mac_f32_e32 v9, v1, v18 ; 3E122501 v_mac_f32_e32 v12, v1, v28 ; 3E183901 v_mul_f32_e32 v14, v16, v30 ; 101C3D10 v_mac_f32_e32 v14, v7, v31 ; 3E1C3F07 v_mac_f32_e32 v14, v1, v32 ; 3E1C4101 v_mac_f32_e32 v26, v1, v36 ; 3E344901 v_sub_f32_e32 v16, 1.0, v6 ; 08200CF2 v_mul_f32_e32 v9, v9, v16 ; 10122109 v_mac_f32_e32 v9, v14, v6 ; 3E120D0E v_mul_f32_e32 v13, v13, v16 ; 101A210D v_mac_f32_e32 v13, v19, v6 ; 3E1A0D13 v_mul_f32_e32 v12, v12, v16 ; 1018210C v_mac_f32_e32 v12, v26, v6 ; 3E180D1A v_mul_f32_e32 v14, v15, v16 ; 101C210F v_mac_f32_e32 v14, v8, v6 ; 3E1C0D08 v_sub_f32_e32 v6, 1.0, v7 ; 080C0EF2 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 v_mul_f32_e32 v8, v9, v6 ; 10100D09 v_mac_f32_e32 v8, v12, v7 ; 3E100F0C v_mul_f32_e32 v6, v13, v6 ; 100C0D0D s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102 v_mac_f32_e32 v6, v14, v7 ; 3E0C0F0E v_sub_f32_e32 v7, 1.0, v1 ; 080E02F2 v_mul_f32_e32 v7, v8, v7 ; 100E0F08 v_mac_f32_e32 v7, v6, v1 ; 3E0E0306 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v1, s4, v5 ; 0A020A04 s_buffer_load_dword s4, s[0:3], 0x24 ; C2020124 s_buffer_load_dword s9, s[0:3], 0x25 ; C2048125 v_rcp_f32_e32 v5, s5 ; 7E0A5405 v_subrev_f32_e32 v4, s6, v4 ; 0A080806 v_subrev_f32_e32 v0, s7, v0 ; 0A000007 v_mul_f32_e32 v6, v1, v1 ; 100C0301 v_mac_f32_e32 v6, v4, v4 ; 3E0C0904 v_mac_f32_e32 v6, v0, v0 ; 3E0C0100 s_buffer_load_dword s5, s[0:3], 0x26 ; C2028126 v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_mad_f32 v5, |v7|, v5, v11 ; D2820105 042E0B07 s_buffer_load_dword s6, s[0:3], 0x28 ; C2030128 s_buffer_load_dword s7, s[0:3], 0x2c ; C203812C v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v0, v6, v0 ; 10000106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e64 v6, s4, s4 ; D2100006 00000804 v_mac_f32_e64 v6, s9, s9 ; D23E0006 00001209 v_mac_f32_e64 v6, s5, s5 ; D23E0006 00000A05 v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_mul_f32_e32 v1, v10, v1 ; 1002030A v_mul_f32_e32 v4, v10, v4 ; 1008090A v_mul_f32_e32 v0, v10, v0 ; 1000010A v_mul_f32_e32 v7, s4, v6 ; 100E0C04 v_mul_f32_e32 v8, s9, v6 ; 10100C09 v_mul_f32_e32 v6, s5, v6 ; 100C0C05 v_mul_f32_e32 v1, v7, v1 ; 10020307 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_buffer_load_dword s5, s[0:3], 0x1d ; C202811D s_buffer_load_dword s9, s[0:3], 0x1e ; C204811E v_mac_f32_e32 v1, v8, v4 ; 3E020908 v_mac_f32_e32 v1, v6, v0 ; 3E020106 v_mad_f32 v0, 0.5, v1, 0.5 ; D2820000 03C202F0 s_buffer_load_dword s10, s[0:3], 0x14 ; C2050114 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_buffer_load_dword s11, s[0:3], 0x16 ; C2058116 s_buffer_load_dword s12, s[0:3], 0x18 ; C2060118 s_buffer_load_dword s13, s[0:3], 0x20 ; C2068120 v_mac_f32_e32 v4, s10, v0 ; 3E08000A s_buffer_load_dword s10, s[0:3], 0x21 ; C2050121 s_buffer_load_dword s14, s[0:3], 0x19 ; C2070119 s_buffer_load_dword s15, s[0:3], 0x22 ; C2078122 s_buffer_load_dword s16, s[0:3], 0x1a ; C208011A v_mul_f32_e32 v6, s5, v1 ; 100C0205 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v6, s4, v0 ; 3E0C0004 v_mul_f32_e32 v7, s9, v1 ; 100E0209 v_mac_f32_e32 v7, s11, v0 ; 3E0E000B v_mul_f32_e32 v8, s13, v1 ; 1010020D v_mac_f32_e32 v8, s12, v0 ; 3E10000C v_mul_f32_e32 v9, s10, v1 ; 1012020A v_mac_f32_e32 v9, s14, v0 ; 3E12000E v_mul_f32_e32 v1, s15, v1 ; 1002020F v_mac_f32_e32 v1, s16, v0 ; 3E020010 v_subrev_f32_e32 v0, s8, v5 ; 0A000A08 v_mov_b32_e32 v5, 0xbe4ccccd ; 7E0A02FF BE4CCCCD v_add_f32_e32 v5, v0, v5 ; 060A0B00 v_sub_f32_e64 v5, 1.0, |v5| ; D2080205 00020AF2 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_max_f32_e32 v5, 0, v5 ; 200A0A80 v_mac_f32_e32 v8, v5, v4 ; 3E100905 v_mac_f32_e32 v9, v5, v6 ; 3E120D05 v_mac_f32_e32 v1, v5, v7 ; 3E020F05 v_log_f32_e32 v4, s6 ; 7E084E06 v_mul_f32_e32 v5, 0x3e991687, v8 ; 100A10FF 3E991687 v_madmk_f32_e32 v5, v9, v5, 0x3f1645a2 ; 400A0B09 3F1645A2 v_madmk_f32_e32 v5, v1, v5, 0x3de978d5 ; 400A0B01 3DE978D5 v_mul_legacy_f32_e32 v4, 0x3fc00000, v4 ; 0E0808FF 3FC00000 v_exp_f32_e32 v4, v4 ; 7E084B04 v_sub_f32_e32 v6, 1.0, v4 ; 080C08F2 v_mul_f32_e32 v5, v5, v6 ; 100A0D05 s_buffer_load_dword s0, s[0:3], 0x10 ; C2000110 v_mad_f32 v6, v8, v4, v5 ; D2820006 04160908 v_mad_f32 v7, v9, v4, v5 ; D2820007 04160909 v_mac_f32_e32 v5, v1, v4 ; 3E0A0901 v_add_f32_e32 v1, -0.5, v2 ; 060204F1 v_add_f32_e32 v2, -0.5, v3 ; 060406F1 v_rcp_f32_e32 v3, s7 ; 7E065407 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_mac_f32_e32 v2, v1, v1 ; 3E040301 v_sqrt_f32_e32 v1, v2 ; 7E026702 v_mad_f32 v1, -v1, v3, 1.0 ; D2820001 23CA0701 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s0, v0 ; 10000000 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mul_f32_e32 v1, s6, v6 ; 10020C06 v_mul_f32_e32 v2, s6, v7 ; 10040E06 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_mul_f32_e32 v2, s6, v5 ; 10040A06 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 68 Code Size: 5196 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..12] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0].xyz, CONST[1].xyzz, IN[1].xxxx 1: MAD TEMP[0].xyz, CONST[2].xyzz, IN[1].yyyy, TEMP[0].xyzz 2: MAD TEMP[0].xyz, CONST[3].xyzz, IN[1].zzzz, TEMP[0].xyzz 3: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 4: RSQ TEMP[1].x, TEMP[1].xxxx 5: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 6: MUL TEMP[1], CONST[9], IN[0].xxxx 7: MAD TEMP[1], CONST[10], IN[0].yyyy, TEMP[1] 8: MAD TEMP[1], CONST[11], IN[0].zzzz, TEMP[1] 9: MAD TEMP[1].z, CONST[12], IN[0].wwww, TEMP[1] 10: MUL TEMP[1].x, TEMP[1].zzzz, CONST[0].wwww 11: MOV TEMP[0].w, -TEMP[1].xxxx 12: MUL TEMP[1], CONST[5], IN[0].xxxx 13: MAD TEMP[1], CONST[6], IN[0].yyyy, TEMP[1] 14: MAD TEMP[1], CONST[7], IN[0].zzzz, TEMP[1] 15: MAD TEMP[1], CONST[8], IN[0].wwww, TEMP[1] 16: MOV OUT[1], TEMP[0] 17: MOV OUT[0], TEMP[1] 18: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = extractelement <4 x float> %46, i32 3 %51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = add i32 %5, %7 %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %52, i32 0, i32 %53) %55 = extractelement <4 x float> %54, i32 0 %56 = extractelement <4 x float> %54, i32 1 %57 = extractelement <4 x float> %54, i32 2 %58 = fmul float %14, %55 %59 = fmul float %15, %55 %60 = fmul float %16, %55 %61 = fmul float %17, %56 %62 = fadd float %61, %58 %63 = fmul float %18, %56 %64 = fadd float %63, %59 %65 = fmul float %19, %56 %66 = fadd float %65, %60 %67 = fmul float %20, %57 %68 = fadd float %67, %62 %69 = fmul float %21, %57 %70 = fadd float %69, %64 %71 = fmul float %22, %57 %72 = fadd float %71, %66 %73 = fmul float %68, %68 %74 = fmul float %70, %70 %75 = fadd float %74, %73 %76 = fmul float %72, %72 %77 = fadd float %75, %76 %78 = call float @llvm.AMDGPU.rsq.clamped.f32(float %77) %79 = fmul float %68, %78 %80 = fmul float %70, %78 %81 = fmul float %72, %78 %82 = fmul float %39, %47 %83 = fmul float %40, %48 %84 = fadd float %83, %82 %85 = fmul float %41, %49 %86 = fadd float %85, %84 %87 = fmul float %42, %50 %88 = fadd float %87, %86 %89 = fmul float %88, %13 %90 = fsub float -0.000000e+00, %89 %91 = fmul float %23, %47 %92 = fmul float %24, %47 %93 = fmul float %25, %47 %94 = fmul float %26, %47 %95 = fmul float %27, %48 %96 = fadd float %95, %91 %97 = fmul float %28, %48 %98 = fadd float %97, %92 %99 = fmul float %29, %48 %100 = fadd float %99, %93 %101 = fmul float %30, %48 %102 = fadd float %101, %94 %103 = fmul float %31, %49 %104 = fadd float %103, %96 %105 = fmul float %32, %49 %106 = fadd float %105, %98 %107 = fmul float %33, %49 %108 = fadd float %107, %100 %109 = fmul float %34, %49 %110 = fadd float %109, %102 %111 = fmul float %35, %50 %112 = fadd float %111, %104 %113 = fmul float %36, %50 %114 = fadd float %113, %106 %115 = fmul float %37, %50 %116 = fadd float %115, %108 %117 = fmul float %38, %50 %118 = fadd float %117, %110 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %79, float %80, float %81, float %90) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %112, float %114, float %116, float %118) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_buffer_load_dword s7, s[0:3], 0x9 ; C2038109 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0xa ; C204810A s_buffer_load_dword s10, s[0:3], 0xc ; C205010C s_buffer_load_dword s11, s[0:3], 0xd ; C205810D s_buffer_load_dword s12, s[0:3], 0xe ; C206010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v5 ; 10000A04 v_mac_f32_e32 v0, s5, v6 ; 3E000C05 v_mul_f32_e32 v8, s6, v5 ; 10100A06 v_mac_f32_e32 v8, s7, v6 ; 3E100C07 v_mul_f32_e32 v5, s8, v5 ; 100A0A08 v_mac_f32_e32 v5, s9, v6 ; 3E0A0C09 v_mac_f32_e32 v0, s10, v7 ; 3E000E0A s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 v_mac_f32_e32 v8, s11, v7 ; 3E100E0B v_mac_f32_e32 v5, s12, v7 ; 3E0A0E0C s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114 s_buffer_load_dword s6, s[0:3], 0x15 ; C2030115 s_buffer_load_dword s7, s[0:3], 0x16 ; C2038116 s_buffer_load_dword s8, s[0:3], 0x17 ; C2040117 s_buffer_load_dword s9, s[0:3], 0x18 ; C2048118 s_buffer_load_dword s10, s[0:3], 0x23 ; C2050123 s_buffer_load_dword s11, s[0:3], 0x26 ; C2058126 s_buffer_load_dword s12, s[0:3], 0x2a ; C206012A s_buffer_load_dword s13, s[0:3], 0x19 ; C2068119 s_buffer_load_dword s14, s[0:3], 0x1a ; C207011A s_buffer_load_dword s15, s[0:3], 0x1b ; C207811B s_buffer_load_dword s16, s[0:3], 0x1c ; C208011C s_buffer_load_dword s17, s[0:3], 0x1d ; C208811D s_buffer_load_dword s18, s[0:3], 0x1e ; C209011E s_buffer_load_dword s19, s[0:3], 0x1f ; C209811F s_buffer_load_dword s20, s[0:3], 0x20 ; C20A0120 s_buffer_load_dword s21, s[0:3], 0x21 ; C20A8121 s_buffer_load_dword s22, s[0:3], 0x22 ; C20B0122 s_buffer_load_dword s23, s[0:3], 0x2e ; C20B812E s_buffer_load_dword s0, s[0:3], 0x32 ; C2000132 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s11, v1 ; 100C020B v_mac_f32_e32 v6, s12, v2 ; 3E0C040C v_mul_f32_e32 v7, s5, v1 ; 100E0205 v_mac_f32_e32 v7, s9, v2 ; 3E0E0409 v_mul_f32_e32 v9, s6, v1 ; 10120206 v_mac_f32_e32 v9, s13, v2 ; 3E12040D v_mul_f32_e32 v10, s7, v1 ; 10140207 v_mac_f32_e32 v10, s14, v2 ; 3E14040E v_mul_f32_e32 v1, s8, v1 ; 10020208 v_mac_f32_e32 v1, s15, v2 ; 3E02040F v_mac_f32_e32 v6, s23, v3 ; 3E0C0617 v_mac_f32_e32 v7, s16, v3 ; 3E0E0610 v_mac_f32_e32 v9, s17, v3 ; 3E120611 v_mac_f32_e32 v10, s18, v3 ; 3E140612 v_mac_f32_e32 v1, s19, v3 ; 3E020613 v_mac_f32_e32 v6, s0, v4 ; 3E0C0800 v_mac_f32_e32 v7, s20, v4 ; 3E0E0814 v_mac_f32_e32 v9, s21, v4 ; 3E120815 v_mul_f32_e32 v2, v0, v0 ; 10040100 v_mac_f32_e32 v2, v8, v8 ; 3E041108 v_mac_f32_e32 v2, v5, v5 ; 3E040B05 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 v_mac_f32_e32 v10, s22, v4 ; 3E140816 v_mac_f32_e32 v1, s10, v4 ; 3E02080A v_mul_f32_e32 v3, s4, v6 ; 10060C04 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_mul_f32_e32 v4, v2, v8 ; 10081102 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_xor_b32_e32 v3, 0x80000000, v3 ; 3A0606FF 80000000 exp 15, 32, 0, 0, 0, v0, v4, v2, v3 ; F800020F 03020400 exp 15, 12, 0, 1, 0, v7, v9, v10, v1 ; F80008CF 010A0907 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 12 Code Size: 344 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.2813, 1.0000, 0.5000, 255.0000} IMM[1] FLT32 { 0.0039, 0.0000, 0.0000, 0.0000} 0: ADD TEMP[0].x, IN[0].zzzz, IMM[0].yyyy 1: RCP TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xy, IN[0].xyyy, TEMP[0].xxxx 3: MAD TEMP[0].xy, IMM[0].xxxx, TEMP[0].xyyy, IMM[0].zzzz 4: MUL TEMP[1].xy, IMM[0].ywww, IN[0].wwww 5: FRC TEMP[1].xy, TEMP[1].xyyy 6: MOV TEMP[2].y, TEMP[1].yyyy 7: MUL TEMP[3].x, TEMP[1].yyyy, IMM[1].xxxx 8: ADD TEMP[2].x, TEMP[1].xxxx, -TEMP[3].xxxx 9: MOV TEMP[0].zw, TEMP[2].yyxy 10: MOV OUT[0], TEMP[0] 11: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = fadd float %24, 1.000000e+00 %27 = fdiv float 1.000000e+00, %26 %28 = fmul float %22, %27 %29 = fmul float %23, %27 %30 = fmul float %28, 0x3FD20033A0000000 %31 = fadd float %30, 5.000000e-01 %32 = fmul float %29, 0x3FD20033A0000000 %33 = fadd float %32, 5.000000e-01 %34 = fmul float %25, 2.550000e+02 %35 = call float @llvm.floor.f32(float %25) %36 = fsub float %25, %35 %37 = call float @llvm.floor.f32(float %34) %38 = fsub float %34, %37 %39 = fmul float %38, 0x3F70101020000000 %40 = fsub float %36, %39 %41 = call i32 @llvm.SI.packf16(float %31, float %33) %42 = bitcast i32 %41 to float %43 = call i32 @llvm.SI.packf16(float %40, float %38) %44 = bitcast i32 %43 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %42, float %44, float %42, float %44) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_add_f32_e32 v4, 1.0, v4 ; 060808F2 v_rcp_f32_e32 v4, v4 ; 7E085504 v_interp_p1_f32 v0, v0, 3, 0, [m0] ; C8000300 v_interp_p2_f32 v0, [v0], v1, 3, 0, [m0] ; C8010301 v_mul_f32_e32 v1, v4, v2 ; 10020504 v_mul_f32_e32 v2, v4, v3 ; 10040704 v_mov_b32_e32 v3, 0x3e90019d ; 7E0602FF 3E90019D v_mad_f32 v1, v1, v3, 0.5 ; D2820001 03C20701 v_mad_f32 v2, v2, v3, 0.5 ; D2820002 03C20702 v_mov_b32_e32 v3, 0x437f0000 ; 7E0602FF 437F0000 v_mul_f32_e32 v4, v3, v0 ; 10080103 v_floor_f32_e32 v5, v0 ; 7E0A4900 v_subrev_f32_e32 v5, v5, v0 ; 0A0A0105 v_floor_f32_e32 v4, v4 ; 7E084904 v_mad_f32 v0, v0, v3, -v4 ; D2820000 84120700 v_madmk_f32_e32 v3, v0, v5, 0xbb808081 ; 40060B00 BB808081 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v0, v3, v0 ; 5E000103 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 136 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..5] DCL TEMP[0..2], LOCAL 0: MUL TEMP[0], CONST[2], IN[0].xxxx 1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[5], IN[0].wwww, TEMP[0] 4: MAD TEMP[1].xy, IN[1].xyyy, CONST[1].xyyy, CONST[1].zwww 5: MUL TEMP[2].xy, IN[1].xyyy, CONST[0].xyyy 6: MOV TEMP[1].zw, TEMP[2].yyxy 7: MOV OUT[1], TEMP[1] 8: MOV OUT[0], TEMP[0] 9: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %7 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = fmul float %19, %39 %50 = fmul float %20, %39 %51 = fmul float %21, %39 %52 = fmul float %22, %39 %53 = fmul float %23, %40 %54 = fadd float %53, %49 %55 = fmul float %24, %40 %56 = fadd float %55, %50 %57 = fmul float %25, %40 %58 = fadd float %57, %51 %59 = fmul float %26, %40 %60 = fadd float %59, %52 %61 = fmul float %27, %41 %62 = fadd float %61, %54 %63 = fmul float %28, %41 %64 = fadd float %63, %56 %65 = fmul float %29, %41 %66 = fadd float %65, %58 %67 = fmul float %30, %41 %68 = fadd float %67, %60 %69 = fmul float %31, %42 %70 = fadd float %69, %62 %71 = fmul float %32, %42 %72 = fadd float %71, %64 %73 = fmul float %33, %42 %74 = fadd float %73, %66 %75 = fmul float %34, %42 %76 = fadd float %75, %68 %77 = fmul float %47, %15 %78 = fadd float %77, %17 %79 = fmul float %48, %16 %80 = fadd float %79, %18 %81 = fmul float %47, %13 %82 = fmul float %48, %14 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %78, float %80, float %81, float %82) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %70, float %72, float %74, float %76) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s12, s[0:3], 0xa ; C206010A s_buffer_load_dword s13, s[0:3], 0xb ; C206810B s_buffer_load_dword s14, s[0:3], 0xc ; C207010C s_buffer_load_dword s15, s[0:3], 0xd ; C207810D s_buffer_load_dword s16, s[0:3], 0xe ; C208010E s_buffer_load_dword s17, s[0:3], 0xf ; C208810F s_buffer_load_dword s18, s[0:3], 0x10 ; C2090110 s_buffer_load_dword s19, s[0:3], 0x11 ; C2098111 s_buffer_load_dword s20, s[0:3], 0x12 ; C20A0112 s_buffer_load_dword s21, s[0:3], 0x13 ; C20A8113 s_buffer_load_dword s22, s[0:3], 0x14 ; C20B0114 s_buffer_load_dword s23, s[0:3], 0x15 ; C20B8115 s_buffer_load_dword s24, s[0:3], 0x16 ; C20C0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s10, v1 ; 1000020A v_mac_f32_e32 v0, s14, v2 ; 3E00040E v_mul_f32_e32 v7, s11, v1 ; 100E020B v_mac_f32_e32 v7, s15, v2 ; 3E0E040F v_mul_f32_e32 v8, s12, v1 ; 1010020C v_mac_f32_e32 v8, s16, v2 ; 3E100410 v_mul_f32_e32 v1, s13, v1 ; 1002020D v_mac_f32_e32 v1, s17, v2 ; 3E020411 v_mac_f32_e32 v0, s18, v3 ; 3E000612 v_mac_f32_e32 v7, s19, v3 ; 3E0E0613 v_mac_f32_e32 v8, s20, v3 ; 3E100614 v_mac_f32_e32 v1, s21, v3 ; 3E020615 v_mac_f32_e32 v0, s22, v4 ; 3E000816 v_mac_f32_e32 v7, s23, v4 ; 3E0E0817 v_mac_f32_e32 v8, s24, v4 ; 3E100818 v_mac_f32_e32 v1, s0, v4 ; 3E020800 v_mov_b32_e32 v2, s8 ; 7E040208 v_mac_f32_e32 v2, s6, v5 ; 3E040A06 v_mov_b32_e32 v3, s9 ; 7E060209 v_mac_f32_e32 v3, s7, v6 ; 3E060C07 v_mul_f32_e32 v4, s4, v5 ; 10080A04 v_mul_f32_e32 v5, s5, v6 ; 100A0C05 exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302 exp 15, 12, 0, 1, 0, v0, v7, v8, v1 ; F80008CF 01080700 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 12 Code Size: 240 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0] DCL CONST[3] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 2.0000, -1.0000, 3.5554, 0.0000} IMM[1] FLT32 { -1.7777, 1.0000, 0.0039, 0.3000} IMM[2] FLT32 { 0.4010, 0.8899, -0.0175, 0.0714} IMM[3] FLT32 { 0.1618, 0.1339, -0.3530, -0.6984} IMM[4] FLT32 { -0.2305, -0.1900, 0.5025, -0.6984} IMM[5] FLT32 { -0.6257, 0.1242, 0.1164, -0.6984} IMM[6] FLT32 { 0.3821, -0.3241, 0.4113, -0.6984} IMM[7] FLT32 { -0.0883, 0.1650, 0.1396, -0.6984} IMM[8] FLT32 { 0.1892, -0.1284, -0.0987, -0.6984} IMM[9] FLT32 { 0.1986, 0.1767, 0.4380, -0.6984} IMM[10] FLT32 { -0.3295, 0.0268, -0.4022, -0.6984} IMM[11] FLT32 { -0.0196, -0.3108, -0.4107, -0.6984} IMM[12] FLT32 { -0.3215, 0.6832, -0.3433, -0.6984} IMM[13] FLT32 { 0.7026, 0.1648, 0.0225, -0.6984} IMM[14] FLT32 { 0.0370, -0.9391, 0.1359, -0.6984} IMM[15] FLT32 { -0.6984, -0.6003, -0.0402, 0.0000} 0: MOV TEMP[0].xy, IN[0].zwww 1: TEX TEMP[0].xyz, TEMP[0], SAMP[1], 2D 2: MAD TEMP[0].xyz, TEMP[0].xyzz, IMM[0].xxxx, IMM[0].yyyy 3: MOV TEMP[1].xy, IN[0].xyyy 4: TEX TEMP[1], TEMP[1], SAMP[0], 2D 5: MAD TEMP[2].xyz, TEMP[1].xyzz, IMM[0].zzww, IMM[1].xxyy 6: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 7: RCP TEMP[3].x, TEMP[3].xxxx 8: MUL TEMP[3].x, IMM[0].xxxx, TEMP[3].xxxx 9: MUL TEMP[2].xy, TEMP[3].xxxx, TEMP[2].xyyy 10: ADD TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy 11: MOV TEMP[2].z, TEMP[3].xxxx 12: DP2 TEMP[1].x, TEMP[1].zwww, IMM[1].yzzz 13: MUL TEMP[1].x, TEMP[1].xxxx, CONST[0].zzzz 14: RCP TEMP[3].x, TEMP[1].xxxx 15: MUL TEMP[3].x, CONST[3].xxxx, TEMP[3].xxxx 16: MOV TEMP[4].x, IMM[0].wwww 17: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[2].xyzz 18: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz 19: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz 20: ADD TEMP[5].xyz, IMM[2].xyzz, -TEMP[5].xyzz 21: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz 22: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww 23: UIF TEMP[6].xxxx :0 24: MOV TEMP[6].x, IMM[1].yyyy 25: ELSE :0 26: MOV TEMP[6].x, IMM[0].yyyy 27: ENDIF 28: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx 29: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz 30: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx 31: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx 32: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy 33: MOV TEMP[5].xy, TEMP[5].xyyy 34: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D 35: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz 36: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz 37: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx 38: MOV_SAT TEMP[5].x, TEMP[5].xxxx 39: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx 40: UIF TEMP[6].xxxx :0 41: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx 42: POW TEMP[4].x, TEMP[5].xxxx, CONST[3].zzzz 43: ENDIF 44: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[3].xyzz 45: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz 46: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz 47: ADD TEMP[5].xyz, IMM[3].xyzz, -TEMP[5].xyzz 48: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz 49: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww 50: UIF TEMP[6].xxxx :0 51: MOV TEMP[6].x, IMM[1].yyyy 52: ELSE :0 53: MOV TEMP[6].x, IMM[0].yyyy 54: ENDIF 55: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx 56: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz 57: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx 58: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx 59: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy 60: MOV TEMP[5].xy, TEMP[5].xyyy 61: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D 62: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz 63: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz 64: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx 65: MOV_SAT TEMP[5].x, TEMP[5].xxxx 66: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx 67: UIF TEMP[6].xxxx :0 68: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx 69: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz 70: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 71: ENDIF 72: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[4].xyzz 73: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz 74: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz 75: ADD TEMP[5].xyz, IMM[4].xyzz, -TEMP[5].xyzz 76: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz 77: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww 78: UIF TEMP[6].xxxx :0 79: MOV TEMP[6].x, IMM[1].yyyy 80: ELSE :0 81: MOV TEMP[6].x, IMM[0].yyyy 82: ENDIF 83: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx 84: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz 85: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx 86: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx 87: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy 88: MOV TEMP[5].xy, TEMP[5].xyyy 89: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D 90: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz 91: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz 92: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx 93: MOV_SAT TEMP[5].x, TEMP[5].xxxx 94: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx 95: UIF TEMP[6].xxxx :0 96: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx 97: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz 98: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 99: ENDIF 100: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[5].xyzz 101: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz 102: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz 103: ADD TEMP[5].xyz, IMM[5].xyzz, -TEMP[5].xyzz 104: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz 105: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww 106: UIF TEMP[6].xxxx :0 107: MOV TEMP[6].x, IMM[1].yyyy 108: ELSE :0 109: MOV TEMP[6].x, IMM[0].yyyy 110: ENDIF 111: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx 112: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz 113: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx 114: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx 115: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy 116: MOV TEMP[5].xy, TEMP[5].xyyy 117: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D 118: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz 119: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz 120: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx 121: MOV_SAT TEMP[5].x, TEMP[5].xxxx 122: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx 123: UIF TEMP[6].xxxx :0 124: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx 125: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz 126: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 127: ENDIF 128: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[6].xyzz 129: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz 130: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz 131: ADD TEMP[5].xyz, IMM[6].xyzz, -TEMP[5].xyzz 132: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz 133: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww 134: UIF TEMP[6].xxxx :0 135: MOV TEMP[6].x, IMM[1].yyyy 136: ELSE :0 137: MOV TEMP[6].x, IMM[0].yyyy 138: ENDIF 139: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx 140: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz 141: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx 142: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx 143: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy 144: MOV TEMP[5].xy, TEMP[5].xyyy 145: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D 146: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz 147: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz 148: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx 149: MOV_SAT TEMP[5].x, TEMP[5].xxxx 150: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx 151: UIF TEMP[6].xxxx :0 152: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx 153: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz 154: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 155: ENDIF 156: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[7].xyzz 157: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz 158: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz 159: ADD TEMP[5].xyz, IMM[7].xyzz, -TEMP[5].xyzz 160: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz 161: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww 162: UIF TEMP[6].xxxx :0 163: MOV TEMP[6].x, IMM[1].yyyy 164: ELSE :0 165: MOV TEMP[6].x, IMM[0].yyyy 166: ENDIF 167: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx 168: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz 169: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx 170: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx 171: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy 172: MOV TEMP[5].xy, TEMP[5].xyyy 173: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D 174: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz 175: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz 176: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx 177: MOV_SAT TEMP[5].x, TEMP[5].xxxx 178: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx 179: UIF TEMP[6].xxxx :0 180: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx 181: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz 182: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 183: ENDIF 184: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[8].xyzz 185: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz 186: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz 187: ADD TEMP[5].xyz, IMM[8].xyzz, -TEMP[5].xyzz 188: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz 189: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww 190: UIF TEMP[6].xxxx :0 191: MOV TEMP[6].x, IMM[1].yyyy 192: ELSE :0 193: MOV TEMP[6].x, IMM[0].yyyy 194: ENDIF 195: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx 196: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz 197: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx 198: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx 199: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy 200: MOV TEMP[5].xy, TEMP[5].xyyy 201: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D 202: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz 203: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz 204: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx 205: MOV_SAT TEMP[5].x, TEMP[5].xxxx 206: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx 207: UIF TEMP[6].xxxx :0 208: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx 209: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz 210: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 211: ENDIF 212: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[9].xyzz 213: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz 214: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz 215: ADD TEMP[5].xyz, IMM[9].xyzz, -TEMP[5].xyzz 216: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz 217: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww 218: UIF TEMP[6].xxxx :0 219: MOV TEMP[6].x, IMM[1].yyyy 220: ELSE :0 221: MOV TEMP[6].x, IMM[0].yyyy 222: ENDIF 223: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx 224: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz 225: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx 226: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx 227: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy 228: MOV TEMP[5].xy, TEMP[5].xyyy 229: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D 230: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz 231: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz 232: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx 233: MOV_SAT TEMP[5].x, TEMP[5].xxxx 234: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx 235: UIF TEMP[6].xxxx :0 236: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx 237: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz 238: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 239: ENDIF 240: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[10].xyzz 241: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz 242: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz 243: ADD TEMP[5].xyz, IMM[10].xyzz, -TEMP[5].xyzz 244: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz 245: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww 246: UIF TEMP[6].xxxx :0 247: MOV TEMP[6].x, IMM[1].yyyy 248: ELSE :0 249: MOV TEMP[6].x, IMM[0].yyyy 250: ENDIF 251: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx 252: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz 253: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx 254: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx 255: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy 256: MOV TEMP[5].xy, TEMP[5].xyyy 257: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D 258: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz 259: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz 260: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx 261: MOV_SAT TEMP[5].x, TEMP[5].xxxx 262: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx 263: UIF TEMP[6].xxxx :0 264: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx 265: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz 266: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 267: ENDIF 268: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[11].xyzz 269: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz 270: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz 271: ADD TEMP[5].xyz, IMM[11].xyzz, -TEMP[5].xyzz 272: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz 273: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww 274: UIF TEMP[6].xxxx :0 275: MOV TEMP[6].x, IMM[1].yyyy 276: ELSE :0 277: MOV TEMP[6].x, IMM[0].yyyy 278: ENDIF 279: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx 280: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz 281: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx 282: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx 283: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy 284: MOV TEMP[5].xy, TEMP[5].xyyy 285: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D 286: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz 287: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz 288: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx 289: MOV_SAT TEMP[5].x, TEMP[5].xxxx 290: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx 291: UIF TEMP[6].xxxx :0 292: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx 293: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz 294: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 295: ENDIF 296: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[12].xyzz 297: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz 298: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz 299: ADD TEMP[5].xyz, IMM[12].xyzz, -TEMP[5].xyzz 300: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz 301: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww 302: UIF TEMP[6].xxxx :0 303: MOV TEMP[6].x, IMM[1].yyyy 304: ELSE :0 305: MOV TEMP[6].x, IMM[0].yyyy 306: ENDIF 307: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx 308: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz 309: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx 310: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx 311: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy 312: MOV TEMP[5].xy, TEMP[5].xyyy 313: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D 314: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz 315: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz 316: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx 317: MOV_SAT TEMP[5].x, TEMP[5].xxxx 318: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx 319: UIF TEMP[6].xxxx :0 320: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx 321: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz 322: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 323: ENDIF 324: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[13].xyzz 325: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz 326: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz 327: ADD TEMP[5].xyz, IMM[13].xyzz, -TEMP[5].xyzz 328: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz 329: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww 330: UIF TEMP[6].xxxx :0 331: MOV TEMP[6].x, IMM[1].yyyy 332: ELSE :0 333: MOV TEMP[6].x, IMM[0].yyyy 334: ENDIF 335: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx 336: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz 337: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx 338: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx 339: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy 340: MOV TEMP[5].xy, TEMP[5].xyyy 341: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D 342: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz 343: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz 344: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx 345: MOV_SAT TEMP[5].x, TEMP[5].xxxx 346: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx 347: UIF TEMP[6].xxxx :0 348: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx 349: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz 350: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 351: ENDIF 352: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[14].xyzz 353: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz 354: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz 355: ADD TEMP[5].xyz, IMM[14].xyzz, -TEMP[5].xyzz 356: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz 357: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww 358: UIF TEMP[6].xxxx :0 359: MOV TEMP[6].x, IMM[1].yyyy 360: ELSE :0 361: MOV TEMP[6].x, IMM[0].yyyy 362: ENDIF 363: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx 364: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz 365: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx 366: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx 367: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy 368: MOV TEMP[5].xy, TEMP[5].xyyy 369: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D 370: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz 371: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz 372: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx 373: MOV_SAT TEMP[5].x, TEMP[5].xxxx 374: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx 375: UIF TEMP[6].xxxx :0 376: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx 377: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz 378: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 379: ENDIF 380: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[15].xyzz 381: MUL TEMP[0].xyz, TEMP[5].xxxx, TEMP[0].xyzz 382: MUL TEMP[0].xyz, IMM[0].xxxx, TEMP[0].xyzz 383: ADD TEMP[0].xyz, IMM[15].xyzz, -TEMP[0].xyzz 384: DP3 TEMP[5].x, TEMP[2].xyzz, TEMP[0].xyzz 385: FSLT TEMP[5].x, TEMP[5].xxxx, IMM[0].wwww 386: UIF TEMP[5].xxxx :0 387: MOV TEMP[5].x, IMM[1].yyyy 388: ELSE :0 389: MOV TEMP[5].x, IMM[0].yyyy 390: ENDIF 391: MUL TEMP[0].xyz, TEMP[0].xyzz, -TEMP[5].xxxx 392: MAD TEMP[0].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[0].xyzz 393: MUL TEMP[2].x, TEMP[0].zzzz, CONST[3].xxxx 394: ADD TEMP[1].x, TEMP[1].xxxx, -TEMP[2].xxxx 395: MAD TEMP[0].xy, TEMP[0].xyyy, TEMP[3].xxxx, IN[0].xyyy 396: MOV TEMP[0].xy, TEMP[0].xyyy 397: TEX TEMP[0].zw, TEMP[0], SAMP[0], 2D 398: DP2 TEMP[0].x, TEMP[0].zwww, IMM[1].yzzz 399: MUL TEMP[0].x, TEMP[0].xxxx, CONST[0].zzzz 400: ADD TEMP[0].x, TEMP[1].xxxx, -TEMP[0].xxxx 401: MOV_SAT TEMP[0].x, TEMP[0].xxxx 402: FSLT TEMP[1].x, CONST[3].yyyy, TEMP[0].xxxx 403: UIF TEMP[1].xxxx :0 404: ADD TEMP[0].x, IMM[1].yyyy, -TEMP[0].xxxx 405: POW TEMP[0].x, TEMP[0].xxxx, CONST[3].zzzz 406: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[0].xxxx 407: ENDIF 408: MUL TEMP[0].x, TEMP[4].xxxx, IMM[2].wwww 409: ADD TEMP[0].x, IMM[1].yyyy, -TEMP[0].xxxx 410: MOV OUT[0], TEMP[0].xxxx 411: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %28 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %29 = load <8 x i32>, <8 x i32> addrspace(2)* %28, align 32, !tbaa !0 %30 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !tbaa !0 %32 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %33 = bitcast <8 x i32> addrspace(2)* %32 to <32 x i8> addrspace(2)* %34 = load <32 x i8>, <32 x i8> addrspace(2)* %33, align 32, !tbaa !0 %35 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %36 = bitcast <4 x i32> addrspace(2)* %35 to <16 x i8> addrspace(2)* %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %42 = bitcast float %40 to i32 %43 = bitcast float %41 to i32 %44 = insertelement <2 x i32> undef, i32 %42, i32 0 %45 = insertelement <2 x i32> %44, i32 %43, i32 1 %46 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %45, <32 x i8> %34, <16 x i8> %37, i32 2) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = fmul float %47, 2.000000e+00 %51 = fadd float %50, -1.000000e+00 %52 = fmul float %48, 2.000000e+00 %53 = fadd float %52, -1.000000e+00 %54 = fmul float %49, 2.000000e+00 %55 = fadd float %54, -1.000000e+00 %56 = bitcast float %38 to i32 %57 = bitcast float %39 to i32 %58 = insertelement <2 x i32> undef, i32 %56, i32 0 %59 = insertelement <2 x i32> %58, i32 %57, i32 1 %60 = bitcast <8 x i32> %29 to <32 x i8> %61 = bitcast <4 x i32> %31 to <16 x i8> %62 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %59, <32 x i8> %60, <16 x i8> %61, i32 2) %63 = extractelement <4 x float> %62, i32 0 %64 = extractelement <4 x float> %62, i32 1 %65 = extractelement <4 x float> %62, i32 2 %66 = extractelement <4 x float> %62, i32 3 %67 = fmul float %63, 0x400C717580000000 %68 = fadd float %67, 0xBFFC717580000000 %69 = fmul float %64, 0x400C717580000000 %70 = fadd float %69, 0xBFFC717580000000 %71 = fmul float %65, 0.000000e+00 %72 = fadd float %71, 1.000000e+00 %73 = fmul float %68, %68 %74 = fmul float %70, %70 %75 = fadd float %74, %73 %76 = fmul float %72, %72 %77 = fadd float %75, %76 %78 = fdiv float 1.000000e+00, %77 %79 = fmul float %78, 2.000000e+00 %80 = fmul float %79, %68 %81 = fmul float %79, %70 %82 = fadd float %79, -1.000000e+00 %83 = fmul float %66, 0x3F70101020000000 %84 = fadd float %65, %83 %85 = fmul float %84, %24 %86 = fdiv float 1.000000e+00, %85 %87 = fmul float %25, %86 %88 = fmul float %51, 0x3FD9AA0C40000000 %89 = fmul float %53, 0x3FEC7A5F80000000 %90 = fadd float %89, %88 %91 = fmul float %55, 0xBF91F02A40000000 %92 = fadd float %90, %91 %93 = fmul float %92, %51 %94 = fmul float %92, %53 %95 = fmul float %92, %55 %96 = fmul float %93, 2.000000e+00 %97 = fmul float %94, 2.000000e+00 %98 = fmul float %95, 2.000000e+00 %99 = fsub float 0x3FD9AA0C40000000, %96 %100 = fsub float 0x3FEC7A5F80000000, %97 %101 = fsub float 0xBF91F02A40000000, %98 %102 = fmul float %80, %99 %103 = fmul float %81, %100 %104 = fadd float %103, %102 %105 = fmul float %82, %101 %106 = fadd float %104, %105 %107 = fcmp olt float %106, 0.000000e+00 %. = select i1 %107, float 1.000000e+00, float -1.000000e+00 %108 = fmul float %., %99 %109 = fmul float %., %100 %110 = fmul float %., %101 %111 = fmul float %80, 0x3FD3333340000000 %112 = fsub float %111, %108 %113 = fmul float %81, 0x3FD3333340000000 %114 = fsub float %113, %109 %115 = fmul float %82, 0x3FD3333340000000 %116 = fsub float %115, %110 %117 = fmul float %116, %25 %118 = fsub float %85, %117 %119 = fmul float %112, %87 %120 = fadd float %119, %38 %121 = fmul float %114, %87 %122 = fadd float %121, %39 %123 = bitcast float %120 to i32 %124 = bitcast float %122 to i32 %125 = insertelement <2 x i32> undef, i32 %123, i32 0 %126 = insertelement <2 x i32> %125, i32 %124, i32 1 %127 = bitcast <8 x i32> %29 to <32 x i8> %128 = bitcast <4 x i32> %31 to <16 x i8> %129 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %126, <32 x i8> %127, <16 x i8> %128, i32 2) %130 = extractelement <4 x float> %129, i32 2 %131 = extractelement <4 x float> %129, i32 3 %132 = fmul float %131, 0x3F70101020000000 %133 = fadd float %130, %132 %134 = fmul float %133, %24 %135 = fsub float %118, %134 %136 = call float @llvm.AMDIL.clamp.(float %135, float 0.000000e+00, float 1.000000e+00) %137 = fcmp olt float %26, %136 br i1 %137, label %IF29, label %ENDIF28 IF29: ; preds = %main_body %138 = fsub float 1.000000e+00, %136 %139 = call float @llvm.pow.f32(float %138, float %27) br label %ENDIF28 ENDIF28: ; preds = %main_body, %IF29 %temp16.0 = phi float [ %139, %IF29 ], [ 0.000000e+00, %main_body ] %140 = fmul float %51, 0x3FC4B55400000000 %141 = fmul float %53, 0x3FC1222AC0000000 %142 = fadd float %141, %140 %143 = fmul float %55, 0xBFD6985920000000 %144 = fadd float %142, %143 %145 = fmul float %144, %51 %146 = fmul float %144, %53 %147 = fmul float %144, %55 %148 = fmul float %145, 2.000000e+00 %149 = fmul float %146, 2.000000e+00 %150 = fmul float %147, 2.000000e+00 %151 = fsub float 0x3FC4B55400000000, %148 %152 = fsub float 0x3FC1222AC0000000, %149 %153 = fsub float 0xBFD6985920000000, %150 %154 = fmul float %80, %151 %155 = fmul float %81, %152 %156 = fadd float %155, %154 %157 = fmul float %82, %153 %158 = fadd float %156, %157 %159 = fcmp olt float %158, 0.000000e+00 %.109 = select i1 %159, float 1.000000e+00, float -1.000000e+00 %160 = fmul float %.109, %151 %161 = fmul float %.109, %152 %162 = fmul float %.109, %153 %163 = fmul float %80, 0x3FD3333340000000 %164 = fsub float %163, %160 %165 = fmul float %81, 0x3FD3333340000000 %166 = fsub float %165, %161 %167 = fmul float %82, 0x3FD3333340000000 %168 = fsub float %167, %162 %169 = fmul float %168, %25 %170 = fsub float %85, %169 %171 = fmul float %164, %87 %172 = fadd float %171, %38 %173 = fmul float %166, %87 %174 = fadd float %173, %39 %175 = bitcast float %172 to i32 %176 = bitcast float %174 to i32 %177 = insertelement <2 x i32> undef, i32 %175, i32 0 %178 = insertelement <2 x i32> %177, i32 %176, i32 1 %179 = bitcast <8 x i32> %29 to <32 x i8> %180 = bitcast <4 x i32> %31 to <16 x i8> %181 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %178, <32 x i8> %179, <16 x i8> %180, i32 2) %182 = extractelement <4 x float> %181, i32 2 %183 = extractelement <4 x float> %181, i32 3 %184 = fmul float %183, 0x3F70101020000000 %185 = fadd float %182, %184 %186 = fmul float %185, %24 %187 = fsub float %170, %186 %188 = call float @llvm.AMDIL.clamp.(float %187, float 0.000000e+00, float 1.000000e+00) %189 = fcmp olt float %26, %188 br i1 %189, label %IF35, label %ENDIF34 IF35: ; preds = %ENDIF28 %190 = fsub float 1.000000e+00, %188 %191 = call float @llvm.pow.f32(float %190, float %27) %192 = fadd float %temp16.0, %191 br label %ENDIF34 ENDIF34: ; preds = %ENDIF28, %IF35 %temp16.1 = phi float [ %192, %IF35 ], [ %temp16.0, %ENDIF28 ] %193 = fmul float %51, 0xBFCD81FE80000000 %194 = fmul float %53, 0xBFC85232E0000000 %195 = fadd float %194, %193 %196 = fmul float %55, 0x3FE014CDE0000000 %197 = fadd float %195, %196 %198 = fmul float %197, %51 %199 = fmul float %197, %53 %200 = fmul float %197, %55 %201 = fmul float %198, 2.000000e+00 %202 = fmul float %199, 2.000000e+00 %203 = fmul float %200, 2.000000e+00 %204 = fsub float 0xBFCD81FE80000000, %201 %205 = fsub float 0xBFC85232E0000000, %202 %206 = fsub float 0x3FE014CDE0000000, %203 %207 = fmul float %80, %204 %208 = fmul float %81, %205 %209 = fadd float %208, %207 %210 = fmul float %82, %206 %211 = fadd float %209, %210 %212 = fcmp olt float %211, 0.000000e+00 %.110 = select i1 %212, float 1.000000e+00, float -1.000000e+00 %213 = fmul float %.110, %204 %214 = fmul float %.110, %205 %215 = fmul float %.110, %206 %216 = fmul float %80, 0x3FD3333340000000 %217 = fsub float %216, %213 %218 = fmul float %81, 0x3FD3333340000000 %219 = fsub float %218, %214 %220 = fmul float %82, 0x3FD3333340000000 %221 = fsub float %220, %215 %222 = fmul float %221, %25 %223 = fsub float %85, %222 %224 = fmul float %217, %87 %225 = fadd float %224, %38 %226 = fmul float %219, %87 %227 = fadd float %226, %39 %228 = bitcast float %225 to i32 %229 = bitcast float %227 to i32 %230 = insertelement <2 x i32> undef, i32 %228, i32 0 %231 = insertelement <2 x i32> %230, i32 %229, i32 1 %232 = bitcast <8 x i32> %29 to <32 x i8> %233 = bitcast <4 x i32> %31 to <16 x i8> %234 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %231, <32 x i8> %232, <16 x i8> %233, i32 2) %235 = extractelement <4 x float> %234, i32 2 %236 = extractelement <4 x float> %234, i32 3 %237 = fmul float %236, 0x3F70101020000000 %238 = fadd float %235, %237 %239 = fmul float %238, %24 %240 = fsub float %223, %239 %241 = call float @llvm.AMDIL.clamp.(float %240, float 0.000000e+00, float 1.000000e+00) %242 = fcmp olt float %26, %241 br i1 %242, label %IF41, label %ENDIF40 IF41: ; preds = %ENDIF34 %243 = fsub float 1.000000e+00, %241 %244 = call float @llvm.pow.f32(float %243, float %27) %245 = fadd float %temp16.1, %244 br label %ENDIF40 ENDIF40: ; preds = %ENDIF34, %IF41 %temp16.2 = phi float [ %245, %IF41 ], [ %temp16.1, %ENDIF34 ] %246 = fmul float %51, 0xBFE40579C0000000 %247 = fmul float %53, 0x3FBFC95980000000 %248 = fadd float %247, %246 %249 = fmul float %55, 0x3FBDCBF1E0000000 %250 = fadd float %248, %249 %251 = fmul float %250, %51 %252 = fmul float %250, %53 %253 = fmul float %250, %55 %254 = fmul float %251, 2.000000e+00 %255 = fmul float %252, 2.000000e+00 %256 = fmul float %253, 2.000000e+00 %257 = fsub float 0xBFE40579C0000000, %254 %258 = fsub float 0x3FBFC95980000000, %255 %259 = fsub float 0x3FBDCBF1E0000000, %256 %260 = fmul float %80, %257 %261 = fmul float %81, %258 %262 = fadd float %261, %260 %263 = fmul float %82, %259 %264 = fadd float %262, %263 %265 = fcmp olt float %264, 0.000000e+00 %.111 = select i1 %265, float 1.000000e+00, float -1.000000e+00 %266 = fmul float %.111, %257 %267 = fmul float %.111, %258 %268 = fmul float %.111, %259 %269 = fmul float %80, 0x3FD3333340000000 %270 = fsub float %269, %266 %271 = fmul float %81, 0x3FD3333340000000 %272 = fsub float %271, %267 %273 = fmul float %82, 0x3FD3333340000000 %274 = fsub float %273, %268 %275 = fmul float %274, %25 %276 = fsub float %85, %275 %277 = fmul float %270, %87 %278 = fadd float %277, %38 %279 = fmul float %272, %87 %280 = fadd float %279, %39 %281 = bitcast float %278 to i32 %282 = bitcast float %280 to i32 %283 = insertelement <2 x i32> undef, i32 %281, i32 0 %284 = insertelement <2 x i32> %283, i32 %282, i32 1 %285 = bitcast <8 x i32> %29 to <32 x i8> %286 = bitcast <4 x i32> %31 to <16 x i8> %287 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %284, <32 x i8> %285, <16 x i8> %286, i32 2) %288 = extractelement <4 x float> %287, i32 2 %289 = extractelement <4 x float> %287, i32 3 %290 = fmul float %289, 0x3F70101020000000 %291 = fadd float %288, %290 %292 = fmul float %291, %24 %293 = fsub float %276, %292 %294 = call float @llvm.AMDIL.clamp.(float %293, float 0.000000e+00, float 1.000000e+00) %295 = fcmp olt float %26, %294 br i1 %295, label %IF47, label %ENDIF46 IF47: ; preds = %ENDIF40 %296 = fsub float 1.000000e+00, %294 %297 = call float @llvm.pow.f32(float %296, float %27) %298 = fadd float %temp16.2, %297 br label %ENDIF46 ENDIF46: ; preds = %ENDIF40, %IF47 %temp16.3 = phi float [ %298, %IF47 ], [ %temp16.2, %ENDIF40 ] %299 = fmul float %51, 0x3FD873F9C0000000 %300 = fmul float %53, 0xBFD4BEB4E0000000 %301 = fadd float %300, %299 %302 = fmul float %55, 0x3FDA5273E0000000 %303 = fadd float %301, %302 %304 = fmul float %303, %51 %305 = fmul float %303, %53 %306 = fmul float %303, %55 %307 = fmul float %304, 2.000000e+00 %308 = fmul float %305, 2.000000e+00 %309 = fmul float %306, 2.000000e+00 %310 = fsub float 0x3FD873F9C0000000, %307 %311 = fsub float 0xBFD4BEB4E0000000, %308 %312 = fsub float 0x3FDA5273E0000000, %309 %313 = fmul float %80, %310 %314 = fmul float %81, %311 %315 = fadd float %314, %313 %316 = fmul float %82, %312 %317 = fadd float %315, %316 %318 = fcmp olt float %317, 0.000000e+00 %.112 = select i1 %318, float 1.000000e+00, float -1.000000e+00 %319 = fmul float %.112, %310 %320 = fmul float %.112, %311 %321 = fmul float %.112, %312 %322 = fmul float %80, 0x3FD3333340000000 %323 = fsub float %322, %319 %324 = fmul float %81, 0x3FD3333340000000 %325 = fsub float %324, %320 %326 = fmul float %82, 0x3FD3333340000000 %327 = fsub float %326, %321 %328 = fmul float %327, %25 %329 = fsub float %85, %328 %330 = fmul float %323, %87 %331 = fadd float %330, %38 %332 = fmul float %325, %87 %333 = fadd float %332, %39 %334 = bitcast float %331 to i32 %335 = bitcast float %333 to i32 %336 = insertelement <2 x i32> undef, i32 %334, i32 0 %337 = insertelement <2 x i32> %336, i32 %335, i32 1 %338 = bitcast <8 x i32> %29 to <32 x i8> %339 = bitcast <4 x i32> %31 to <16 x i8> %340 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %337, <32 x i8> %338, <16 x i8> %339, i32 2) %341 = extractelement <4 x float> %340, i32 2 %342 = extractelement <4 x float> %340, i32 3 %343 = fmul float %342, 0x3F70101020000000 %344 = fadd float %341, %343 %345 = fmul float %344, %24 %346 = fsub float %329, %345 %347 = call float @llvm.AMDIL.clamp.(float %346, float 0.000000e+00, float 1.000000e+00) %348 = fcmp olt float %26, %347 br i1 %348, label %IF53, label %ENDIF52 IF53: ; preds = %ENDIF46 %349 = fsub float 1.000000e+00, %347 %350 = call float @llvm.pow.f32(float %349, float %27) %351 = fadd float %temp16.3, %350 br label %ENDIF52 ENDIF52: ; preds = %ENDIF46, %IF53 %temp16.4 = phi float [ %351, %IF53 ], [ %temp16.3, %ENDIF46 ] %352 = fmul float %51, 0xBFB69A9A00000000 %353 = fmul float %53, 0x3FC51DEE20000000 %354 = fadd float %353, %352 %355 = fmul float %55, 0x3FC1DE0420000000 %356 = fadd float %354, %355 %357 = fmul float %356, %51 %358 = fmul float %356, %53 %359 = fmul float %356, %55 %360 = fmul float %357, 2.000000e+00 %361 = fmul float %358, 2.000000e+00 %362 = fmul float %359, 2.000000e+00 %363 = fsub float 0xBFB69A9A00000000, %360 %364 = fsub float 0x3FC51DEE20000000, %361 %365 = fsub float 0x3FC1DE0420000000, %362 %366 = fmul float %80, %363 %367 = fmul float %81, %364 %368 = fadd float %367, %366 %369 = fmul float %82, %365 %370 = fadd float %368, %369 %371 = fcmp olt float %370, 0.000000e+00 %.113 = select i1 %371, float 1.000000e+00, float -1.000000e+00 %372 = fmul float %.113, %363 %373 = fmul float %.113, %364 %374 = fmul float %.113, %365 %375 = fmul float %80, 0x3FD3333340000000 %376 = fsub float %375, %372 %377 = fmul float %81, 0x3FD3333340000000 %378 = fsub float %377, %373 %379 = fmul float %82, 0x3FD3333340000000 %380 = fsub float %379, %374 %381 = fmul float %380, %25 %382 = fsub float %85, %381 %383 = fmul float %376, %87 %384 = fadd float %383, %38 %385 = fmul float %378, %87 %386 = fadd float %385, %39 %387 = bitcast float %384 to i32 %388 = bitcast float %386 to i32 %389 = insertelement <2 x i32> undef, i32 %387, i32 0 %390 = insertelement <2 x i32> %389, i32 %388, i32 1 %391 = bitcast <8 x i32> %29 to <32 x i8> %392 = bitcast <4 x i32> %31 to <16 x i8> %393 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %390, <32 x i8> %391, <16 x i8> %392, i32 2) %394 = extractelement <4 x float> %393, i32 2 %395 = extractelement <4 x float> %393, i32 3 %396 = fmul float %395, 0x3F70101020000000 %397 = fadd float %394, %396 %398 = fmul float %397, %24 %399 = fsub float %382, %398 %400 = call float @llvm.AMDIL.clamp.(float %399, float 0.000000e+00, float 1.000000e+00) %401 = fcmp olt float %26, %400 br i1 %401, label %IF59, label %ENDIF58 IF59: ; preds = %ENDIF52 %402 = fsub float 1.000000e+00, %400 %403 = call float @llvm.pow.f32(float %402, float %27) %404 = fadd float %temp16.4, %403 br label %ENDIF58 ENDIF58: ; preds = %ENDIF52, %IF59 %temp16.5 = phi float [ %404, %IF59 ], [ %temp16.4, %ENDIF52 ] %405 = fmul float %51, 0x3FC836A5A0000000 %406 = fmul float %53, 0xBFC06E9BC0000000 %407 = fadd float %406, %405 %408 = fmul float %55, 0xBFB946BC00000000 %409 = fadd float %407, %408 %410 = fmul float %409, %51 %411 = fmul float %409, %53 %412 = fmul float %409, %55 %413 = fmul float %410, 2.000000e+00 %414 = fmul float %411, 2.000000e+00 %415 = fmul float %412, 2.000000e+00 %416 = fsub float 0x3FC836A5A0000000, %413 %417 = fsub float 0xBFC06E9BC0000000, %414 %418 = fsub float 0xBFB946BC00000000, %415 %419 = fmul float %80, %416 %420 = fmul float %81, %417 %421 = fadd float %420, %419 %422 = fmul float %82, %418 %423 = fadd float %421, %422 %424 = fcmp olt float %423, 0.000000e+00 %.114 = select i1 %424, float 1.000000e+00, float -1.000000e+00 %425 = fmul float %.114, %416 %426 = fmul float %.114, %417 %427 = fmul float %.114, %418 %428 = fmul float %80, 0x3FD3333340000000 %429 = fsub float %428, %425 %430 = fmul float %81, 0x3FD3333340000000 %431 = fsub float %430, %426 %432 = fmul float %82, 0x3FD3333340000000 %433 = fsub float %432, %427 %434 = fmul float %433, %25 %435 = fsub float %85, %434 %436 = fmul float %429, %87 %437 = fadd float %436, %38 %438 = fmul float %431, %87 %439 = fadd float %438, %39 %440 = bitcast float %437 to i32 %441 = bitcast float %439 to i32 %442 = insertelement <2 x i32> undef, i32 %440, i32 0 %443 = insertelement <2 x i32> %442, i32 %441, i32 1 %444 = bitcast <8 x i32> %29 to <32 x i8> %445 = bitcast <4 x i32> %31 to <16 x i8> %446 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %443, <32 x i8> %444, <16 x i8> %445, i32 2) %447 = extractelement <4 x float> %446, i32 2 %448 = extractelement <4 x float> %446, i32 3 %449 = fmul float %448, 0x3F70101020000000 %450 = fadd float %447, %449 %451 = fmul float %450, %24 %452 = fsub float %435, %451 %453 = call float @llvm.AMDIL.clamp.(float %452, float 0.000000e+00, float 1.000000e+00) %454 = fcmp olt float %26, %453 br i1 %454, label %IF65, label %ENDIF64 IF65: ; preds = %ENDIF58 %455 = fsub float 1.000000e+00, %453 %456 = call float @llvm.pow.f32(float %455, float %27) %457 = fadd float %temp16.5, %456 br label %ENDIF64 ENDIF64: ; preds = %ENDIF58, %IF65 %temp16.6 = phi float [ %457, %IF65 ], [ %temp16.5, %ENDIF58 ] %458 = fmul float %51, 0x3FC96C30A0000000 %459 = fmul float %53, 0x3FC69EE380000000 %460 = fadd float %459, %458 %461 = fmul float %55, 0x3FDC08FF20000000 %462 = fadd float %460, %461 %463 = fmul float %462, %51 %464 = fmul float %462, %53 %465 = fmul float %462, %55 %466 = fmul float %463, 2.000000e+00 %467 = fmul float %464, 2.000000e+00 %468 = fmul float %465, 2.000000e+00 %469 = fsub float 0x3FC96C30A0000000, %466 %470 = fsub float 0x3FC69EE380000000, %467 %471 = fsub float 0x3FDC08FF20000000, %468 %472 = fmul float %80, %469 %473 = fmul float %81, %470 %474 = fadd float %473, %472 %475 = fmul float %82, %471 %476 = fadd float %474, %475 %477 = fcmp olt float %476, 0.000000e+00 %.115 = select i1 %477, float 1.000000e+00, float -1.000000e+00 %478 = fmul float %.115, %469 %479 = fmul float %.115, %470 %480 = fmul float %.115, %471 %481 = fmul float %80, 0x3FD3333340000000 %482 = fsub float %481, %478 %483 = fmul float %81, 0x3FD3333340000000 %484 = fsub float %483, %479 %485 = fmul float %82, 0x3FD3333340000000 %486 = fsub float %485, %480 %487 = fmul float %486, %25 %488 = fsub float %85, %487 %489 = fmul float %482, %87 %490 = fadd float %489, %38 %491 = fmul float %484, %87 %492 = fadd float %491, %39 %493 = bitcast float %490 to i32 %494 = bitcast float %492 to i32 %495 = insertelement <2 x i32> undef, i32 %493, i32 0 %496 = insertelement <2 x i32> %495, i32 %494, i32 1 %497 = bitcast <8 x i32> %29 to <32 x i8> %498 = bitcast <4 x i32> %31 to <16 x i8> %499 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %496, <32 x i8> %497, <16 x i8> %498, i32 2) %500 = extractelement <4 x float> %499, i32 2 %501 = extractelement <4 x float> %499, i32 3 %502 = fmul float %501, 0x3F70101020000000 %503 = fadd float %500, %502 %504 = fmul float %503, %24 %505 = fsub float %488, %504 %506 = call float @llvm.AMDIL.clamp.(float %505, float 0.000000e+00, float 1.000000e+00) %507 = fcmp olt float %26, %506 br i1 %507, label %IF71, label %ENDIF70 IF71: ; preds = %ENDIF64 %508 = fsub float 1.000000e+00, %506 %509 = call float @llvm.pow.f32(float %508, float %27) %510 = fadd float %temp16.6, %509 br label %ENDIF70 ENDIF70: ; preds = %ENDIF64, %IF71 %temp16.7 = phi float [ %510, %IF71 ], [ %temp16.6, %ENDIF64 ] %511 = fmul float %51, 0xBFD51678E0000000 %512 = fmul float %53, 0x3F9B7CD6C0000000 %513 = fadd float %512, %511 %514 = fmul float %55, 0xBFD9BD6040000000 %515 = fadd float %513, %514 %516 = fmul float %515, %51 %517 = fmul float %515, %53 %518 = fmul float %515, %55 %519 = fmul float %516, 2.000000e+00 %520 = fmul float %517, 2.000000e+00 %521 = fmul float %518, 2.000000e+00 %522 = fsub float 0xBFD51678E0000000, %519 %523 = fsub float 0x3F9B7CD6C0000000, %520 %524 = fsub float 0xBFD9BD6040000000, %521 %525 = fmul float %80, %522 %526 = fmul float %81, %523 %527 = fadd float %526, %525 %528 = fmul float %82, %524 %529 = fadd float %527, %528 %530 = fcmp olt float %529, 0.000000e+00 %.116 = select i1 %530, float 1.000000e+00, float -1.000000e+00 %531 = fmul float %.116, %522 %532 = fmul float %.116, %523 %533 = fmul float %.116, %524 %534 = fmul float %80, 0x3FD3333340000000 %535 = fsub float %534, %531 %536 = fmul float %81, 0x3FD3333340000000 %537 = fsub float %536, %532 %538 = fmul float %82, 0x3FD3333340000000 %539 = fsub float %538, %533 %540 = fmul float %539, %25 %541 = fsub float %85, %540 %542 = fmul float %535, %87 %543 = fadd float %542, %38 %544 = fmul float %537, %87 %545 = fadd float %544, %39 %546 = bitcast float %543 to i32 %547 = bitcast float %545 to i32 %548 = insertelement <2 x i32> undef, i32 %546, i32 0 %549 = insertelement <2 x i32> %548, i32 %547, i32 1 %550 = bitcast <8 x i32> %29 to <32 x i8> %551 = bitcast <4 x i32> %31 to <16 x i8> %552 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %549, <32 x i8> %550, <16 x i8> %551, i32 2) %553 = extractelement <4 x float> %552, i32 2 %554 = extractelement <4 x float> %552, i32 3 %555 = fmul float %554, 0x3F70101020000000 %556 = fadd float %553, %555 %557 = fmul float %556, %24 %558 = fsub float %541, %557 %559 = call float @llvm.AMDIL.clamp.(float %558, float 0.000000e+00, float 1.000000e+00) %560 = fcmp olt float %26, %559 br i1 %560, label %IF77, label %ENDIF76 IF77: ; preds = %ENDIF70 %561 = fsub float 1.000000e+00, %559 %562 = call float @llvm.pow.f32(float %561, float %27) %563 = fadd float %temp16.7, %562 br label %ENDIF76 ENDIF76: ; preds = %ENDIF70, %IF77 %temp16.8 = phi float [ %563, %IF77 ], [ %temp16.7, %ENDIF70 ] %564 = fmul float %51, 0xBF9408DAE0000000 %565 = fmul float %53, 0xBFD3E43FC0000000 %566 = fadd float %565, %564 %567 = fmul float %55, 0xBFDA484D80000000 %568 = fadd float %566, %567 %569 = fmul float %568, %51 %570 = fmul float %568, %53 %571 = fmul float %568, %55 %572 = fmul float %569, 2.000000e+00 %573 = fmul float %570, 2.000000e+00 %574 = fmul float %571, 2.000000e+00 %575 = fsub float 0xBF9408DAE0000000, %572 %576 = fsub float 0xBFD3E43FC0000000, %573 %577 = fsub float 0xBFDA484D80000000, %574 %578 = fmul float %80, %575 %579 = fmul float %81, %576 %580 = fadd float %579, %578 %581 = fmul float %82, %577 %582 = fadd float %580, %581 %583 = fcmp olt float %582, 0.000000e+00 %.117 = select i1 %583, float 1.000000e+00, float -1.000000e+00 %584 = fmul float %.117, %575 %585 = fmul float %.117, %576 %586 = fmul float %.117, %577 %587 = fmul float %80, 0x3FD3333340000000 %588 = fsub float %587, %584 %589 = fmul float %81, 0x3FD3333340000000 %590 = fsub float %589, %585 %591 = fmul float %82, 0x3FD3333340000000 %592 = fsub float %591, %586 %593 = fmul float %592, %25 %594 = fsub float %85, %593 %595 = fmul float %588, %87 %596 = fadd float %595, %38 %597 = fmul float %590, %87 %598 = fadd float %597, %39 %599 = bitcast float %596 to i32 %600 = bitcast float %598 to i32 %601 = insertelement <2 x i32> undef, i32 %599, i32 0 %602 = insertelement <2 x i32> %601, i32 %600, i32 1 %603 = bitcast <8 x i32> %29 to <32 x i8> %604 = bitcast <4 x i32> %31 to <16 x i8> %605 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %602, <32 x i8> %603, <16 x i8> %604, i32 2) %606 = extractelement <4 x float> %605, i32 2 %607 = extractelement <4 x float> %605, i32 3 %608 = fmul float %607, 0x3F70101020000000 %609 = fadd float %606, %608 %610 = fmul float %609, %24 %611 = fsub float %594, %610 %612 = call float @llvm.AMDIL.clamp.(float %611, float 0.000000e+00, float 1.000000e+00) %613 = fcmp olt float %26, %612 br i1 %613, label %IF83, label %ENDIF82 IF83: ; preds = %ENDIF76 %614 = fsub float 1.000000e+00, %612 %615 = call float @llvm.pow.f32(float %614, float %27) %616 = fadd float %temp16.8, %615 br label %ENDIF82 ENDIF82: ; preds = %ENDIF76, %IF83 %temp16.9 = phi float [ %616, %IF83 ], [ %temp16.8, %ENDIF76 ] %617 = fmul float %51, 0xBFD4944600000000 %618 = fmul float %53, 0x3FE5DCD060000000 %619 = fadd float %618, %617 %620 = fmul float %55, 0xBFD5F95BA0000000 %621 = fadd float %619, %620 %622 = fmul float %621, %51 %623 = fmul float %621, %53 %624 = fmul float %621, %55 %625 = fmul float %622, 2.000000e+00 %626 = fmul float %623, 2.000000e+00 %627 = fmul float %624, 2.000000e+00 %628 = fsub float 0xBFD4944600000000, %625 %629 = fsub float 0x3FE5DCD060000000, %626 %630 = fsub float 0xBFD5F95BA0000000, %627 %631 = fmul float %80, %628 %632 = fmul float %81, %629 %633 = fadd float %632, %631 %634 = fmul float %82, %630 %635 = fadd float %633, %634 %636 = fcmp olt float %635, 0.000000e+00 %.118 = select i1 %636, float 1.000000e+00, float -1.000000e+00 %637 = fmul float %.118, %628 %638 = fmul float %.118, %629 %639 = fmul float %.118, %630 %640 = fmul float %80, 0x3FD3333340000000 %641 = fsub float %640, %637 %642 = fmul float %81, 0x3FD3333340000000 %643 = fsub float %642, %638 %644 = fmul float %82, 0x3FD3333340000000 %645 = fsub float %644, %639 %646 = fmul float %645, %25 %647 = fsub float %85, %646 %648 = fmul float %641, %87 %649 = fadd float %648, %38 %650 = fmul float %643, %87 %651 = fadd float %650, %39 %652 = bitcast float %649 to i32 %653 = bitcast float %651 to i32 %654 = insertelement <2 x i32> undef, i32 %652, i32 0 %655 = insertelement <2 x i32> %654, i32 %653, i32 1 %656 = bitcast <8 x i32> %29 to <32 x i8> %657 = bitcast <4 x i32> %31 to <16 x i8> %658 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %655, <32 x i8> %656, <16 x i8> %657, i32 2) %659 = extractelement <4 x float> %658, i32 2 %660 = extractelement <4 x float> %658, i32 3 %661 = fmul float %660, 0x3F70101020000000 %662 = fadd float %659, %661 %663 = fmul float %662, %24 %664 = fsub float %647, %663 %665 = call float @llvm.AMDIL.clamp.(float %664, float 0.000000e+00, float 1.000000e+00) %666 = fcmp olt float %26, %665 br i1 %666, label %IF89, label %ENDIF88 IF89: ; preds = %ENDIF82 %667 = fsub float 1.000000e+00, %665 %668 = call float @llvm.pow.f32(float %667, float %27) %669 = fadd float %temp16.9, %668 br label %ENDIF88 ENDIF88: ; preds = %ENDIF82, %IF89 %temp16.10 = phi float [ %669, %IF89 ], [ %temp16.9, %ENDIF82 ] %670 = fmul float %51, 0x3FE67BCD40000000 %671 = fmul float %53, 0x3FC518FB80000000 %672 = fadd float %671, %670 %673 = fmul float %55, 0x3F970BE0E0000000 %674 = fadd float %672, %673 %675 = fmul float %674, %51 %676 = fmul float %674, %53 %677 = fmul float %674, %55 %678 = fmul float %675, 2.000000e+00 %679 = fmul float %676, 2.000000e+00 %680 = fmul float %677, 2.000000e+00 %681 = fsub float 0x3FE67BCD40000000, %678 %682 = fsub float 0x3FC518FB80000000, %679 %683 = fsub float 0x3F970BE0E0000000, %680 %684 = fmul float %80, %681 %685 = fmul float %81, %682 %686 = fadd float %685, %684 %687 = fmul float %82, %683 %688 = fadd float %686, %687 %689 = fcmp olt float %688, 0.000000e+00 %.119 = select i1 %689, float 1.000000e+00, float -1.000000e+00 %690 = fmul float %.119, %681 %691 = fmul float %.119, %682 %692 = fmul float %.119, %683 %693 = fmul float %80, 0x3FD3333340000000 %694 = fsub float %693, %690 %695 = fmul float %81, 0x3FD3333340000000 %696 = fsub float %695, %691 %697 = fmul float %82, 0x3FD3333340000000 %698 = fsub float %697, %692 %699 = fmul float %698, %25 %700 = fsub float %85, %699 %701 = fmul float %694, %87 %702 = fadd float %701, %38 %703 = fmul float %696, %87 %704 = fadd float %703, %39 %705 = bitcast float %702 to i32 %706 = bitcast float %704 to i32 %707 = insertelement <2 x i32> undef, i32 %705, i32 0 %708 = insertelement <2 x i32> %707, i32 %706, i32 1 %709 = bitcast <8 x i32> %29 to <32 x i8> %710 = bitcast <4 x i32> %31 to <16 x i8> %711 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %708, <32 x i8> %709, <16 x i8> %710, i32 2) %712 = extractelement <4 x float> %711, i32 2 %713 = extractelement <4 x float> %711, i32 3 %714 = fmul float %713, 0x3F70101020000000 %715 = fadd float %712, %714 %716 = fmul float %715, %24 %717 = fsub float %700, %716 %718 = call float @llvm.AMDIL.clamp.(float %717, float 0.000000e+00, float 1.000000e+00) %719 = fcmp olt float %26, %718 br i1 %719, label %IF95, label %ENDIF94 IF95: ; preds = %ENDIF88 %720 = fsub float 1.000000e+00, %718 %721 = call float @llvm.pow.f32(float %720, float %27) %722 = fadd float %temp16.10, %721 br label %ENDIF94 ENDIF94: ; preds = %ENDIF88, %IF95 %temp16.11 = phi float [ %722, %IF95 ], [ %temp16.10, %ENDIF88 ] %723 = fmul float %51, 0x3FA2F783E0000000 %724 = fmul float %53, 0xBFEE0D5C80000000 %725 = fadd float %724, %723 %726 = fmul float %55, 0x3FC16466C0000000 %727 = fadd float %725, %726 %728 = fmul float %727, %51 %729 = fmul float %727, %53 %730 = fmul float %727, %55 %731 = fmul float %728, 2.000000e+00 %732 = fmul float %729, 2.000000e+00 %733 = fmul float %730, 2.000000e+00 %734 = fsub float 0x3FA2F783E0000000, %731 %735 = fsub float 0xBFEE0D5C80000000, %732 %736 = fsub float 0x3FC16466C0000000, %733 %737 = fmul float %80, %734 %738 = fmul float %81, %735 %739 = fadd float %738, %737 %740 = fmul float %82, %736 %741 = fadd float %739, %740 %742 = fcmp olt float %741, 0.000000e+00 %.120 = select i1 %742, float 1.000000e+00, float -1.000000e+00 %743 = fmul float %.120, %734 %744 = fmul float %.120, %735 %745 = fmul float %.120, %736 %746 = fmul float %80, 0x3FD3333340000000 %747 = fsub float %746, %743 %748 = fmul float %81, 0x3FD3333340000000 %749 = fsub float %748, %744 %750 = fmul float %82, 0x3FD3333340000000 %751 = fsub float %750, %745 %752 = fmul float %751, %25 %753 = fsub float %85, %752 %754 = fmul float %747, %87 %755 = fadd float %754, %38 %756 = fmul float %749, %87 %757 = fadd float %756, %39 %758 = bitcast float %755 to i32 %759 = bitcast float %757 to i32 %760 = insertelement <2 x i32> undef, i32 %758, i32 0 %761 = insertelement <2 x i32> %760, i32 %759, i32 1 %762 = bitcast <8 x i32> %29 to <32 x i8> %763 = bitcast <4 x i32> %31 to <16 x i8> %764 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %761, <32 x i8> %762, <16 x i8> %763, i32 2) %765 = extractelement <4 x float> %764, i32 2 %766 = extractelement <4 x float> %764, i32 3 %767 = fmul float %766, 0x3F70101020000000 %768 = fadd float %765, %767 %769 = fmul float %768, %24 %770 = fsub float %753, %769 %771 = call float @llvm.AMDIL.clamp.(float %770, float 0.000000e+00, float 1.000000e+00) %772 = fcmp olt float %26, %771 br i1 %772, label %IF101, label %ENDIF100 IF101: ; preds = %ENDIF94 %773 = fsub float 1.000000e+00, %771 %774 = call float @llvm.pow.f32(float %773, float %27) %775 = fadd float %temp16.11, %774 br label %ENDIF100 ENDIF100: ; preds = %ENDIF94, %IF101 %temp16.12 = phi float [ %775, %IF101 ], [ %temp16.11, %ENDIF94 ] %776 = fmul float %51, 0xBFE659A880000000 %777 = fmul float %53, 0xBFE33600E0000000 %778 = fadd float %777, %776 %779 = fmul float %55, 0xBFA4911660000000 %780 = fadd float %778, %779 %781 = fmul float %780, %51 %782 = fmul float %780, %53 %783 = fmul float %780, %55 %784 = fmul float %781, 2.000000e+00 %785 = fmul float %782, 2.000000e+00 %786 = fmul float %783, 2.000000e+00 %787 = fsub float 0xBFE659A880000000, %784 %788 = fsub float 0xBFE33600E0000000, %785 %789 = fsub float 0xBFA4911660000000, %786 %790 = fmul float %80, %787 %791 = fmul float %81, %788 %792 = fadd float %791, %790 %793 = fmul float %82, %789 %794 = fadd float %792, %793 %795 = fcmp olt float %794, 0.000000e+00 %.121 = select i1 %795, float 1.000000e+00, float -1.000000e+00 %796 = fmul float %.121, %787 %797 = fmul float %.121, %788 %798 = fmul float %.121, %789 %799 = fmul float %80, 0x3FD3333340000000 %800 = fsub float %799, %796 %801 = fmul float %81, 0x3FD3333340000000 %802 = fsub float %801, %797 %803 = fmul float %82, 0x3FD3333340000000 %804 = fsub float %803, %798 %805 = fmul float %804, %25 %806 = fsub float %85, %805 %807 = fmul float %800, %87 %808 = fadd float %807, %38 %809 = fmul float %802, %87 %810 = fadd float %809, %39 %811 = bitcast float %808 to i32 %812 = bitcast float %810 to i32 %813 = insertelement <2 x i32> undef, i32 %811, i32 0 %814 = insertelement <2 x i32> %813, i32 %812, i32 1 %815 = bitcast <8 x i32> %29 to <32 x i8> %816 = bitcast <4 x i32> %31 to <16 x i8> %817 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %814, <32 x i8> %815, <16 x i8> %816, i32 2) %818 = extractelement <4 x float> %817, i32 2 %819 = extractelement <4 x float> %817, i32 3 %820 = fmul float %819, 0x3F70101020000000 %821 = fadd float %818, %820 %822 = fmul float %821, %24 %823 = fsub float %806, %822 %824 = call float @llvm.AMDIL.clamp.(float %823, float 0.000000e+00, float 1.000000e+00) %825 = fcmp olt float %26, %824 br i1 %825, label %IF107, label %ENDIF106 IF107: ; preds = %ENDIF100 %826 = fsub float 1.000000e+00, %824 %827 = call float @llvm.pow.f32(float %826, float %27) %828 = fadd float %temp16.12, %827 br label %ENDIF106 ENDIF106: ; preds = %ENDIF100, %IF107 %temp16.13 = phi float [ %828, %IF107 ], [ %temp16.12, %ENDIF100 ] %829 = fmul float %temp16.13, 0x3FB24924A0000000 %830 = fsub float 1.000000e+00, %829 %831 = call i32 @llvm.SI.packf16(float %830, float %830) %832 = bitcast i32 %831 to float %833 = call i32 @llvm.SI.packf16(float %830, float %830) %834 = bitcast i32 %833 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %832, float %834, float %832, float %834) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx4 s[24:27], s[4:5], 0x4 ; C08C0504 s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v4, 0xbfe38bac ; 7E0802FF BFE38BAC v_mov_b32_e32 v5, 0x40638bac ; 7E0A02FF 40638BAC v_mov_b32_e32 v11, 0x3b808081 ; 7E1602FF 3B808081 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[20:23], 0x2 ; C2021502 s_buffer_load_dword s16, s[20:23], 0xc ; C208150C s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v6, v0, 2, 0, [m0] ; C8180200 v_interp_p2_f32 v6, [v6], v1, 2, 0, [m0] ; C8190201 v_interp_p1_f32 v7, v0, 3, 0, [m0] ; C81C0300 v_interp_p2_f32 v7, [v7], v1, 3, 0, [m0] ; C81D0301 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[6:8], 7, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[28:35], s[24:27] ; F0800700 00C70606 s_buffer_load_dword s6, s[20:23], 0xd ; C203150D s_buffer_load_dword s5, s[20:23], 0xe ; C202950E image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[8:15], s[0:3] ; F0800F00 00020C02 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v0, v5, v12, v4 ; D2820000 04121905 v_mac_f32_e32 v4, v5, v13 ; 3E081B05 v_mad_f32 v1, 0, v14, 1.0 ; D2820001 03CA1C80 v_mad_f32 v5, v11, v15, v14 ; D2820005 043A1F0B v_mul_f32_e32 v9, v0, v0 ; 10120100 v_mac_f32_e32 v9, v4, v4 ; 3E120904 v_mac_f32_e32 v9, v1, v1 ; 3E120301 v_rcp_f32_e32 v12, v9 ; 7E185509 v_mad_f32 v10, 2.0, v6, -1.0 ; D282000A 03CE0CF4 v_mad_f32 v9, 2.0, v7, -1.0 ; D2820009 03CE0EF4 v_mad_f32 v8, 2.0, v8, -1.0 ; D2820008 03CE10F4 v_add_f32_e32 v6, v12, v12 ; 060C190C v_mul_f32_e32 v1, v0, v6 ; 10020D00 v_mul_f32_e32 v0, v4, v6 ; 10000D04 v_mov_b32_e32 v13, 0x3ecd5062 ; 7E1A02FF 3ECD5062 v_mul_f32_e32 v4, v13, v10 ; 1008150D v_mov_b32_e32 v14, 0x3f63d2fc ; 7E1C02FF 3F63D2FC v_mac_f32_e32 v4, v14, v9 ; 3E08130E v_mov_b32_e32 v15, 0xbc8f8152 ; 7E1E02FF BC8F8152 v_mac_f32_e32 v4, v15, v8 ; 3E08110F v_mul_f32_e32 v6, v10, v4 ; 100C090A v_mac_f32_e32 v13, -2.0, v6 ; 3E1A0CF5 v_mul_f32_e32 v6, v9, v4 ; 100C0909 v_mac_f32_e32 v14, -2.0, v6 ; 3E1C0CF5 v_mul_f32_e32 v4, v8, v4 ; 10080908 v_mac_f32_e32 v15, -2.0, v4 ; 3E1E08F5 v_mad_f32 v4, 2.0, v12, -1.0 ; D2820004 03CE18F4 v_mul_f32_e32 v7, v13, v1 ; 100E030D v_mac_f32_e32 v7, v14, v0 ; 3E0E010E v_mul_f32_e32 v6, s4, v5 ; 100C0A04 v_rcp_f32_e32 v12, v6 ; 7E185506 v_mac_f32_e32 v7, v15, v4 ; 3E0E090F v_cmp_gt_f32_e32 vcc, 0, v7 ; 7C080E80 v_cndmask_b32_e64 v16, -1.0, 1.0, vcc ; D2000010 01A9E4F3 v_mul_f32_e32 v7, s16, v12 ; 100E1810 v_mul_f32_e32 v12, v13, v16 ; 1018210D v_mul_f32_e32 v13, v14, v16 ; 101A210E v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A v_mad_f32 v12, v1, v14, -v12 ; D282000C 84321D01 v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00 v_mad_f32 v17, v7, v12, v2 ; D2820011 040A1907 v_mad_f32 v18, v7, v13, v3 ; D2820012 040E1B07 image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[8:15], s[0:3] ; F0800C00 00020C11 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v11, v11, v13, v12 ; D282000B 04321B0B v_mul_f32_e32 v12, v15, v16 ; 1018210F v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04 v_mul_f32_e32 v12, s16, v12 ; 10181810 v_mad_f32 v5, v5, s4, -v12 ; D2820005 84300905 v_mad_f32 v5, -v11, s4, v5 ; D2820005 2414090B v_add_f32_e64 v11, 0, v5 clamp ; D206080B 00020A80 v_mov_b32_e32 v5, 0 ; 7E0A0280 v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_sub_f32_e32 v5, 1.0, v11 ; 080A16F2 v_log_f32_e32 v5, v5 ; 7E0A4F05 v_mul_legacy_f32_e32 v5, s5, v5 ; 0E0A0A05 v_exp_f32_e32 v5, v5 ; 7E0A4B05 s_or_b64 exec, exec, s[18:19] ; 88FE127E v_mov_b32_e32 v11, 0x3e25aaa0 ; 7E1602FF 3E25AAA0 v_mul_f32_e32 v12, v11, v10 ; 1018150B v_mov_b32_e32 v13, 0x3e091156 ; 7E1A02FF 3E091156 v_mac_f32_e32 v12, v13, v9 ; 3E18130D v_mov_b32_e32 v14, 0xbeb4c2c9 ; 7E1C02FF BEB4C2C9 v_mac_f32_e32 v12, v14, v8 ; 3E18110E v_mul_f32_e32 v15, v10, v12 ; 101E190A v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5 v_mul_f32_e32 v15, v9, v12 ; 101E1909 v_mul_f32_e32 v12, v8, v12 ; 10181908 v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5 v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5 v_mul_f32_e32 v12, v11, v1 ; 1018030B v_mac_f32_e32 v12, v13, v0 ; 3E18010D v_mac_f32_e32 v12, v14, v4 ; 3E18090E v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3 v_mul_f32_e32 v11, v11, v12 ; 1016190B v_mul_f32_e32 v13, v13, v12 ; 101A190D v_mul_f32_e32 v12, v14, v12 ; 1018190E v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01 v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00 v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04 v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707 v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07 v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081 v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_sub_f32_e32 v11, 1.0, v11 ; 081616F2 v_log_f32_e32 v11, v11 ; 7E164F0B v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605 v_exp_f32_e32 v11, v11 ; 7E164B0B v_add_f32_e32 v5, v11, v5 ; 060A0B0B s_or_b64 exec, exec, s[18:19] ; 88FE127E v_mov_b32_e32 v11, 0xbe6c0ff4 ; 7E1602FF BE6C0FF4 v_mul_f32_e32 v12, v11, v10 ; 1018150B v_mov_b32_e32 v13, 0xbe429197 ; 7E1A02FF BE429197 v_mac_f32_e32 v12, v13, v9 ; 3E18130D v_mov_b32_e32 v14, 0x3f00a66f ; 7E1C02FF 3F00A66F v_mac_f32_e32 v12, v14, v8 ; 3E18110E v_mul_f32_e32 v15, v10, v12 ; 101E190A v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5 v_mul_f32_e32 v15, v9, v12 ; 101E1909 v_mul_f32_e32 v12, v8, v12 ; 10181908 v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5 v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5 v_mul_f32_e32 v12, v11, v1 ; 1018030B v_mac_f32_e32 v12, v13, v0 ; 3E18010D v_mac_f32_e32 v12, v14, v4 ; 3E18090E v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3 v_mul_f32_e32 v11, v11, v12 ; 1016190B v_mul_f32_e32 v13, v13, v12 ; 101A190D v_mul_f32_e32 v12, v14, v12 ; 1018190E v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01 v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00 v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04 v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707 v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07 v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081 v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_sub_f32_e32 v11, 1.0, v11 ; 081616F2 v_log_f32_e32 v11, v11 ; 7E164F0B v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605 v_exp_f32_e32 v11, v11 ; 7E164B0B v_add_f32_e32 v5, v11, v5 ; 060A0B0B s_or_b64 exec, exec, s[18:19] ; 88FE127E v_mov_b32_e32 v11, 0xbf202bce ; 7E1602FF BF202BCE v_mul_f32_e32 v12, v11, v10 ; 1018150B v_mov_b32_e32 v13, 0x3dfe4acc ; 7E1A02FF 3DFE4ACC v_mac_f32_e32 v12, v13, v9 ; 3E18130D v_mov_b32_e32 v14, 0x3dee5f8f ; 7E1C02FF 3DEE5F8F v_mac_f32_e32 v12, v14, v8 ; 3E18110E v_mul_f32_e32 v15, v10, v12 ; 101E190A v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5 v_mul_f32_e32 v15, v9, v12 ; 101E1909 v_mul_f32_e32 v12, v8, v12 ; 10181908 v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5 v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5 v_mul_f32_e32 v12, v11, v1 ; 1018030B v_mac_f32_e32 v12, v13, v0 ; 3E18010D v_mac_f32_e32 v12, v14, v4 ; 3E18090E v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3 v_mul_f32_e32 v11, v11, v12 ; 1016190B v_mul_f32_e32 v13, v13, v12 ; 101A190D v_mul_f32_e32 v12, v14, v12 ; 1018190E v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01 v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00 v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04 v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707 v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07 v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081 v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_sub_f32_e32 v11, 1.0, v11 ; 081616F2 v_log_f32_e32 v11, v11 ; 7E164F0B v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605 v_exp_f32_e32 v11, v11 ; 7E164B0B v_add_f32_e32 v5, v11, v5 ; 060A0B0B s_or_b64 exec, exec, s[18:19] ; 88FE127E v_mov_b32_e32 v11, 0x3ec39fce ; 7E1602FF 3EC39FCE v_mul_f32_e32 v12, v11, v10 ; 1018150B v_mov_b32_e32 v13, 0xbea5f5a7 ; 7E1A02FF BEA5F5A7 v_mac_f32_e32 v12, v13, v9 ; 3E18130D v_mov_b32_e32 v14, 0x3ed2939f ; 7E1C02FF 3ED2939F v_mac_f32_e32 v12, v14, v8 ; 3E18110E v_mul_f32_e32 v15, v10, v12 ; 101E190A v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5 v_mul_f32_e32 v15, v9, v12 ; 101E1909 v_mul_f32_e32 v12, v8, v12 ; 10181908 v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5 v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5 v_mul_f32_e32 v12, v11, v1 ; 1018030B v_mac_f32_e32 v12, v13, v0 ; 3E18010D v_mac_f32_e32 v12, v14, v4 ; 3E18090E v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3 v_mul_f32_e32 v11, v11, v12 ; 1016190B v_mul_f32_e32 v13, v13, v12 ; 101A190D v_mul_f32_e32 v12, v14, v12 ; 1018190E v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01 v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00 v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04 v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707 v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07 v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081 v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_sub_f32_e32 v11, 1.0, v11 ; 081616F2 v_log_f32_e32 v11, v11 ; 7E164F0B v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605 v_exp_f32_e32 v11, v11 ; 7E164B0B v_add_f32_e32 v5, v11, v5 ; 060A0B0B s_or_b64 exec, exec, s[18:19] ; 88FE127E v_mov_b32_e32 v11, 0xbdb4d4d0 ; 7E1602FF BDB4D4D0 v_mul_f32_e32 v12, v11, v10 ; 1018150B v_mov_b32_e32 v13, 0x3e28ef71 ; 7E1A02FF 3E28EF71 v_mac_f32_e32 v12, v13, v9 ; 3E18130D v_mov_b32_e32 v14, 0x3e0ef021 ; 7E1C02FF 3E0EF021 v_mac_f32_e32 v12, v14, v8 ; 3E18110E v_mul_f32_e32 v15, v10, v12 ; 101E190A v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5 v_mul_f32_e32 v15, v9, v12 ; 101E1909 v_mul_f32_e32 v12, v8, v12 ; 10181908 v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5 v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5 v_mul_f32_e32 v12, v11, v1 ; 1018030B v_mac_f32_e32 v12, v13, v0 ; 3E18010D v_mac_f32_e32 v12, v14, v4 ; 3E18090E v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3 v_mul_f32_e32 v11, v11, v12 ; 1016190B v_mul_f32_e32 v13, v13, v12 ; 101A190D v_mul_f32_e32 v12, v14, v12 ; 1018190E v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01 v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00 v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04 v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707 v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07 v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081 v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_sub_f32_e32 v11, 1.0, v11 ; 081616F2 v_log_f32_e32 v11, v11 ; 7E164F0B v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605 v_exp_f32_e32 v11, v11 ; 7E164B0B v_add_f32_e32 v5, v11, v5 ; 060A0B0B s_or_b64 exec, exec, s[18:19] ; 88FE127E v_mov_b32_e32 v11, 0x3e41b52d ; 7E1602FF 3E41B52D v_mul_f32_e32 v12, v11, v10 ; 1018150B v_mov_b32_e32 v13, 0xbe0374de ; 7E1A02FF BE0374DE v_mac_f32_e32 v12, v13, v9 ; 3E18130D v_mov_b32_e32 v14, 0xbdca35e0 ; 7E1C02FF BDCA35E0 v_mac_f32_e32 v12, v14, v8 ; 3E18110E v_mul_f32_e32 v15, v10, v12 ; 101E190A v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5 v_mul_f32_e32 v15, v9, v12 ; 101E1909 v_mul_f32_e32 v12, v8, v12 ; 10181908 v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5 v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5 v_mul_f32_e32 v12, v11, v1 ; 1018030B v_mac_f32_e32 v12, v13, v0 ; 3E18010D v_mac_f32_e32 v12, v14, v4 ; 3E18090E v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3 v_mul_f32_e32 v11, v11, v12 ; 1016190B v_mul_f32_e32 v13, v13, v12 ; 101A190D v_mul_f32_e32 v12, v14, v12 ; 1018190E v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01 v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00 v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04 v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707 v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07 v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081 v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_sub_f32_e32 v11, 1.0, v11 ; 081616F2 v_log_f32_e32 v11, v11 ; 7E164F0B v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605 v_exp_f32_e32 v11, v11 ; 7E164B0B v_add_f32_e32 v5, v11, v5 ; 060A0B0B s_or_b64 exec, exec, s[18:19] ; 88FE127E v_mov_b32_e32 v11, 0x3e4b6185 ; 7E1602FF 3E4B6185 v_mul_f32_e32 v12, v11, v10 ; 1018150B v_mov_b32_e32 v13, 0x3e34f71c ; 7E1A02FF 3E34F71C v_mac_f32_e32 v12, v13, v9 ; 3E18130D v_mov_b32_e32 v14, 0x3ee047f9 ; 7E1C02FF 3EE047F9 v_mac_f32_e32 v12, v14, v8 ; 3E18110E v_mul_f32_e32 v15, v10, v12 ; 101E190A v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5 v_mul_f32_e32 v15, v9, v12 ; 101E1909 v_mul_f32_e32 v12, v8, v12 ; 10181908 v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5 v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5 v_mul_f32_e32 v12, v11, v1 ; 1018030B v_mac_f32_e32 v12, v13, v0 ; 3E18010D v_mac_f32_e32 v12, v14, v4 ; 3E18090E v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3 v_mul_f32_e32 v11, v11, v12 ; 1016190B v_mul_f32_e32 v13, v13, v12 ; 101A190D v_mul_f32_e32 v12, v14, v12 ; 1018190E v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01 v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00 v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04 v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707 v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07 v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081 v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_sub_f32_e32 v11, 1.0, v11 ; 081616F2 v_log_f32_e32 v11, v11 ; 7E164F0B v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605 v_exp_f32_e32 v11, v11 ; 7E164B0B v_add_f32_e32 v5, v11, v5 ; 060A0B0B s_or_b64 exec, exec, s[18:19] ; 88FE127E v_mov_b32_e32 v11, 0xbea8b3c7 ; 7E1602FF BEA8B3C7 v_mul_f32_e32 v12, v11, v10 ; 1018150B v_mov_b32_e32 v13, 0x3cdbe6b6 ; 7E1A02FF 3CDBE6B6 v_mac_f32_e32 v12, v13, v9 ; 3E18130D v_mov_b32_e32 v14, 0xbecdeb02 ; 7E1C02FF BECDEB02 v_mac_f32_e32 v12, v14, v8 ; 3E18110E v_mul_f32_e32 v15, v10, v12 ; 101E190A v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5 v_mul_f32_e32 v15, v9, v12 ; 101E1909 v_mul_f32_e32 v12, v8, v12 ; 10181908 v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5 v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5 v_mul_f32_e32 v12, v11, v1 ; 1018030B v_mac_f32_e32 v12, v13, v0 ; 3E18010D v_mac_f32_e32 v12, v14, v4 ; 3E18090E v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3 v_mul_f32_e32 v11, v11, v12 ; 1016190B v_mul_f32_e32 v13, v13, v12 ; 101A190D v_mul_f32_e32 v12, v14, v12 ; 1018190E v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01 v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00 v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04 v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707 v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07 v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081 v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_sub_f32_e32 v11, 1.0, v11 ; 081616F2 v_log_f32_e32 v11, v11 ; 7E164F0B v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605 v_exp_f32_e32 v11, v11 ; 7E164B0B v_add_f32_e32 v5, v11, v5 ; 060A0B0B s_or_b64 exec, exec, s[18:19] ; 88FE127E v_mov_b32_e32 v11, 0xbca046d7 ; 7E1602FF BCA046D7 v_mul_f32_e32 v12, v11, v10 ; 1018150B v_mov_b32_e32 v13, 0xbe9f21fe ; 7E1A02FF BE9F21FE v_mac_f32_e32 v12, v13, v9 ; 3E18130D v_mov_b32_e32 v14, 0xbed2426c ; 7E1C02FF BED2426C v_mac_f32_e32 v12, v14, v8 ; 3E18110E v_mul_f32_e32 v15, v10, v12 ; 101E190A v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5 v_mul_f32_e32 v15, v9, v12 ; 101E1909 v_mul_f32_e32 v12, v8, v12 ; 10181908 v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5 v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5 v_mul_f32_e32 v12, v11, v1 ; 1018030B v_mac_f32_e32 v12, v13, v0 ; 3E18010D v_mac_f32_e32 v12, v14, v4 ; 3E18090E v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3 v_mul_f32_e32 v11, v11, v12 ; 1016190B v_mul_f32_e32 v13, v13, v12 ; 101A190D v_mul_f32_e32 v12, v14, v12 ; 1018190E v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01 v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00 v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04 v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707 v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07 v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081 v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_sub_f32_e32 v11, 1.0, v11 ; 081616F2 v_log_f32_e32 v11, v11 ; 7E164F0B v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605 v_exp_f32_e32 v11, v11 ; 7E164B0B v_add_f32_e32 v5, v11, v5 ; 060A0B0B s_or_b64 exec, exec, s[18:19] ; 88FE127E v_mov_b32_e32 v11, 0xbea4a230 ; 7E1602FF BEA4A230 v_mul_f32_e32 v12, v11, v10 ; 1018150B v_mov_b32_e32 v13, 0x3f2ee683 ; 7E1A02FF 3F2EE683 v_mac_f32_e32 v12, v13, v9 ; 3E18130D v_mov_b32_e32 v14, 0xbeafcadd ; 7E1C02FF BEAFCADD v_mac_f32_e32 v12, v14, v8 ; 3E18110E v_mul_f32_e32 v15, v10, v12 ; 101E190A v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5 v_mul_f32_e32 v15, v9, v12 ; 101E1909 v_mul_f32_e32 v12, v8, v12 ; 10181908 v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5 v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5 v_mul_f32_e32 v12, v11, v1 ; 1018030B v_mac_f32_e32 v12, v13, v0 ; 3E18010D v_mac_f32_e32 v12, v14, v4 ; 3E18090E v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3 v_mul_f32_e32 v11, v11, v12 ; 1016190B v_mul_f32_e32 v13, v13, v12 ; 101A190D v_mul_f32_e32 v12, v14, v12 ; 1018190E v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01 v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00 v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04 v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707 v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07 v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081 v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_sub_f32_e32 v11, 1.0, v11 ; 081616F2 v_log_f32_e32 v11, v11 ; 7E164F0B v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605 v_exp_f32_e32 v11, v11 ; 7E164B0B v_add_f32_e32 v5, v11, v5 ; 060A0B0B s_or_b64 exec, exec, s[18:19] ; 88FE127E v_mov_b32_e32 v11, 0x3f33de6a ; 7E1602FF 3F33DE6A v_mul_f32_e32 v12, v11, v10 ; 1018150B v_mov_b32_e32 v13, 0x3e28c7dc ; 7E1A02FF 3E28C7DC v_mac_f32_e32 v12, v13, v9 ; 3E18130D v_mov_b32_e32 v14, 0x3cb85f07 ; 7E1C02FF 3CB85F07 v_mac_f32_e32 v12, v14, v8 ; 3E18110E v_mul_f32_e32 v15, v10, v12 ; 101E190A v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5 v_mul_f32_e32 v15, v9, v12 ; 101E1909 v_mul_f32_e32 v12, v8, v12 ; 10181908 v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5 v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5 v_mul_f32_e32 v12, v11, v1 ; 1018030B v_mac_f32_e32 v12, v13, v0 ; 3E18010D v_mac_f32_e32 v12, v14, v4 ; 3E18090E v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3 v_mul_f32_e32 v11, v11, v12 ; 1016190B v_mul_f32_e32 v13, v13, v12 ; 101A190D v_mul_f32_e32 v12, v14, v12 ; 1018190E v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01 v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00 v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04 v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707 v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07 v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081 v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_sub_f32_e32 v11, 1.0, v11 ; 081616F2 v_log_f32_e32 v11, v11 ; 7E164F0B v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605 v_exp_f32_e32 v11, v11 ; 7E164B0B v_add_f32_e32 v5, v11, v5 ; 060A0B0B s_or_b64 exec, exec, s[18:19] ; 88FE127E v_mov_b32_e32 v11, 0x3d17bc1f ; 7E1602FF 3D17BC1F v_mul_f32_e32 v12, v11, v10 ; 1018150B v_mov_b32_e32 v13, 0xbf706ae4 ; 7E1A02FF BF706AE4 v_mac_f32_e32 v12, v13, v9 ; 3E18130D v_mov_b32_e32 v14, 0x3e0b2336 ; 7E1C02FF 3E0B2336 v_mac_f32_e32 v12, v14, v8 ; 3E18110E v_mul_f32_e32 v15, v10, v12 ; 101E190A v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5 v_mul_f32_e32 v15, v9, v12 ; 101E1909 v_mul_f32_e32 v12, v8, v12 ; 10181908 v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5 v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5 v_mul_f32_e32 v12, v11, v1 ; 1018030B v_mac_f32_e32 v12, v13, v0 ; 3E18010D v_mac_f32_e32 v12, v14, v4 ; 3E18090E v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3 v_mul_f32_e32 v11, v11, v12 ; 1016190B v_mul_f32_e32 v13, v13, v12 ; 101A190D v_mul_f32_e32 v12, v14, v12 ; 1018190E v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01 v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00 v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04 v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707 v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07 v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081 v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_sub_f32_e32 v11, 1.0, v11 ; 081616F2 v_log_f32_e32 v11, v11 ; 7E164F0B v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605 v_exp_f32_e32 v11, v11 ; 7E164B0B v_add_f32_e32 v5, v11, v5 ; 060A0B0B s_or_b64 exec, exec, s[18:19] ; 88FE127E v_mov_b32_e32 v11, 0xbf32cd44 ; 7E1602FF BF32CD44 v_mul_f32_e32 v12, v11, v10 ; 1018150B v_mov_b32_e32 v13, 0xbf19b007 ; 7E1A02FF BF19B007 v_mac_f32_e32 v12, v13, v9 ; 3E18130D v_mov_b32_e32 v14, 0xbd2488b3 ; 7E1C02FF BD2488B3 v_mac_f32_e32 v12, v14, v8 ; 3E18110E v_mul_f32_e32 v10, v10, v12 ; 1014190A v_mul_f32_e32 v9, v9, v12 ; 10121909 v_mul_f32_e32 v8, v8, v12 ; 10101908 v_mac_f32_e32 v11, -2.0, v10 ; 3E1614F5 v_mac_f32_e32 v13, -2.0, v9 ; 3E1A12F5 v_mac_f32_e32 v14, -2.0, v8 ; 3E1C10F5 v_mul_f32_e32 v8, v11, v1 ; 1010030B v_mac_f32_e32 v8, v13, v0 ; 3E10010D v_mac_f32_e32 v8, v14, v4 ; 3E10090E v_cmp_gt_f32_e32 vcc, 0, v8 ; 7C081080 v_cndmask_b32_e64 v8, -1.0, 1.0, vcc ; D2000008 01A9E4F3 v_mul_f32_e32 v9, v11, v8 ; 1012110B v_mul_f32_e32 v10, v13, v8 ; 1014110D v_mul_f32_e32 v8, v14, v8 ; 1010110E v_mov_b32_e32 v11, 0x3e99999a ; 7E1602FF 3E99999A v_mad_f32 v1, v1, v11, -v9 ; D2820001 84261701 v_mad_f32 v0, v0, v11, -v10 ; D2820000 842A1700 v_mad_f32 v4, v4, v11, -v8 ; D2820004 84221704 v_mad_f32 v4, -v4, s16, v6 ; D2820004 24182104 v_mac_f32_e32 v2, v7, v1 ; 3E040307 v_mac_f32_e32 v3, v7, v0 ; 3E060107 image_sample v[0:1], 12, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[8:15], s[0:3] ; F0800C00 00020002 s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v0, v1, v0, 0x3b808081 ; 40000101 3B808081 v_mad_f32 v0, -v0, s4, v4 ; D2820000 24100900 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_cmp_lt_f32_e32 vcc, s6, v0 ; 7C020006 s_and_saveexec_b64 s[0:1], vcc ; BE80246A s_xor_b64 s[0:1], exec, s[0:1] ; 8980007E v_sub_f32_e32 v0, 1.0, v0 ; 080000F2 v_log_f32_e32 v0, v0 ; 7E004F00 v_mul_legacy_f32_e32 v0, s5, v0 ; 0E000005 v_exp_f32_e32 v0, v0 ; 7E004B00 v_add_f32_e32 v5, v0, v5 ; 060A0B00 s_or_b64 exec, exec, s[0:1] ; 88FE007E v_mov_b32_e32 v0, 0xbd924925 ; 7E0002FF BD924925 v_mad_f32 v0, v0, v5, 1.0 ; D2820000 03CA0B00 v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 ; 5E000100 exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 20 Code Size: 3392 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..4] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[1], IN[0].xxxx 1: MAD TEMP[0], CONST[2], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[3], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[4], IN[0].wwww, TEMP[0] 4: MAD TEMP[1].xy, IN[1].xyyy, CONST[0].xyyy, CONST[0].zwww 5: MOV OUT[1], TEMP[1] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = add i32 %5, %7 %36 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %34, i32 0, i32 %35) %37 = extractelement <4 x float> %36, i32 0 %38 = extractelement <4 x float> %36, i32 1 %39 = extractelement <4 x float> %36, i32 2 %40 = extractelement <4 x float> %36, i32 3 %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = fmul float %17, %37 %48 = fmul float %18, %37 %49 = fmul float %19, %37 %50 = fmul float %20, %37 %51 = fmul float %21, %38 %52 = fadd float %51, %47 %53 = fmul float %22, %38 %54 = fadd float %53, %48 %55 = fmul float %23, %38 %56 = fadd float %55, %49 %57 = fmul float %24, %38 %58 = fadd float %57, %50 %59 = fmul float %25, %39 %60 = fadd float %59, %52 %61 = fmul float %26, %39 %62 = fadd float %61, %54 %63 = fmul float %27, %39 %64 = fadd float %63, %56 %65 = fmul float %28, %39 %66 = fadd float %65, %58 %67 = fmul float %29, %40 %68 = fadd float %67, %60 %69 = fmul float %30, %40 %70 = fadd float %69, %62 %71 = fmul float %31, %40 %72 = fadd float %71, %64 %73 = fmul float %32, %40 %74 = fadd float %73, %66 %75 = fmul float %45, %13 %76 = fadd float %75, %15 %77 = fmul float %46, %14 %78 = fadd float %77, %16 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %76, float %78, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %68, float %70, float %72, float %74) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102 s_buffer_load_dword s6, s[0:3], 0x3 ; C2030103 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s10, s[0:3], 0x7 ; C2050107 s_buffer_load_dword s11, s[0:3], 0x8 ; C2058108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s5 ; 7E000205 v_mov_b32_e32 v7, s6 ; 7E0E0206 v_mac_f32_e32 v0, s12, v5 ; 3E000A0C v_mac_f32_e32 v7, s4, v6 ; 3E0E0C04 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_buffer_load_dword s5, s[0:3], 0xc ; C202810C s_buffer_load_dword s6, s[0:3], 0xd ; C203010D s_buffer_load_dword s12, s[0:3], 0xe ; C206010E s_buffer_load_dword s15, s[0:3], 0xf ; C207810F s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110 s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111 s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112 s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113 v_mul_f32_e32 v5, s7, v1 ; 100A0207 v_mac_f32_e32 v5, s11, v2 ; 3E0A040B v_mul_f32_e32 v6, s8, v1 ; 100C0208 v_mac_f32_e32 v6, s13, v2 ; 3E0C040D v_mul_f32_e32 v8, s9, v1 ; 10100209 v_mac_f32_e32 v8, s14, v2 ; 3E10040E v_mul_f32_e32 v1, s10, v1 ; 1002020A s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v1, s4, v2 ; 3E020404 v_mac_f32_e32 v5, s5, v3 ; 3E0A0605 v_mac_f32_e32 v6, s6, v3 ; 3E0C0606 v_mac_f32_e32 v8, s12, v3 ; 3E10060C v_mac_f32_e32 v1, s15, v3 ; 3E02060F v_mac_f32_e32 v5, s16, v4 ; 3E0A0810 v_mac_f32_e32 v6, s17, v4 ; 3E0C0811 v_mac_f32_e32 v8, s18, v4 ; 3E100812 v_mac_f32_e32 v1, s0, v4 ; 3E020800 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 32, 0, 0, 0, v0, v7, v2, v2 ; F800020F 02020700 exp 15, 12, 0, 1, 0, v5, v6, v8, v1 ; F80008CF 01080605 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 228 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0] DCL CONST[3] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 5.0000, 4.0000, 0.1000, 0.2000} IMM[1] FLT32 { 1.0000, 0.0039, 2.0000, 3.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].x, TEMP[0], SAMP[1], 2D 2: MUL TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx 3: MOV TEMP[1].xy, IN[0].xyyy 4: TEX TEMP[1], TEMP[1], SAMP[0], 2D 5: ADD TEMP[2].xy, IN[0].xyyy, CONST[3].xyyy 6: MOV TEMP[3].xy, TEMP[2].xyyy 7: TEX TEMP[3], TEMP[3], SAMP[0], 2D 8: ADD TEMP[4].xy, TEMP[1].xyyy, -TEMP[3].xyyy 9: ABS TEMP[4].xy, TEMP[4].xyyy 10: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[4].yyyy 11: FSLT TEMP[4].x, TEMP[4].xxxx, IMM[0].zzzz 12: DP2 TEMP[5].x, TEMP[1].zwww, IMM[1].xyyy 13: DP2 TEMP[3].x, TEMP[3].zwww, IMM[1].xyyy 14: ADD TEMP[3].x, TEMP[5].xxxx, -TEMP[3].xxxx 15: ABS TEMP[3].x, TEMP[3].xxxx 16: MUL TEMP[3].x, TEMP[3].xxxx, CONST[0].zzzz 17: FSLT TEMP[3].x, TEMP[3].xxxx, IMM[0].wwww 18: AND TEMP[3].x, TEMP[4].xxxx, TEMP[3].xxxx 19: AND TEMP[3].x, TEMP[3].xxxx, IMM[1].xxxx 20: MUL TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx 21: MOV TEMP[2].xy, TEMP[2].xyyy 22: TEX TEMP[2].x, TEMP[2], SAMP[1], 2D 23: MAD TEMP[0].x, TEMP[2].xxxx, TEMP[3].xxxx, TEMP[0].xxxx 24: ADD TEMP[2].x, IMM[0].xxxx, TEMP[3].xxxx 25: MUL TEMP[3].xy, CONST[3].xyyy, IMM[1].zzzz 26: ADD TEMP[4].xy, IN[0].xyyy, TEMP[3].xyyy 27: MOV TEMP[5].xy, TEMP[4].xyyy 28: TEX TEMP[5], TEMP[5], SAMP[0], 2D 29: ADD TEMP[6].xy, TEMP[1].xyyy, -TEMP[5].xyyy 30: ABS TEMP[6].xy, TEMP[6].xyyy 31: ADD TEMP[6].x, TEMP[6].xxxx, TEMP[6].yyyy 32: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].zzzz 33: DP2 TEMP[7].x, TEMP[1].zwww, IMM[1].xyyy 34: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].xyyy 35: ADD TEMP[5].x, TEMP[7].xxxx, -TEMP[5].xxxx 36: ABS TEMP[5].x, TEMP[5].xxxx 37: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz 38: FSLT TEMP[5].x, TEMP[5].xxxx, IMM[0].wwww 39: AND TEMP[5].x, TEMP[6].xxxx, TEMP[5].xxxx 40: AND TEMP[5].x, TEMP[5].xxxx, IMM[1].xxxx 41: MUL TEMP[5].x, IMM[1].wwww, TEMP[5].xxxx 42: MOV TEMP[4].xy, TEMP[4].xyyy 43: TEX TEMP[4].x, TEMP[4], SAMP[1], 2D 44: MAD TEMP[0].x, TEMP[4].xxxx, TEMP[5].xxxx, TEMP[0].xxxx 45: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[5].xxxx 46: MUL TEMP[4].xy, CONST[3].xyyy, IMM[1].wwww 47: ADD TEMP[5].xy, IN[0].xyyy, TEMP[4].xyyy 48: MOV TEMP[6].xy, TEMP[5].xyyy 49: TEX TEMP[6], TEMP[6], SAMP[0], 2D 50: ADD TEMP[7].xy, TEMP[1].xyyy, -TEMP[6].xyyy 51: ABS TEMP[7].xy, TEMP[7].xyyy 52: ADD TEMP[7].x, TEMP[7].xxxx, TEMP[7].yyyy 53: FSLT TEMP[7].x, TEMP[7].xxxx, IMM[0].zzzz 54: DP2 TEMP[8].x, TEMP[1].zwww, IMM[1].xyyy 55: DP2 TEMP[6].x, TEMP[6].zwww, IMM[1].xyyy 56: ADD TEMP[6].x, TEMP[8].xxxx, -TEMP[6].xxxx 57: ABS TEMP[6].x, TEMP[6].xxxx 58: MUL TEMP[6].x, TEMP[6].xxxx, CONST[0].zzzz 59: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww 60: AND TEMP[6].x, TEMP[7].xxxx, TEMP[6].xxxx 61: AND TEMP[6].x, TEMP[6].xxxx, IMM[1].xxxx 62: MUL TEMP[6].x, IMM[1].zzzz, TEMP[6].xxxx 63: MOV TEMP[5].xy, TEMP[5].xyyy 64: TEX TEMP[5].x, TEMP[5], SAMP[1], 2D 65: MAD TEMP[0].x, TEMP[5].xxxx, TEMP[6].xxxx, TEMP[0].xxxx 66: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[6].xxxx 67: MUL TEMP[5].xy, CONST[3].xyyy, IMM[0].yyyy 68: ADD TEMP[6].xy, IN[0].xyyy, TEMP[5].xyyy 69: MOV TEMP[7].xy, TEMP[6].xyyy 70: TEX TEMP[7], TEMP[7], SAMP[0], 2D 71: ADD TEMP[8].xy, TEMP[1].xyyy, -TEMP[7].xyyy 72: ABS TEMP[8].xy, TEMP[8].xyyy 73: ADD TEMP[8].x, TEMP[8].xxxx, TEMP[8].yyyy 74: FSLT TEMP[8].x, TEMP[8].xxxx, IMM[0].zzzz 75: DP2 TEMP[9].x, TEMP[1].zwww, IMM[1].xyyy 76: DP2 TEMP[7].x, TEMP[7].zwww, IMM[1].xyyy 77: ADD TEMP[7].x, TEMP[9].xxxx, -TEMP[7].xxxx 78: ABS TEMP[7].x, TEMP[7].xxxx 79: MUL TEMP[7].x, TEMP[7].xxxx, CONST[0].zzzz 80: FSLT TEMP[7].x, TEMP[7].xxxx, IMM[0].wwww 81: AND TEMP[7].x, TEMP[8].xxxx, TEMP[7].xxxx 82: AND TEMP[7].x, TEMP[7].xxxx, IMM[1].xxxx 83: MOV TEMP[6].xy, TEMP[6].xyyy 84: TEX TEMP[6].x, TEMP[6], SAMP[1], 2D 85: MAD TEMP[0].x, TEMP[6].xxxx, TEMP[7].xxxx, TEMP[0].xxxx 86: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[7].xxxx 87: ADD TEMP[6].xy, IN[0].xyyy, -CONST[3].xyyy 88: MOV TEMP[7].xy, TEMP[6].xyyy 89: TEX TEMP[7], TEMP[7], SAMP[0], 2D 90: ADD TEMP[8].xy, TEMP[1].xyyy, -TEMP[7].xyyy 91: ABS TEMP[8].xy, TEMP[8].xyyy 92: ADD TEMP[8].x, TEMP[8].xxxx, TEMP[8].yyyy 93: FSLT TEMP[8].x, TEMP[8].xxxx, IMM[0].zzzz 94: DP2 TEMP[9].x, TEMP[1].zwww, IMM[1].xyyy 95: DP2 TEMP[7].x, TEMP[7].zwww, IMM[1].xyyy 96: ADD TEMP[7].x, TEMP[9].xxxx, -TEMP[7].xxxx 97: ABS TEMP[7].x, TEMP[7].xxxx 98: MUL TEMP[7].x, TEMP[7].xxxx, CONST[0].zzzz 99: FSLT TEMP[7].x, TEMP[7].xxxx, IMM[0].wwww 100: AND TEMP[7].x, TEMP[8].xxxx, TEMP[7].xxxx 101: AND TEMP[7].x, TEMP[7].xxxx, IMM[1].xxxx 102: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx 103: MOV TEMP[6].xy, TEMP[6].xyyy 104: TEX TEMP[6].x, TEMP[6], SAMP[1], 2D 105: MAD TEMP[0].x, TEMP[6].xxxx, TEMP[7].xxxx, TEMP[0].xxxx 106: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[7].xxxx 107: ADD TEMP[3].xy, IN[0].xyyy, -TEMP[3].xyyy 108: MOV TEMP[6].xy, TEMP[3].xyyy 109: TEX TEMP[6], TEMP[6], SAMP[0], 2D 110: ADD TEMP[7].xy, TEMP[1].xyyy, -TEMP[6].xyyy 111: ABS TEMP[7].xy, TEMP[7].xyyy 112: ADD TEMP[7].x, TEMP[7].xxxx, TEMP[7].yyyy 113: FSLT TEMP[7].x, TEMP[7].xxxx, IMM[0].zzzz 114: DP2 TEMP[8].x, TEMP[1].zwww, IMM[1].xyyy 115: DP2 TEMP[6].x, TEMP[6].zwww, IMM[1].xyyy 116: ADD TEMP[6].x, TEMP[8].xxxx, -TEMP[6].xxxx 117: ABS TEMP[6].x, TEMP[6].xxxx 118: MUL TEMP[6].x, TEMP[6].xxxx, CONST[0].zzzz 119: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww 120: AND TEMP[6].x, TEMP[7].xxxx, TEMP[6].xxxx 121: AND TEMP[6].x, TEMP[6].xxxx, IMM[1].xxxx 122: MUL TEMP[6].x, IMM[1].wwww, TEMP[6].xxxx 123: MOV TEMP[3].xy, TEMP[3].xyyy 124: TEX TEMP[3].x, TEMP[3], SAMP[1], 2D 125: MAD TEMP[0].x, TEMP[3].xxxx, TEMP[6].xxxx, TEMP[0].xxxx 126: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[6].xxxx 127: ADD TEMP[3].xy, IN[0].xyyy, -TEMP[4].xyyy 128: MOV TEMP[4].xy, TEMP[3].xyyy 129: TEX TEMP[4], TEMP[4], SAMP[0], 2D 130: ADD TEMP[6].xy, TEMP[1].xyyy, -TEMP[4].xyyy 131: ABS TEMP[6].xy, TEMP[6].xyyy 132: ADD TEMP[6].x, TEMP[6].xxxx, TEMP[6].yyyy 133: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].zzzz 134: DP2 TEMP[7].x, TEMP[1].zwww, IMM[1].xyyy 135: DP2 TEMP[4].x, TEMP[4].zwww, IMM[1].xyyy 136: ADD TEMP[4].x, TEMP[7].xxxx, -TEMP[4].xxxx 137: ABS TEMP[4].x, TEMP[4].xxxx 138: MUL TEMP[4].x, TEMP[4].xxxx, CONST[0].zzzz 139: FSLT TEMP[4].x, TEMP[4].xxxx, IMM[0].wwww 140: AND TEMP[4].x, TEMP[6].xxxx, TEMP[4].xxxx 141: AND TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx 142: MUL TEMP[4].x, IMM[1].zzzz, TEMP[4].xxxx 143: MOV TEMP[3].xy, TEMP[3].xyyy 144: TEX TEMP[3].x, TEMP[3], SAMP[1], 2D 145: MAD TEMP[0].x, TEMP[3].xxxx, TEMP[4].xxxx, TEMP[0].xxxx 146: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx 147: ADD TEMP[3].xy, IN[0].xyyy, -TEMP[5].xyyy 148: MOV TEMP[4].xy, TEMP[3].xyyy 149: TEX TEMP[4], TEMP[4], SAMP[0], 2D 150: ADD TEMP[5].xy, TEMP[1].xyyy, -TEMP[4].xyyy 151: ABS TEMP[5].xy, TEMP[5].xyyy 152: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[5].yyyy 153: FSLT TEMP[5].x, TEMP[5].xxxx, IMM[0].zzzz 154: DP2 TEMP[1].x, TEMP[1].zwww, IMM[1].xyyy 155: DP2 TEMP[4].x, TEMP[4].zwww, IMM[1].xyyy 156: ADD TEMP[1].x, TEMP[1].xxxx, -TEMP[4].xxxx 157: ABS TEMP[1].x, TEMP[1].xxxx 158: MUL TEMP[1].x, TEMP[1].xxxx, CONST[0].zzzz 159: FSLT TEMP[1].x, TEMP[1].xxxx, IMM[0].wwww 160: AND TEMP[1].x, TEMP[5].xxxx, TEMP[1].xxxx 161: AND TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx 162: MOV TEMP[3].xy, TEMP[3].xyyy 163: TEX TEMP[3].x, TEMP[3], SAMP[1], 2D 164: MAD TEMP[0].x, TEMP[3].xxxx, TEMP[1].xxxx, TEMP[0].xxxx 165: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[1].xxxx 166: RCP TEMP[1].x, TEMP[2].xxxx 167: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[1].xxxx 168: MOV OUT[0], TEMP[0].xxxx 169: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %27 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0 %29 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %30 = load <4 x i32>, <4 x i32> addrspace(2)* %29, align 16, !tbaa !0 %31 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %32 = load <8 x i32>, <8 x i32> addrspace(2)* %31, align 32, !tbaa !0 %33 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %34 = load <4 x i32>, <4 x i32> addrspace(2)* %33, align 16, !tbaa !0 %35 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %37 = bitcast float %35 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = bitcast <8 x i32> %32 to <32 x i8> %42 = bitcast <4 x i32> %34 to <16 x i8> %43 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %41, <16 x i8> %42, i32 2) %44 = extractelement <4 x float> %43, i32 0 %45 = fmul float %44, 5.000000e+00 %46 = bitcast float %35 to i32 %47 = bitcast float %36 to i32 %48 = insertelement <2 x i32> undef, i32 %46, i32 0 %49 = insertelement <2 x i32> %48, i32 %47, i32 1 %50 = bitcast <8 x i32> %28 to <32 x i8> %51 = bitcast <4 x i32> %30 to <16 x i8> %52 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %49, <32 x i8> %50, <16 x i8> %51, i32 2) %53 = extractelement <4 x float> %52, i32 0 %54 = extractelement <4 x float> %52, i32 1 %55 = extractelement <4 x float> %52, i32 2 %56 = extractelement <4 x float> %52, i32 3 %57 = fadd float %35, %25 %58 = fadd float %36, %26 %59 = bitcast float %57 to i32 %60 = bitcast float %58 to i32 %61 = insertelement <2 x i32> undef, i32 %59, i32 0 %62 = insertelement <2 x i32> %61, i32 %60, i32 1 %63 = bitcast <8 x i32> %28 to <32 x i8> %64 = bitcast <4 x i32> %30 to <16 x i8> %65 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %62, <32 x i8> %63, <16 x i8> %64, i32 2) %66 = extractelement <4 x float> %65, i32 0 %67 = extractelement <4 x float> %65, i32 1 %68 = extractelement <4 x float> %65, i32 2 %69 = extractelement <4 x float> %65, i32 3 %70 = fsub float %53, %66 %71 = fsub float %54, %67 %72 = call float @llvm.fabs.f32(float %70) %73 = call float @llvm.fabs.f32(float %71) %74 = fadd float %72, %73 %75 = fcmp olt float %74, 0x3FB99999A0000000 %76 = fmul float %56, 0x3F70101020000000 %77 = fadd float %55, %76 %78 = fmul float %69, 0x3F70101020000000 %79 = fadd float %68, %78 %80 = fsub float %77, %79 %81 = call float @llvm.fabs.f32(float %80) %82 = fmul float %81, %24 %83 = fcmp olt float %82, 0x3FC99999A0000000 %84 = and i1 %75, %83 %85 = select i1 %84, float 4.000000e+00, float 0.000000e+00 %86 = bitcast float %57 to i32 %87 = bitcast float %58 to i32 %88 = insertelement <2 x i32> undef, i32 %86, i32 0 %89 = insertelement <2 x i32> %88, i32 %87, i32 1 %90 = bitcast <8 x i32> %32 to <32 x i8> %91 = bitcast <4 x i32> %34 to <16 x i8> %92 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %89, <32 x i8> %90, <16 x i8> %91, i32 2) %93 = extractelement <4 x float> %92, i32 0 %94 = fmul float %93, %85 %95 = fadd float %94, %45 %96 = fadd float %85, 5.000000e+00 %97 = fmul float %25, 2.000000e+00 %98 = fmul float %26, 2.000000e+00 %99 = fadd float %35, %97 %100 = fadd float %36, %98 %101 = bitcast float %99 to i32 %102 = bitcast float %100 to i32 %103 = insertelement <2 x i32> undef, i32 %101, i32 0 %104 = insertelement <2 x i32> %103, i32 %102, i32 1 %105 = bitcast <8 x i32> %28 to <32 x i8> %106 = bitcast <4 x i32> %30 to <16 x i8> %107 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %104, <32 x i8> %105, <16 x i8> %106, i32 2) %108 = extractelement <4 x float> %107, i32 0 %109 = extractelement <4 x float> %107, i32 1 %110 = extractelement <4 x float> %107, i32 2 %111 = extractelement <4 x float> %107, i32 3 %112 = fsub float %53, %108 %113 = fsub float %54, %109 %114 = call float @llvm.fabs.f32(float %112) %115 = call float @llvm.fabs.f32(float %113) %116 = fadd float %114, %115 %117 = fcmp olt float %116, 0x3FB99999A0000000 %118 = fmul float %56, 0x3F70101020000000 %119 = fadd float %55, %118 %120 = fmul float %111, 0x3F70101020000000 %121 = fadd float %110, %120 %122 = fsub float %119, %121 %123 = call float @llvm.fabs.f32(float %122) %124 = fmul float %123, %24 %125 = fcmp olt float %124, 0x3FC99999A0000000 %126 = and i1 %117, %125 %127 = select i1 %126, float 3.000000e+00, float 0.000000e+00 %128 = bitcast float %99 to i32 %129 = bitcast float %100 to i32 %130 = insertelement <2 x i32> undef, i32 %128, i32 0 %131 = insertelement <2 x i32> %130, i32 %129, i32 1 %132 = bitcast <8 x i32> %32 to <32 x i8> %133 = bitcast <4 x i32> %34 to <16 x i8> %134 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %131, <32 x i8> %132, <16 x i8> %133, i32 2) %135 = extractelement <4 x float> %134, i32 0 %136 = fmul float %135, %127 %137 = fadd float %136, %95 %138 = fadd float %96, %127 %139 = fmul float %25, 3.000000e+00 %140 = fmul float %26, 3.000000e+00 %141 = fadd float %35, %139 %142 = fadd float %36, %140 %143 = bitcast float %141 to i32 %144 = bitcast float %142 to i32 %145 = insertelement <2 x i32> undef, i32 %143, i32 0 %146 = insertelement <2 x i32> %145, i32 %144, i32 1 %147 = bitcast <8 x i32> %28 to <32 x i8> %148 = bitcast <4 x i32> %30 to <16 x i8> %149 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %146, <32 x i8> %147, <16 x i8> %148, i32 2) %150 = extractelement <4 x float> %149, i32 0 %151 = extractelement <4 x float> %149, i32 1 %152 = extractelement <4 x float> %149, i32 2 %153 = extractelement <4 x float> %149, i32 3 %154 = fsub float %53, %150 %155 = fsub float %54, %151 %156 = call float @llvm.fabs.f32(float %154) %157 = call float @llvm.fabs.f32(float %155) %158 = fadd float %156, %157 %159 = fcmp olt float %158, 0x3FB99999A0000000 %160 = fmul float %56, 0x3F70101020000000 %161 = fadd float %55, %160 %162 = fmul float %153, 0x3F70101020000000 %163 = fadd float %152, %162 %164 = fsub float %161, %163 %165 = call float @llvm.fabs.f32(float %164) %166 = fmul float %165, %24 %167 = fcmp olt float %166, 0x3FC99999A0000000 %168 = and i1 %159, %167 %169 = select i1 %168, float 2.000000e+00, float 0.000000e+00 %170 = bitcast float %141 to i32 %171 = bitcast float %142 to i32 %172 = insertelement <2 x i32> undef, i32 %170, i32 0 %173 = insertelement <2 x i32> %172, i32 %171, i32 1 %174 = bitcast <8 x i32> %32 to <32 x i8> %175 = bitcast <4 x i32> %34 to <16 x i8> %176 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %173, <32 x i8> %174, <16 x i8> %175, i32 2) %177 = extractelement <4 x float> %176, i32 0 %178 = fmul float %177, %169 %179 = fadd float %178, %137 %180 = fadd float %138, %169 %181 = fmul float %25, 4.000000e+00 %182 = fmul float %26, 4.000000e+00 %183 = fadd float %35, %181 %184 = fadd float %36, %182 %185 = bitcast float %183 to i32 %186 = bitcast float %184 to i32 %187 = insertelement <2 x i32> undef, i32 %185, i32 0 %188 = insertelement <2 x i32> %187, i32 %186, i32 1 %189 = bitcast <8 x i32> %28 to <32 x i8> %190 = bitcast <4 x i32> %30 to <16 x i8> %191 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %188, <32 x i8> %189, <16 x i8> %190, i32 2) %192 = extractelement <4 x float> %191, i32 0 %193 = extractelement <4 x float> %191, i32 1 %194 = extractelement <4 x float> %191, i32 2 %195 = extractelement <4 x float> %191, i32 3 %196 = fsub float %53, %192 %197 = fsub float %54, %193 %198 = call float @llvm.fabs.f32(float %196) %199 = call float @llvm.fabs.f32(float %197) %200 = fadd float %198, %199 %201 = fcmp olt float %200, 0x3FB99999A0000000 %202 = fmul float %56, 0x3F70101020000000 %203 = fadd float %55, %202 %204 = fmul float %195, 0x3F70101020000000 %205 = fadd float %194, %204 %206 = fsub float %203, %205 %207 = call float @llvm.fabs.f32(float %206) %208 = fmul float %207, %24 %209 = fcmp olt float %208, 0x3FC99999A0000000 %210 = and i1 %201, %209 %211 = select i1 %210, float 1.000000e+00, float 0.000000e+00 %212 = bitcast float %183 to i32 %213 = bitcast float %184 to i32 %214 = insertelement <2 x i32> undef, i32 %212, i32 0 %215 = insertelement <2 x i32> %214, i32 %213, i32 1 %216 = bitcast <8 x i32> %32 to <32 x i8> %217 = bitcast <4 x i32> %34 to <16 x i8> %218 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %215, <32 x i8> %216, <16 x i8> %217, i32 2) %219 = extractelement <4 x float> %218, i32 0 %220 = fmul float %219, %211 %221 = fadd float %220, %179 %222 = fadd float %180, %211 %223 = fsub float %35, %25 %224 = fsub float %36, %26 %225 = bitcast float %223 to i32 %226 = bitcast float %224 to i32 %227 = insertelement <2 x i32> undef, i32 %225, i32 0 %228 = insertelement <2 x i32> %227, i32 %226, i32 1 %229 = bitcast <8 x i32> %28 to <32 x i8> %230 = bitcast <4 x i32> %30 to <16 x i8> %231 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %228, <32 x i8> %229, <16 x i8> %230, i32 2) %232 = extractelement <4 x float> %231, i32 0 %233 = extractelement <4 x float> %231, i32 1 %234 = extractelement <4 x float> %231, i32 2 %235 = extractelement <4 x float> %231, i32 3 %236 = fsub float %53, %232 %237 = fsub float %54, %233 %238 = call float @llvm.fabs.f32(float %236) %239 = call float @llvm.fabs.f32(float %237) %240 = fadd float %238, %239 %241 = fcmp olt float %240, 0x3FB99999A0000000 %242 = fmul float %56, 0x3F70101020000000 %243 = fadd float %55, %242 %244 = fmul float %235, 0x3F70101020000000 %245 = fadd float %234, %244 %246 = fsub float %243, %245 %247 = call float @llvm.fabs.f32(float %246) %248 = fmul float %247, %24 %249 = fcmp olt float %248, 0x3FC99999A0000000 %250 = and i1 %241, %249 %251 = select i1 %250, float 4.000000e+00, float 0.000000e+00 %252 = bitcast float %223 to i32 %253 = bitcast float %224 to i32 %254 = insertelement <2 x i32> undef, i32 %252, i32 0 %255 = insertelement <2 x i32> %254, i32 %253, i32 1 %256 = bitcast <8 x i32> %32 to <32 x i8> %257 = bitcast <4 x i32> %34 to <16 x i8> %258 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %255, <32 x i8> %256, <16 x i8> %257, i32 2) %259 = extractelement <4 x float> %258, i32 0 %260 = fmul float %259, %251 %261 = fadd float %260, %221 %262 = fadd float %222, %251 %263 = fsub float %35, %97 %264 = fsub float %36, %98 %265 = bitcast float %263 to i32 %266 = bitcast float %264 to i32 %267 = insertelement <2 x i32> undef, i32 %265, i32 0 %268 = insertelement <2 x i32> %267, i32 %266, i32 1 %269 = bitcast <8 x i32> %28 to <32 x i8> %270 = bitcast <4 x i32> %30 to <16 x i8> %271 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %268, <32 x i8> %269, <16 x i8> %270, i32 2) %272 = extractelement <4 x float> %271, i32 0 %273 = extractelement <4 x float> %271, i32 1 %274 = extractelement <4 x float> %271, i32 2 %275 = extractelement <4 x float> %271, i32 3 %276 = fsub float %53, %272 %277 = fsub float %54, %273 %278 = call float @llvm.fabs.f32(float %276) %279 = call float @llvm.fabs.f32(float %277) %280 = fadd float %278, %279 %281 = fcmp olt float %280, 0x3FB99999A0000000 %282 = fmul float %56, 0x3F70101020000000 %283 = fadd float %55, %282 %284 = fmul float %275, 0x3F70101020000000 %285 = fadd float %274, %284 %286 = fsub float %283, %285 %287 = call float @llvm.fabs.f32(float %286) %288 = fmul float %287, %24 %289 = fcmp olt float %288, 0x3FC99999A0000000 %290 = and i1 %281, %289 %291 = select i1 %290, float 3.000000e+00, float 0.000000e+00 %292 = bitcast float %263 to i32 %293 = bitcast float %264 to i32 %294 = insertelement <2 x i32> undef, i32 %292, i32 0 %295 = insertelement <2 x i32> %294, i32 %293, i32 1 %296 = bitcast <8 x i32> %32 to <32 x i8> %297 = bitcast <4 x i32> %34 to <16 x i8> %298 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %295, <32 x i8> %296, <16 x i8> %297, i32 2) %299 = extractelement <4 x float> %298, i32 0 %300 = fmul float %299, %291 %301 = fadd float %300, %261 %302 = fadd float %262, %291 %303 = fsub float %35, %139 %304 = fsub float %36, %140 %305 = bitcast float %303 to i32 %306 = bitcast float %304 to i32 %307 = insertelement <2 x i32> undef, i32 %305, i32 0 %308 = insertelement <2 x i32> %307, i32 %306, i32 1 %309 = bitcast <8 x i32> %28 to <32 x i8> %310 = bitcast <4 x i32> %30 to <16 x i8> %311 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %308, <32 x i8> %309, <16 x i8> %310, i32 2) %312 = extractelement <4 x float> %311, i32 0 %313 = extractelement <4 x float> %311, i32 1 %314 = extractelement <4 x float> %311, i32 2 %315 = extractelement <4 x float> %311, i32 3 %316 = fsub float %53, %312 %317 = fsub float %54, %313 %318 = call float @llvm.fabs.f32(float %316) %319 = call float @llvm.fabs.f32(float %317) %320 = fadd float %318, %319 %321 = fcmp olt float %320, 0x3FB99999A0000000 %322 = fmul float %56, 0x3F70101020000000 %323 = fadd float %55, %322 %324 = fmul float %315, 0x3F70101020000000 %325 = fadd float %314, %324 %326 = fsub float %323, %325 %327 = call float @llvm.fabs.f32(float %326) %328 = fmul float %327, %24 %329 = fcmp olt float %328, 0x3FC99999A0000000 %330 = and i1 %321, %329 %331 = select i1 %330, float 2.000000e+00, float 0.000000e+00 %332 = bitcast float %303 to i32 %333 = bitcast float %304 to i32 %334 = insertelement <2 x i32> undef, i32 %332, i32 0 %335 = insertelement <2 x i32> %334, i32 %333, i32 1 %336 = bitcast <8 x i32> %32 to <32 x i8> %337 = bitcast <4 x i32> %34 to <16 x i8> %338 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %335, <32 x i8> %336, <16 x i8> %337, i32 2) %339 = extractelement <4 x float> %338, i32 0 %340 = fmul float %339, %331 %341 = fadd float %340, %301 %342 = fadd float %302, %331 %343 = fsub float %35, %181 %344 = fsub float %36, %182 %345 = bitcast float %343 to i32 %346 = bitcast float %344 to i32 %347 = insertelement <2 x i32> undef, i32 %345, i32 0 %348 = insertelement <2 x i32> %347, i32 %346, i32 1 %349 = bitcast <8 x i32> %28 to <32 x i8> %350 = bitcast <4 x i32> %30 to <16 x i8> %351 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %348, <32 x i8> %349, <16 x i8> %350, i32 2) %352 = extractelement <4 x float> %351, i32 0 %353 = extractelement <4 x float> %351, i32 1 %354 = extractelement <4 x float> %351, i32 2 %355 = extractelement <4 x float> %351, i32 3 %356 = fsub float %53, %352 %357 = fsub float %54, %353 %358 = call float @llvm.fabs.f32(float %356) %359 = call float @llvm.fabs.f32(float %357) %360 = fadd float %358, %359 %361 = fcmp olt float %360, 0x3FB99999A0000000 %362 = fmul float %56, 0x3F70101020000000 %363 = fadd float %55, %362 %364 = fmul float %355, 0x3F70101020000000 %365 = fadd float %354, %364 %366 = fsub float %363, %365 %367 = call float @llvm.fabs.f32(float %366) %368 = fmul float %367, %24 %369 = fcmp olt float %368, 0x3FC99999A0000000 %370 = and i1 %361, %369 %371 = select i1 %370, float 1.000000e+00, float 0.000000e+00 %372 = bitcast float %343 to i32 %373 = bitcast float %344 to i32 %374 = insertelement <2 x i32> undef, i32 %372, i32 0 %375 = insertelement <2 x i32> %374, i32 %373, i32 1 %376 = bitcast <8 x i32> %32 to <32 x i8> %377 = bitcast <4 x i32> %34 to <16 x i8> %378 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %375, <32 x i8> %376, <16 x i8> %377, i32 2) %379 = extractelement <4 x float> %378, i32 0 %380 = fmul float %379, %371 %381 = fadd float %380, %341 %382 = fadd float %342, %371 %383 = fdiv float 1.000000e+00, %382 %384 = fmul float %381, %383 %385 = call i32 @llvm.SI.packf16(float %384, float %384) %386 = bitcast i32 %385 to float %387 = call i32 @llvm.SI.packf16(float %384, float %384) %388 = bitcast i32 %387 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %386, float %388, float %386, float %388) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx4 s[0:3], s[4:5], 0x4 ; C0800504 s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v2, 0x3dcccccd ; 7E0402FF 3DCCCCCD v_mov_b32_e32 v3, 0x3b808081 ; 7E0602FF 3B808081 v_mov_b32_e32 v4, 0x3e4ccccd ; 7E0802FF 3E4CCCCD v_mov_b32_e32 v5, 0x40400000 ; 7E0A02FF 40400000 v_interp_p1_f32 v6, v0, 0, 0, [m0] ; C8180000 v_interp_p2_f32 v6, [v6], v1, 0, 0, [m0] ; C8190001 v_interp_p1_f32 v7, v0, 1, 0, [m0] ; C81C0100 v_interp_p2_f32 v7, [v7], v1, 1, 0, [m0] ; C81D0101 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s16, s[20:23], 0x2 ; C2081502 s_buffer_load_dword s17, s[20:23], 0xc ; C208950C s_buffer_load_dword s18, s[20:23], 0xd ; C209150D s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 s_load_dwordx8 s[4:11], s[6:7], 0x8 ; C0C20708 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s17, v6 ; 06000C11 v_add_f32_e32 v1, s18, v7 ; 06020E12 image_sample v8, 1, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[4:11], s[0:3] ; F0800100 00010806 image_sample v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[20:27], s[12:15] ; F0800F00 00650906 image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[20:27], s[12:15] ; F0800F00 00650D00 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v13, v13, v9 ; 0A1A130D v_subrev_f32_e32 v14, v14, v10 ; 0A1C150E v_mad_f32 v15, v3, v16, v15 ; D282000F 043E2103 image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[4:11], s[0:3] ; F0800100 00010000 v_mad_f32 v16, 2.0, s17, v6 ; D2820010 041822F4 v_mad_f32 v17, 2.0, s18, v7 ; D2820011 041C24F4 image_sample v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[20:27], s[12:15] ; F0800F00 00651210 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v1, v18, v9 ; 0A021312 v_subrev_f32_e32 v18, v19, v10 ; 0A241513 v_mad_f32 v19, v3, v21, v20 ; D2820013 04522B03 image_sample v16, 1, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[4:11], s[0:3] ; F0800100 00011010 v_mad_f32 v20, s17, v5, v6 ; D2820014 041A0A11 v_mad_f32 v21, s18, v5, v7 ; D2820015 041E0A12 image_sample v[22:25], 15, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[20:27], s[12:15] ; F0800F00 00651614 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v17, v22, v9 ; 0A221316 v_subrev_f32_e32 v22, v23, v10 ; 0A2C1517 v_mad_f32 v23, v3, v25, v24 ; D2820017 04623303 image_sample v20, 1, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[4:11], s[0:3] ; F0800100 00011414 v_mad_f32 v24, 4.0, s17, v6 ; D2820018 041822F6 v_mad_f32 v25, 4.0, s18, v7 ; D2820019 041C24F6 image_sample v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[20:27], s[12:15] ; F0800F00 00651A18 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v21, v26, v9 ; 0A2A131A v_subrev_f32_e32 v26, v27, v10 ; 0A34151B v_mad_f32 v27, v3, v29, v28 ; D282001B 04723B03 image_sample v24, 1, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[4:11], s[0:3] ; F0800100 00011818 v_subrev_f32_e32 v28, s17, v6 ; 0A380C11 v_subrev_f32_e32 v29, s18, v7 ; 0A3A0E12 image_sample v[30:33], 15, 0, 0, 0, 0, 0, 0, 0, v[28:29], s[20:27], s[12:15] ; F0800F00 00651E1C s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v25, v30, v9 ; 0A32131E v_subrev_f32_e32 v30, v31, v10 ; 0A3C151F v_mad_f32 v31, v3, v33, v32 ; D282001F 04824303 image_sample v28, 1, 0, 0, 0, 0, 0, 0, 0, v[28:29], s[4:11], s[0:3] ; F0800100 00011C1C v_mad_f32 v32, -2.0, s17, v6 ; D2820020 041822F5 v_mad_f32 v33, -2.0, s18, v7 ; D2820021 041C24F5 image_sample v[34:37], 15, 0, 0, 0, 0, 0, 0, 0, v[32:33], s[20:27], s[12:15] ; F0800F00 00652220 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v29, v34, v9 ; 0A3A1322 v_subrev_f32_e32 v34, v35, v10 ; 0A441523 v_mad_f32 v35, v3, v37, v36 ; D2820023 04924B03 image_sample v32, 1, 0, 0, 0, 0, 0, 0, 0, v[32:33], s[4:11], s[0:3] ; F0800100 00012020 v_mad_f32 v36, -s17, v5, v6 ; D2820024 241A0A11 v_mad_f32 v37, -s18, v5, v7 ; D2820025 241E0A12 image_sample v[38:41], 15, 0, 0, 0, 0, 0, 0, 0, v[36:37], s[20:27], s[12:15] ; F0800F00 00652624 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v33, v38, v9 ; 0A421326 v_subrev_f32_e32 v38, v39, v10 ; 0A4C1527 v_mad_f32 v39, v3, v41, v40 ; D2820027 04A25303 v_mad_f32 v40, 4.0, -s17, v6 ; D2820028 441822F6 v_mad_f32 v41, 4.0, -s18, v7 ; D2820029 441C24F6 image_sample v6, 1, 0, 0, 0, 0, 0, 0, 0, v[36:37], s[4:11], s[0:3] ; F0800100 00010624 v_mad_f32 v7, v3, v12, v11 ; D2820007 042E1903 image_sample v[42:45], 15, 0, 0, 0, 0, 0, 0, 0, v[40:41], s[20:27], s[12:15] ; F0800F00 00652A28 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v9, v42, v9 ; 0A12132A v_subrev_f32_e32 v10, v43, v10 ; 0A14152B v_mad_f32 v3, v3, v45, v44 ; D2820003 04B25B03 image_sample v11, 1, 0, 0, 0, 0, 0, 0, 0, v[40:41], s[4:11], s[0:3] ; F0800100 00010B28 v_add_f32_e64 v12, |v13|, |v14| ; D206030C 00021D0D v_cmp_lt_f32_e32 vcc, v12, v2 ; 7C02050C v_subrev_f32_e32 v12, v15, v7 ; 0A180F0F v_mul_f32_e64 v12, |v12|, s16 ; D210010C 0000210C v_cmp_lt_f32_e64 s[0:1], v12, v4 ; D0020000 0002090C v_add_f32_e64 v1, |v1|, |v18| ; D2060301 00022501 v_cmp_lt_f32_e64 s[2:3], v1, v2 ; D0020002 00020501 v_subrev_f32_e32 v1, v19, v7 ; 0A020F13 v_mul_f32_e64 v1, |v1|, s16 ; D2100101 00002101 v_cmp_lt_f32_e64 s[4:5], v1, v4 ; D0020004 00020901 v_add_f32_e64 v1, |v17|, |v22| ; D2060301 00022D11 v_cmp_lt_f32_e64 s[6:7], v1, v2 ; D0020006 00020501 v_add_f32_e64 v1, |v21|, |v26| ; D2060301 00023515 v_subrev_f32_e32 v12, v23, v7 ; 0A180F17 v_mul_f32_e64 v12, |v12|, s16 ; D210010C 0000210C v_cmp_lt_f32_e64 s[8:9], v12, v4 ; D0020008 0002090C v_add_f32_e64 v12, |v25|, |v30| ; D206030C 00023D19 v_cmp_lt_f32_e64 s[10:11], v1, v2 ; D002000A 00020501 v_add_f32_e64 v1, |v29|, |v34| ; D2060301 0002451D v_subrev_f32_e32 v13, v27, v7 ; 0A1A0F1B v_mul_f32_e64 v13, |v13|, s16 ; D210010D 0000210D v_cmp_lt_f32_e64 s[12:13], v13, v4 ; D002000C 0002090D v_add_f32_e64 v13, |v33|, |v38| ; D206030D 00024D21 v_cmp_lt_f32_e64 s[14:15], v12, v2 ; D002000E 0002050C v_cndmask_b32_e64 v12, 0, 4.0, s[0:1] ; D200000C 0001EC80 v_cndmask_b32_e32 v12, 0, v12 ; 00181880 v_subrev_f32_e32 v14, v31, v7 ; 0A1C0F1F v_mul_f32_e64 v14, |v14|, s16 ; D210010E 0000210E v_cmp_lt_f32_e32 vcc, v14, v4 ; 7C02090E v_cndmask_b32_e64 v14, 0, v5, s[4:5] ; D200000E 00120A80 v_cndmask_b32_e64 v14, 0, v14, s[2:3] ; D200000E 000A1C80 v_cmp_lt_f32_e64 s[0:1], v1, v2 ; D0020000 00020501 v_cndmask_b32_e64 v1, 0, 2.0, s[8:9] ; D2000001 0021E880 v_cndmask_b32_e64 v1, 0, v1, s[6:7] ; D2000001 001A0280 v_subrev_f32_e32 v15, v35, v7 ; 0A1E0F23 v_mul_f32_e64 v15, |v15|, s16 ; D210010F 0000210F v_cmp_lt_f32_e64 s[2:3], v15, v4 ; D0020002 0002090F v_cndmask_b32_e64 v15, 0, 1.0, s[12:13] ; D200000F 0031E480 v_cndmask_b32_e64 v15, 0, v15, s[10:11] ; D200000F 002A1E80 v_cmp_lt_f32_e64 s[4:5], v13, v2 ; D0020004 0002050D v_cndmask_b32_e64 v13, 0, 4.0, vcc ; D200000D 01A9EC80 v_cndmask_b32_e64 v13, 0, v13, s[14:15] ; D200000D 003A1A80 v_cndmask_b32_e64 v5, 0, v5, s[2:3] ; D2000005 000A0A80 v_cndmask_b32_e64 v5, 0, v5, s[0:1] ; D2000005 00020A80 v_subrev_f32_e32 v17, v39, v7 ; 0A220F27 v_mul_f32_e64 v17, |v17|, s16 ; D2100111 00002111 v_cmp_lt_f32_e32 vcc, v17, v4 ; 7C020911 v_cndmask_b32_e64 v17, 0, 2.0, vcc ; D2000011 01A9E880 v_cndmask_b32_e64 v17, 0, v17, s[4:5] ; D2000011 00122280 v_add_f32_e64 v9, |v9|, |v10| ; D2060309 00021509 v_subrev_f32_e32 v3, v3, v7 ; 0A060F03 v_mul_f32_e64 v3, |v3|, s16 ; D2100103 00002103 v_cmp_lt_f32_e32 vcc, v3, v4 ; 7C020903 v_cndmask_b32_e64 v3, 0, 1.0, vcc ; D2000003 01A9E480 v_cmp_lt_f32_e32 vcc, v9, v2 ; 7C020509 v_cndmask_b32_e32 v2, 0, v3 ; 00040680 v_mov_b32_e32 v3, 0x40a00000 ; 7E0602FF 40A00000 v_mul_f32_e32 v4, v3, v8 ; 10081103 v_mac_f32_e32 v4, v12, v0 ; 3E08010C v_add_f32_e32 v0, v3, v12 ; 06001903 v_mac_f32_e32 v4, v14, v16 ; 3E08210E v_add_f32_e32 v0, v14, v0 ; 0600010E v_mac_f32_e32 v4, v1, v20 ; 3E082901 v_add_f32_e32 v0, v1, v0 ; 06000101 v_mac_f32_e32 v4, v15, v24 ; 3E08310F v_add_f32_e32 v0, v15, v0 ; 0600010F v_mac_f32_e32 v4, v13, v28 ; 3E08390D v_add_f32_e32 v0, v13, v0 ; 0600010D v_add_f32_e32 v0, v5, v0 ; 06000105 v_add_f32_e32 v0, v17, v0 ; 06000111 v_add_f32_e32 v0, v2, v0 ; 06000102 v_rcp_f32_e32 v0, v0 ; 7E005500 v_mac_f32_e32 v4, v5, v32 ; 3E084105 v_mac_f32_e32 v4, v17, v6 ; 3E080D11 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v4, v2, v11 ; 3E081702 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 ; 5E000100 exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 48 Code Size: 1012 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], IMM[0].xyyy, IN[1].xxxx 5: MUL TEMP[2], IMM[0].yxyy, IN[1].yyyy 6: ADD TEMP[3].xy, TEMP[1], TEMP[2] 7: MOV TEMP[3].xy, TEMP[3].xyxx 8: ADD TEMP[1].xy, TEMP[1], TEMP[2] 9: MOV TEMP[3].zw, TEMP[1].yyxy 10: MOV OUT[1], TEMP[3] 11: MOV OUT[0], TEMP[0] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = fmul float %13, %33 %44 = fmul float %14, %33 %45 = fmul float %15, %33 %46 = fmul float %16, %33 %47 = fmul float %17, %34 %48 = fadd float %47, %43 %49 = fmul float %18, %34 %50 = fadd float %49, %44 %51 = fmul float %19, %34 %52 = fadd float %51, %45 %53 = fmul float %20, %34 %54 = fadd float %53, %46 %55 = fmul float %21, %35 %56 = fadd float %55, %48 %57 = fmul float %22, %35 %58 = fadd float %57, %50 %59 = fmul float %23, %35 %60 = fadd float %59, %52 %61 = fmul float %24, %35 %62 = fadd float %61, %54 %63 = fmul float %25, %36 %64 = fadd float %63, %56 %65 = fmul float %26, %36 %66 = fadd float %65, %58 %67 = fmul float %27, %36 %68 = fadd float %67, %60 %69 = fmul float %28, %36 %70 = fadd float %69, %62 %71 = fmul float %41, 0.000000e+00 %72 = fmul float %42, 0.000000e+00 %73 = fadd float %41, %72 %74 = fadd float %71, %42 %75 = fadd float %41, %72 %76 = fadd float %71, %42 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %73, float %74, float %75, float %76) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v1 ; 10000204 v_mac_f32_e32 v0, s8, v2 ; 3E000408 v_mul_f32_e32 v7, s5, v1 ; 100E0205 v_mac_f32_e32 v7, s9, v2 ; 3E0E0409 v_mul_f32_e32 v8, s6, v1 ; 10100206 v_mac_f32_e32 v8, s10, v2 ; 3E10040A v_mul_f32_e32 v1, s7, v1 ; 10020207 v_mac_f32_e32 v1, s11, v2 ; 3E02040B v_mac_f32_e32 v0, s12, v3 ; 3E00060C v_mac_f32_e32 v7, s13, v3 ; 3E0E060D v_mac_f32_e32 v8, s14, v3 ; 3E10060E v_mac_f32_e32 v1, s15, v3 ; 3E02060F v_mac_f32_e32 v0, s16, v4 ; 3E000810 v_mac_f32_e32 v7, s17, v4 ; 3E0E0811 v_mac_f32_e32 v8, s18, v4 ; 3E100812 v_mac_f32_e32 v1, s0, v4 ; 3E020800 v_mad_f32 v2, 0, v6, v5 ; D2820002 04160C80 v_mac_f32_e32 v6, 0, v5 ; 3E0C0A80 exp 15, 32, 0, 0, 0, v2, v6, v2, v6 ; F800020F 06020602 exp 15, 12, 0, 1, 0, v0, v7, v8, v1 ; F80008CF 01080700 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 204 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0] DCL TEMP[0..2], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].w, TEMP[0].wwww 3: MOV TEMP[2].xy, IN[0].zwww 4: TEX TEMP[2].x, TEMP[2], SAMP[1], 2D 5: POW TEMP[2].x, TEMP[2].xxxx, CONST[0].wwww 6: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[2].xxxx 7: MOV OUT[0], TEMP[1] 8: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %25 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %26 = load <32 x i8>, <32 x i8> addrspace(2)* %25, align 32, !tbaa !0 %27 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %28 = load <16 x i8>, <16 x i8> addrspace(2)* %27, align 16, !tbaa !0 %29 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %30 = bitcast <8 x i32> addrspace(2)* %29 to <32 x i8> addrspace(2)* %31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0 %32 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %33 = bitcast <4 x i32> addrspace(2)* %32 to <16 x i8> addrspace(2)* %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %39 = bitcast float %35 to i32 %40 = bitcast float %36 to i32 %41 = insertelement <2 x i32> undef, i32 %39, i32 0 %42 = insertelement <2 x i32> %41, i32 %40, i32 1 %43 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %42, <32 x i8> %26, <16 x i8> %28, i32 2) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = extractelement <4 x float> %43, i32 2 %47 = extractelement <4 x float> %43, i32 3 %48 = bitcast float %37 to i32 %49 = bitcast float %38 to i32 %50 = insertelement <2 x i32> undef, i32 %48, i32 0 %51 = insertelement <2 x i32> %50, i32 %49, i32 1 %52 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %51, <32 x i8> %31, <16 x i8> %34, i32 2) %53 = extractelement <4 x float> %52, i32 0 %54 = call float @llvm.pow.f32(float %53, float %24) %55 = fmul float %44, %54 %56 = fmul float %45, %54 %57 = fmul float %46, %54 %58 = call i32 @llvm.SI.packf16(float %55, float %56) %59 = bitcast i32 %58 to float %60 = call i32 @llvm.SI.packf16(float %57, float %47) %61 = bitcast i32 %60 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %59, float %61, float %59, float %61) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504 s_mov_b32 m0, s9 ; BEFC0309 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 s_load_dwordx8 s[4:11], s[6:7], 0x8 ; C0C20708 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[12:15] ; F0800F00 00650002 image_sample v4, 1, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[16:19] ; F0800100 00810404 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v4, v4 ; 7E084F04 v_mul_legacy_f32_e32 v4, s0, v4 ; 0E080800 v_exp_f32_e32 v4, v4 ; 7E084B04 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_cvt_pkrtz_f16_f32_e32 v2, v2, v3 ; 5E040702 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v0, v2, v0, v2 ; F8001C0F 02000200 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 136 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..16] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[13], IN[0].xxxx 1: MAD TEMP[0], CONST[14], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[15], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[16], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].x, CONST[7].xxxx 5: MOV TEMP[1].y, CONST[8].xxxx 6: MOV TEMP[1].z, CONST[9].xxxx 7: MOV TEMP[2].x, CONST[7].yyyy 8: MOV TEMP[2].y, CONST[8].yyyy 9: MOV TEMP[2].z, CONST[9].yyyy 10: MOV TEMP[3].x, CONST[7].zzzz 11: MOV TEMP[3].y, CONST[8].zzzz 12: MOV TEMP[3].z, CONST[9].zzzz 13: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[1].xxxx 14: MAD TEMP[1].xyz, TEMP[2].xyzz, IN[1].yyyy, TEMP[1].xyzz 15: MAD TEMP[1].xyz, TEMP[3].xyzz, IN[1].zzzz, TEMP[1].xyzz 16: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 17: RSQ TEMP[2].x, TEMP[2].xxxx 18: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 19: MOV TEMP[2].w, IMM[0].xxxx 20: MOV TEMP[2].xyz, TEMP[1].xyzx 21: DP4 TEMP[3].x, CONST[0], TEMP[2] 22: DP4 TEMP[4].x, CONST[1], TEMP[2] 23: MOV TEMP[3].y, TEMP[4].xxxx 24: DP4 TEMP[2].x, CONST[2], TEMP[2] 25: MOV TEMP[3].z, TEMP[2].xxxx 26: MUL TEMP[2], TEMP[1].xyzz, TEMP[1].yzzx 27: DP4 TEMP[4].x, CONST[3], TEMP[2] 28: DP4 TEMP[5].x, CONST[4], TEMP[2] 29: MOV TEMP[4].y, TEMP[5].xxxx 30: DP4 TEMP[2].x, CONST[5], TEMP[2] 31: MOV TEMP[4].z, TEMP[2].xxxx 32: MUL TEMP[2].x, TEMP[1].yyyy, TEMP[1].yyyy 33: MAD TEMP[2].x, TEMP[1].xxxx, TEMP[1].xxxx, -TEMP[2].xxxx 34: MAD TEMP[2].xyz, CONST[6].xyzz, TEMP[2].xxxx, TEMP[4].xyzz 35: ADD TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xyzz 36: MAD TEMP[3].xy, IN[2].xyyy, CONST[12].xyyy, CONST[12].zwww 37: MOV TEMP[3].w, TEMP[1].xxxx 38: MOV TEMP[1].xy, TEMP[1].yzyy 39: MOV TEMP[1].zw, TEMP[2].yyxy 40: MOV TEMP[2].x, TEMP[2].zzzz 41: MAD TEMP[4].x, TEMP[0].zzzz, CONST[11].zzzz, CONST[11].wwww 42: MOV TEMP[3].z, TEMP[4].xxxx 43: MOV OUT[3], TEMP[2] 44: MOV OUT[2], TEMP[1] 45: MOV OUT[0], TEMP[0] 46: MOV OUT[1], TEMP[3] 47: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = add i32 %5, %7 %74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %73) %75 = extractelement <4 x float> %74, i32 0 %76 = extractelement <4 x float> %74, i32 1 %77 = extractelement <4 x float> %74, i32 2 %78 = extractelement <4 x float> %74, i32 3 %79 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = add i32 %5, %7 %82 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %81) %83 = extractelement <4 x float> %82, i32 0 %84 = extractelement <4 x float> %82, i32 1 %85 = extractelement <4 x float> %82, i32 2 %86 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %87 = load <16 x i8>, <16 x i8> addrspace(2)* %86, align 16, !tbaa !0 %88 = add i32 %5, %7 %89 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %87, i32 0, i32 %88) %90 = extractelement <4 x float> %89, i32 0 %91 = extractelement <4 x float> %89, i32 1 %92 = fmul float %55, %75 %93 = fmul float %56, %75 %94 = fmul float %57, %75 %95 = fmul float %58, %75 %96 = fmul float %59, %76 %97 = fadd float %96, %92 %98 = fmul float %60, %76 %99 = fadd float %98, %93 %100 = fmul float %61, %76 %101 = fadd float %100, %94 %102 = fmul float %62, %76 %103 = fadd float %102, %95 %104 = fmul float %63, %77 %105 = fadd float %104, %97 %106 = fmul float %64, %77 %107 = fadd float %106, %99 %108 = fmul float %65, %77 %109 = fadd float %108, %101 %110 = fmul float %66, %77 %111 = fadd float %110, %103 %112 = fmul float %67, %78 %113 = fadd float %112, %105 %114 = fmul float %68, %78 %115 = fadd float %114, %107 %116 = fmul float %69, %78 %117 = fadd float %116, %109 %118 = fmul float %70, %78 %119 = fadd float %118, %111 %120 = fmul float %40, %83 %121 = fmul float %43, %83 %122 = fmul float %46, %83 %123 = fmul float %41, %84 %124 = fadd float %123, %120 %125 = fmul float %44, %84 %126 = fadd float %125, %121 %127 = fmul float %47, %84 %128 = fadd float %127, %122 %129 = fmul float %42, %85 %130 = fadd float %129, %124 %131 = fmul float %45, %85 %132 = fadd float %131, %126 %133 = fmul float %48, %85 %134 = fadd float %133, %128 %135 = fmul float %130, %130 %136 = fmul float %132, %132 %137 = fadd float %136, %135 %138 = fmul float %134, %134 %139 = fadd float %137, %138 %140 = call float @llvm.AMDGPU.rsq.clamped.f32(float %139) %141 = fmul float %130, %140 %142 = fmul float %132, %140 %143 = fmul float %134, %140 %144 = fmul float %13, %141 %145 = fmul float %14, %142 %146 = fadd float %144, %145 %147 = fmul float %15, %143 %148 = fadd float %146, %147 %149 = fadd float %148, %16 %150 = fmul float %17, %141 %151 = fmul float %18, %142 %152 = fadd float %150, %151 %153 = fmul float %19, %143 %154 = fadd float %152, %153 %155 = fadd float %154, %20 %156 = fmul float %21, %141 %157 = fmul float %22, %142 %158 = fadd float %156, %157 %159 = fmul float %23, %143 %160 = fadd float %158, %159 %161 = fadd float %160, %24 %162 = fmul float %141, %142 %163 = fmul float %142, %143 %164 = fmul float %143, %143 %165 = fmul float %143, %141 %166 = fmul float %25, %162 %167 = fmul float %26, %163 %168 = fadd float %166, %167 %169 = fmul float %27, %164 %170 = fadd float %168, %169 %171 = fmul float %28, %165 %172 = fadd float %170, %171 %173 = fmul float %29, %162 %174 = fmul float %30, %163 %175 = fadd float %173, %174 %176 = fmul float %31, %164 %177 = fadd float %175, %176 %178 = fmul float %32, %165 %179 = fadd float %177, %178 %180 = fmul float %33, %162 %181 = fmul float %34, %163 %182 = fadd float %180, %181 %183 = fmul float %35, %164 %184 = fadd float %182, %183 %185 = fmul float %36, %165 %186 = fadd float %184, %185 %187 = fmul float %142, %142 %188 = fmul float %141, %141 %189 = fsub float %188, %187 %190 = fmul float %37, %189 %191 = fadd float %190, %172 %192 = fmul float %38, %189 %193 = fadd float %192, %179 %194 = fmul float %39, %189 %195 = fadd float %194, %186 %196 = fadd float %191, %149 %197 = fadd float %193, %155 %198 = fadd float %195, %161 %199 = fmul float %90, %51 %200 = fadd float %199, %53 %201 = fmul float %91, %52 %202 = fadd float %201, %54 %203 = fmul float %117, %49 %204 = fadd float %203, %50 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %200, float %202, float %204, float %141) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %142, float %143, float %196, float %197) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %198, float %197, float %198, float %165) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %113, float %115, float %117, float %119) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x32 ; C2020132 s_buffer_load_dword s5, s[0:3], 0x30 ; C2028130 s_buffer_load_dword s6, s[0:3], 0x33 ; C2030133 s_buffer_load_dword s7, s[0:3], 0x1c ; C203811C s_buffer_load_dword s8, s[0:3], 0x31 ; C2040131 s_buffer_load_dword s9, s[0:3], 0x34 ; C2048134 s_buffer_load_dword s10, s[0:3], 0x35 ; C2050135 s_buffer_load_dword s11, s[0:3], 0x36 ; C2058136 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s4 ; 7E000204 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_buffer_load_dword s12, s[0:3], 0x20 ; C2060120 s_buffer_load_dword s13, s[0:3], 0x21 ; C2068121 v_mac_f32_e32 v0, s5, v8 ; 3E001005 v_mov_b32_e32 v8, s6 ; 7E100206 s_buffer_load_dword s5, s[0:3], 0x24 ; C2028124 s_buffer_load_dword s6, s[0:3], 0x25 ; C2030125 v_mac_f32_e32 v8, s8, v9 ; 3E101208 s_buffer_load_dword s8, s[0:3], 0x1e ; C204011E v_mul_f32_e32 v9, s7, v5 ; 10120A07 s_buffer_load_dword s7, s[0:3], 0x22 ; C2038122 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v9, s4, v6 ; 3E120C04 v_mul_f32_e32 v10, s12, v5 ; 10140A0C v_mac_f32_e32 v10, s13, v6 ; 3E140C0D s_buffer_load_dword s4, s[0:3], 0x26 ; C2020126 v_mul_f32_e32 v5, s5, v5 ; 100A0A05 v_mac_f32_e32 v5, s6, v6 ; 3E0A0C06 s_buffer_load_dword s5, s[0:3], 0x38 ; C2028138 v_mac_f32_e32 v9, s8, v7 ; 3E120E08 s_buffer_load_dword s6, s[0:3], 0x39 ; C2030139 v_mac_f32_e32 v10, s7, v7 ; 3E140E07 s_buffer_load_dword s7, s[0:3], 0x3a ; C203813A s_buffer_load_dword s8, s[0:3], 0x2e ; C204012E s_buffer_load_dword s12, s[0:3], 0x2f ; C206012F s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v5, s4, v7 ; 3E0A0E04 v_mul_f32_e32 v6, s9, v1 ; 100C0209 s_buffer_load_dword s4, s[0:3], 0x3b ; C202013B v_mac_f32_e32 v6, s5, v2 ; 3E0C0405 v_mul_f32_e32 v7, s10, v1 ; 100E020A v_mac_f32_e32 v7, s6, v2 ; 3E0E0406 v_mul_f32_e32 v11, s11, v1 ; 1016020B s_buffer_load_dword s5, s[0:3], 0x37 ; C2028137 v_mac_f32_e32 v11, s7, v2 ; 3E160407 s_buffer_load_dword s6, s[0:3], 0x3c ; C203013C s_buffer_load_dword s7, s[0:3], 0x3d ; C203813D s_buffer_load_dword s9, s[0:3], 0x3e ; C204813E s_buffer_load_dword s10, s[0:3], 0x3f ; C205013F s_buffer_load_dword s11, s[0:3], 0x40 ; C2058140 s_buffer_load_dword s13, s[0:3], 0x41 ; C2068141 s_buffer_load_dword s14, s[0:3], 0x42 ; C2070142 s_buffer_load_dword s15, s[0:3], 0x43 ; C2078143 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s5, v1 ; 10020205 v_mac_f32_e32 v1, s4, v2 ; 3E020404 v_mac_f32_e32 v6, s6, v3 ; 3E0C0606 v_mac_f32_e32 v7, s7, v3 ; 3E0E0607 v_mac_f32_e32 v11, s9, v3 ; 3E160609 v_mac_f32_e32 v1, s10, v3 ; 3E02060A v_mac_f32_e32 v6, s11, v4 ; 3E0C080B v_mac_f32_e32 v7, s13, v4 ; 3E0E080D v_mac_f32_e32 v11, s14, v4 ; 3E16080E v_mac_f32_e32 v1, s15, v4 ; 3E02080F s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_buffer_load_dword s5, s[0:3], 0x1a ; C202811A v_mul_f32_e32 v2, v9, v9 ; 10041309 v_mac_f32_e32 v2, v10, v10 ; 3E04150A v_mac_f32_e32 v2, v5, v5 ; 3E040B05 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101 s_buffer_load_dword s9, s[0:3], 0x2 ; C2048102 s_buffer_load_dword s10, s[0:3], 0x3 ; C2050103 s_buffer_load_dword s11, s[0:3], 0x4 ; C2058104 s_buffer_load_dword s13, s[0:3], 0x5 ; C2068105 s_buffer_load_dword s14, s[0:3], 0x6 ; C2070106 s_buffer_load_dword s15, s[0:3], 0x7 ; C2078107 s_buffer_load_dword s16, s[0:3], 0x8 ; C2080108 s_buffer_load_dword s17, s[0:3], 0x9 ; C2088109 s_buffer_load_dword s18, s[0:3], 0xa ; C209010A s_buffer_load_dword s19, s[0:3], 0xb ; C209810B s_buffer_load_dword s20, s[0:3], 0xc ; C20A010C s_buffer_load_dword s21, s[0:3], 0xd ; C20A810D s_buffer_load_dword s22, s[0:3], 0xe ; C20B010E s_buffer_load_dword s23, s[0:3], 0x14 ; C20B8114 s_buffer_load_dword s24, s[0:3], 0x15 ; C20C0115 s_buffer_load_dword s25, s[0:3], 0xf ; C20C810F s_buffer_load_dword s26, s[0:3], 0x10 ; C20D0110 s_buffer_load_dword s27, s[0:3], 0x11 ; C20D8111 s_buffer_load_dword s28, s[0:3], 0x12 ; C20E0112 s_buffer_load_dword s29, s[0:3], 0x13 ; C20E8113 s_buffer_load_dword s30, s[0:3], 0x16 ; C20F0116 s_buffer_load_dword s31, s[0:3], 0x17 ; C20F8117 s_buffer_load_dword s0, s[0:3], 0x18 ; C2000118 v_mul_f32_e32 v3, v2, v10 ; 10061502 v_mul_f32_e32 v4, v2, v5 ; 10080B02 v_mul_f32_e32 v5, v4, v3 ; 100A0704 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s21, v5 ; 10140A15 v_mul_f32_e32 v12, s27, v5 ; 10180A1B v_mul_f32_e32 v5, s24, v5 ; 100A0A18 v_mul_f32_e32 v2, v2, v9 ; 10041302 v_mul_f32_e32 v9, v3, v2 ; 10120503 v_mac_f32_e32 v10, s20, v9 ; 3E141214 v_mac_f32_e32 v12, s26, v9 ; 3E18121A v_mac_f32_e32 v5, s23, v9 ; 3E0A1217 v_mul_f32_e32 v9, v4, v4 ; 10120904 v_mac_f32_e32 v10, s22, v9 ; 3E141216 v_mac_f32_e32 v12, s28, v9 ; 3E18121C v_mac_f32_e32 v5, s30, v9 ; 3E0A121E v_mul_f32_e32 v9, v2, v4 ; 10120902 v_mac_f32_e32 v10, s25, v9 ; 3E141219 v_mac_f32_e32 v12, s29, v9 ; 3E18121D v_mac_f32_e32 v5, s31, v9 ; 3E0A121F v_mul_f32_e32 v13, v3, v3 ; 101A0703 v_mad_f32 v13, v2, v2, -v13 ; D282000D 84360502 v_mac_f32_e32 v10, s0, v13 ; 3E141A00 v_mac_f32_e32 v12, s4, v13 ; 3E181A04 v_mac_f32_e32 v5, s5, v13 ; 3E0A1A05 v_mov_b32_e32 v13, s12 ; 7E1A020C v_mac_f32_e32 v13, s8, v11 ; 3E1A1608 v_mul_f32_e32 v14, s7, v3 ; 101C0607 v_mac_f32_e32 v14, s6, v2 ; 3E1C0406 v_mul_f32_e32 v15, s13, v3 ; 101E060D v_mac_f32_e32 v15, s11, v2 ; 3E1E040B v_mul_f32_e32 v16, s17, v3 ; 10200611 v_mac_f32_e32 v16, s16, v2 ; 3E200410 v_mac_f32_e32 v14, s9, v4 ; 3E1C0809 v_mac_f32_e32 v15, s14, v4 ; 3E1E080E v_mac_f32_e32 v16, s18, v4 ; 3E200812 v_add_f32_e32 v14, s10, v14 ; 061C1C0A v_add_f32_e32 v15, s15, v15 ; 061E1E0F v_add_f32_e32 v16, s19, v16 ; 06202013 exp 15, 32, 0, 0, 0, v0, v8, v13, v2 ; F800020F 020D0800 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v0, v14, v10 ; 0600150E v_add_f32_e32 v2, v15, v12 ; 0604190F exp 15, 33, 0, 0, 0, v3, v4, v0, v2 ; F800021F 02000403 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v0, v16, v5 ; 06000B10 exp 15, 34, 0, 0, 0, v0, v2, v0, v9 ; F800022F 09000200 exp 15, 12, 0, 1, 0, v6, v7, v11, v1 ; F80008CF 010B0706 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 20 Code Size: 652 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0..3] DCL CONST[5] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { 2.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].x, IN[0].wwww 1: MOV TEMP[0].yz, IN[1].yxyy 2: MOV TEMP[1].xy, IN[1].zwzz 3: MOV TEMP[1].z, IN[2].xxxx 4: MOV TEMP[2].xy, IN[0].xyyy 5: TEX TEMP[2], TEMP[2], SAMP[0], 2D 6: ADD TEMP[3].x, IMM[0].xxxx, -CONST[3].wwww 7: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xxxx 8: FSLT TEMP[4].x, TEMP[2].wwww, CONST[5].xxxx 9: AND TEMP[4].x, TEMP[4].xxxx, IMM[0].yyyy 10: KILL_IF -TEMP[4].xxxx 11: MUL TEMP[4].xyz, TEMP[3].xyzz, CONST[2].xyzz 12: DP3 TEMP[0].x, TEMP[0].xyzz, CONST[0].xyzz 13: MAX TEMP[0].x, IMM[0].zzzz, TEMP[0].xxxx 14: MUL TEMP[0].xyz, TEMP[4].xyzz, TEMP[0].xxxx 15: MAD TEMP[0].xyz, TEMP[3].xyzz, TEMP[1].xyzz, TEMP[0].xyzz 16: MOV TEMP[1].w, TEMP[2].wwww 17: MOV_SAT TEMP[2].x, IN[0].zzzz 18: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[0].xyzz, CONST[1].xyzz 19: MOV OUT[0], TEMP[1] 20: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %35 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %36 = load <32 x i8>, <32 x i8> addrspace(2)* %35, align 32, !tbaa !0 %37 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %48 = bitcast float %39 to i32 %49 = bitcast float %40 to i32 %50 = insertelement <2 x i32> undef, i32 %48, i32 0 %51 = insertelement <2 x i32> %50, i32 %49, i32 1 %52 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %51, <32 x i8> %36, <16 x i8> %38, i32 2) %53 = extractelement <4 x float> %52, i32 0 %54 = extractelement <4 x float> %52, i32 1 %55 = extractelement <4 x float> %52, i32 2 %56 = extractelement <4 x float> %52, i32 3 %57 = fsub float 2.000000e+00, %33 %58 = fmul float %53, %57 %59 = fmul float %54, %57 %60 = fmul float %55, %57 %61 = fcmp olt float %56, %34 %62 = select i1 %61, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %62) %63 = fmul float %58, %30 %64 = fmul float %59, %31 %65 = fmul float %60, %32 %66 = fmul float %42, %24 %67 = fmul float %43, %25 %68 = fadd float %67, %66 %69 = fmul float %44, %26 %70 = fadd float %68, %69 %71 = call float @llvm.maxnum.f32(float %70, float 0.000000e+00) %72 = fmul float %63, %71 %73 = fmul float %64, %71 %74 = fmul float %65, %71 %75 = fmul float %58, %45 %76 = fadd float %75, %72 %77 = fmul float %59, %46 %78 = fadd float %77, %73 %79 = fmul float %60, %47 %80 = fadd float %79, %74 %81 = call float @llvm.AMDIL.clamp.(float %41, float 0.000000e+00, float 1.000000e+00) %82 = call float @llvm.AMDGPU.lrp(float %81, float %76, float %27) %83 = call float @llvm.AMDGPU.lrp(float %81, float %78, float %28) %84 = call float @llvm.AMDGPU.lrp(float %81, float %80, float %29) %85 = call i32 @llvm.SI.packf16(float %82, float %83) %86 = bitcast i32 %85 to float %87 = call i32 @llvm.SI.packf16(float %84, float %56) %88 = bitcast i32 %87 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %86, float %88, float %86, float %88) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v0, v0, 0, 2, [m0] ; C8000800 v_interp_p2_f32 v0, [v0], v1, 0, 2, [m0] ; C8010801 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800F00 00430A02 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108 s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 s_buffer_load_dword s9, s[0:3], 0xa ; C204810A s_buffer_load_dword s10, s[0:3], 0x0 ; C2050100 s_buffer_load_dword s11, s[0:3], 0x1 ; C2058101 s_buffer_load_dword s12, s[0:3], 0x2 ; C2060102 s_buffer_load_dword s13, s[0:3], 0x4 ; C2068104 s_buffer_load_dword s0, s[0:3], 0x5 ; C2000105 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_cmp_gt_f32_e32 vcc, s5, v13 ; 7C081A05 v_cndmask_b32_e64 v1, 0, -1.0, vcc ; D2000001 01A9E680 v_sub_f32_e64 v2, 2.0, s4 ; D2080002 000008F4 v_mul_f32_e32 v3, v2, v10 ; 10061502 v_mul_f32_e32 v10, v2, v11 ; 10141702 v_mul_f32_e32 v2, v2, v12 ; 10041902 v_cmpx_le_f32_e32 vcc, 0, v1 ; 7C260280 v_mul_f32_e32 v1, s10, v5 ; 10020A0A v_mac_f32_e32 v1, s11, v6 ; 3E020C0B v_mac_f32_e32 v1, s12, v7 ; 3E020E0C v_mul_f32_e32 v5, s9, v2 ; 100A0409 v_max_f32_e32 v1, 0, v1 ; 20020280 v_mul_f32_e32 v5, v1, v5 ; 100A0B01 v_mac_f32_e32 v5, v0, v2 ; 3E0A0500 v_add_f32_e64 v0, 0, v4 clamp ; D2060800 00020880 v_sub_f32_e32 v2, 1.0, v0 ; 080400F2 v_mul_f32_e32 v4, s6, v2 ; 10080406 v_mac_f32_e32 v4, v5, v0 ; 3E080105 v_cvt_pkrtz_f16_f32_e32 v4, v4, v13 ; 5E081B04 v_mul_f32_e32 v5, s7, v3 ; 100A0607 v_mul_f32_e32 v5, v1, v5 ; 100A0B01 v_mac_f32_e32 v5, v8, v3 ; 3E0A0708 v_mul_f32_e32 v3, s8, v10 ; 10061408 v_mul_f32_e32 v1, v1, v3 ; 10020701 v_mac_f32_e32 v1, v9, v10 ; 3E021509 v_mul_f32_e32 v3, s13, v2 ; 1006040D v_mac_f32_e32 v3, v5, v0 ; 3E060105 v_mul_f32_e32 v2, s0, v2 ; 10040400 v_mac_f32_e32 v2, v1, v0 ; 3E040101 v_cvt_pkrtz_f16_f32_e32 v0, v3, v2 ; 5E000503 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 296 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..28] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[25], IN[0].xxxx 1: MAD TEMP[0], CONST[26], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[27], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[28], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[15], IN[0].xxxx 5: MAD TEMP[1], CONST[16], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[17], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1].xyz, CONST[18], IN[0].wwww, TEMP[1] 8: MOV TEMP[2].x, CONST[19].xxxx 9: MOV TEMP[2].y, CONST[20].xxxx 10: MOV TEMP[2].z, CONST[21].xxxx 11: MOV TEMP[3].x, CONST[19].yyyy 12: MOV TEMP[3].y, CONST[20].yyyy 13: MOV TEMP[3].z, CONST[21].yyyy 14: MOV TEMP[4].x, CONST[19].zzzz 15: MOV TEMP[4].y, CONST[20].zzzz 16: MOV TEMP[4].z, CONST[21].zzzz 17: MUL TEMP[2].xyz, TEMP[2].xyzz, IN[1].xxxx 18: MAD TEMP[2].xyz, TEMP[3].xyzz, IN[1].yyyy, TEMP[2].xyzz 19: MAD TEMP[2].xyz, TEMP[4].xyzz, IN[1].zzzz, TEMP[2].xyzz 20: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 21: RSQ TEMP[3].x, TEMP[3].xxxx 22: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 23: MOV TEMP[3].w, IMM[0].xxxx 24: MOV TEMP[3].xyz, TEMP[2].xyzx 25: DP4 TEMP[4].x, CONST[8], TEMP[3] 26: DP4 TEMP[5].x, CONST[9], TEMP[3] 27: MOV TEMP[4].y, TEMP[5].xxxx 28: DP4 TEMP[3].x, CONST[10], TEMP[3] 29: MOV TEMP[4].z, TEMP[3].xxxx 30: MUL TEMP[3], TEMP[2].xyzz, TEMP[2].yzzx 31: DP4 TEMP[5].x, CONST[11], TEMP[3] 32: DP4 TEMP[6].x, CONST[12], TEMP[3] 33: MOV TEMP[5].y, TEMP[6].xxxx 34: DP4 TEMP[3].x, CONST[13], TEMP[3] 35: MOV TEMP[5].z, TEMP[3].xxxx 36: ADD TEMP[3], CONST[0], -TEMP[1].xxxx 37: ADD TEMP[6], CONST[1], -TEMP[1].yyyy 38: ADD TEMP[1], CONST[2], -TEMP[1].zzzz 39: MUL TEMP[7], TEMP[3], TEMP[3] 40: MAD TEMP[7], TEMP[6], TEMP[6], TEMP[7] 41: MAD TEMP[7], TEMP[1], TEMP[1], TEMP[7] 42: MUL TEMP[3], TEMP[3], TEMP[2].xxxx 43: MAD TEMP[3], TEMP[6], TEMP[2].yyyy, TEMP[3] 44: MAD TEMP[1], TEMP[1], TEMP[2].zzzz, TEMP[3] 45: RSQ TEMP[3].x, TEMP[7].xxxx 46: RSQ TEMP[3].y, TEMP[7].yyyy 47: RSQ TEMP[3].z, TEMP[7].zzzz 48: RSQ TEMP[3].w, TEMP[7].wwww 49: MUL TEMP[1], TEMP[1], TEMP[3] 50: MAX TEMP[1], IMM[0].yyyy, TEMP[1] 51: MAD TEMP[3], TEMP[7], CONST[3], IMM[0].xxxx 52: RCP TEMP[6].x, TEMP[3].xxxx 53: RCP TEMP[6].y, TEMP[3].yyyy 54: RCP TEMP[6].z, TEMP[3].zzzz 55: RCP TEMP[6].w, TEMP[3].wwww 56: MUL TEMP[1], TEMP[1], TEMP[6] 57: MUL TEMP[3].x, TEMP[2].yyyy, TEMP[2].yyyy 58: MAD TEMP[3].x, TEMP[2].xxxx, TEMP[2].xxxx, -TEMP[3].xxxx 59: MAD TEMP[3].xyz, CONST[14].xyzz, TEMP[3].xxxx, TEMP[5].xyzz 60: ADD TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xyzz 61: MUL TEMP[4].xyz, CONST[4].xyzz, TEMP[1].xxxx 62: MAD TEMP[4].xyz, CONST[5].xyzz, TEMP[1].yyyy, TEMP[4].xyzz 63: MAD TEMP[4].xyz, CONST[6].xyzz, TEMP[1].zzzz, TEMP[4].xyzz 64: MAD TEMP[1].xyz, CONST[7].xyzz, TEMP[1].wwww, TEMP[4].xyzz 65: ADD TEMP[1].xyz, TEMP[3].xyzz, TEMP[1].xyzz 66: MAD TEMP[3].xy, IN[2].xyyy, CONST[24].xyyy, CONST[24].zwww 67: MOV TEMP[3].w, TEMP[2].xxxx 68: MOV TEMP[2].xy, TEMP[2].yzyy 69: MOV TEMP[2].zw, TEMP[1].yyxy 70: MOV TEMP[1].x, TEMP[1].zzzz 71: MAD TEMP[4].x, TEMP[0].zzzz, CONST[23].zzzz, CONST[23].wwww 72: MOV TEMP[3].z, TEMP[4].xxxx 73: MOV OUT[3], TEMP[1] 74: MOV OUT[2], TEMP[2] 75: MOV OUT[0], TEMP[0] 76: MOV OUT[1], TEMP[3] 77: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %83 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %84 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336) %87 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340) %88 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344) %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) %90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) %91 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384) %92 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388) %93 = call float @llvm.SI.load.const(<16 x i8> %12, i32 392) %94 = call float @llvm.SI.load.const(<16 x i8> %12, i32 396) %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 400) %96 = call float @llvm.SI.load.const(<16 x i8> %12, i32 404) %97 = call float @llvm.SI.load.const(<16 x i8> %12, i32 408) %98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 412) %99 = call float @llvm.SI.load.const(<16 x i8> %12, i32 416) %100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 420) %101 = call float @llvm.SI.load.const(<16 x i8> %12, i32 424) %102 = call float @llvm.SI.load.const(<16 x i8> %12, i32 428) %103 = call float @llvm.SI.load.const(<16 x i8> %12, i32 432) %104 = call float @llvm.SI.load.const(<16 x i8> %12, i32 436) %105 = call float @llvm.SI.load.const(<16 x i8> %12, i32 440) %106 = call float @llvm.SI.load.const(<16 x i8> %12, i32 444) %107 = call float @llvm.SI.load.const(<16 x i8> %12, i32 448) %108 = call float @llvm.SI.load.const(<16 x i8> %12, i32 452) %109 = call float @llvm.SI.load.const(<16 x i8> %12, i32 456) %110 = call float @llvm.SI.load.const(<16 x i8> %12, i32 460) %111 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %112 = load <16 x i8>, <16 x i8> addrspace(2)* %111, align 16, !tbaa !0 %113 = add i32 %5, %7 %114 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %112, i32 0, i32 %113) %115 = extractelement <4 x float> %114, i32 0 %116 = extractelement <4 x float> %114, i32 1 %117 = extractelement <4 x float> %114, i32 2 %118 = extractelement <4 x float> %114, i32 3 %119 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %120 = load <16 x i8>, <16 x i8> addrspace(2)* %119, align 16, !tbaa !0 %121 = add i32 %5, %7 %122 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %120, i32 0, i32 %121) %123 = extractelement <4 x float> %122, i32 0 %124 = extractelement <4 x float> %122, i32 1 %125 = extractelement <4 x float> %122, i32 2 %126 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %127 = load <16 x i8>, <16 x i8> addrspace(2)* %126, align 16, !tbaa !0 %128 = add i32 %5, %7 %129 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %127, i32 0, i32 %128) %130 = extractelement <4 x float> %129, i32 0 %131 = extractelement <4 x float> %129, i32 1 %132 = fmul float %95, %115 %133 = fmul float %96, %115 %134 = fmul float %97, %115 %135 = fmul float %98, %115 %136 = fmul float %99, %116 %137 = fadd float %136, %132 %138 = fmul float %100, %116 %139 = fadd float %138, %133 %140 = fmul float %101, %116 %141 = fadd float %140, %134 %142 = fmul float %102, %116 %143 = fadd float %142, %135 %144 = fmul float %103, %117 %145 = fadd float %144, %137 %146 = fmul float %104, %117 %147 = fadd float %146, %139 %148 = fmul float %105, %117 %149 = fadd float %148, %141 %150 = fmul float %106, %117 %151 = fadd float %150, %143 %152 = fmul float %107, %118 %153 = fadd float %152, %145 %154 = fmul float %108, %118 %155 = fadd float %154, %147 %156 = fmul float %109, %118 %157 = fadd float %156, %149 %158 = fmul float %110, %118 %159 = fadd float %158, %151 %160 = fmul float %68, %115 %161 = fmul float %69, %115 %162 = fmul float %70, %115 %163 = fmul float %71, %116 %164 = fadd float %163, %160 %165 = fmul float %72, %116 %166 = fadd float %165, %161 %167 = fmul float %73, %116 %168 = fadd float %167, %162 %169 = fmul float %74, %117 %170 = fadd float %169, %164 %171 = fmul float %75, %117 %172 = fadd float %171, %166 %173 = fmul float %76, %117 %174 = fadd float %173, %168 %175 = fmul float %77, %118 %176 = fadd float %175, %170 %177 = fmul float %78, %118 %178 = fadd float %177, %172 %179 = fmul float %79, %118 %180 = fadd float %179, %174 %181 = fmul float %80, %123 %182 = fmul float %83, %123 %183 = fmul float %86, %123 %184 = fmul float %81, %124 %185 = fadd float %184, %181 %186 = fmul float %84, %124 %187 = fadd float %186, %182 %188 = fmul float %87, %124 %189 = fadd float %188, %183 %190 = fmul float %82, %125 %191 = fadd float %190, %185 %192 = fmul float %85, %125 %193 = fadd float %192, %187 %194 = fmul float %88, %125 %195 = fadd float %194, %189 %196 = fmul float %191, %191 %197 = fmul float %193, %193 %198 = fadd float %197, %196 %199 = fmul float %195, %195 %200 = fadd float %198, %199 %201 = call float @llvm.AMDGPU.rsq.clamped.f32(float %200) %202 = fmul float %191, %201 %203 = fmul float %193, %201 %204 = fmul float %195, %201 %205 = fmul float %41, %202 %206 = fmul float %42, %203 %207 = fadd float %205, %206 %208 = fmul float %43, %204 %209 = fadd float %207, %208 %210 = fadd float %209, %44 %211 = fmul float %45, %202 %212 = fmul float %46, %203 %213 = fadd float %211, %212 %214 = fmul float %47, %204 %215 = fadd float %213, %214 %216 = fadd float %215, %48 %217 = fmul float %49, %202 %218 = fmul float %50, %203 %219 = fadd float %217, %218 %220 = fmul float %51, %204 %221 = fadd float %219, %220 %222 = fadd float %221, %52 %223 = fmul float %202, %203 %224 = fmul float %203, %204 %225 = fmul float %204, %204 %226 = fmul float %204, %202 %227 = fmul float %53, %223 %228 = fmul float %54, %224 %229 = fadd float %227, %228 %230 = fmul float %55, %225 %231 = fadd float %229, %230 %232 = fmul float %56, %226 %233 = fadd float %231, %232 %234 = fmul float %57, %223 %235 = fmul float %58, %224 %236 = fadd float %234, %235 %237 = fmul float %59, %225 %238 = fadd float %236, %237 %239 = fmul float %60, %226 %240 = fadd float %238, %239 %241 = fmul float %61, %223 %242 = fmul float %62, %224 %243 = fadd float %241, %242 %244 = fmul float %63, %225 %245 = fadd float %243, %244 %246 = fmul float %64, %226 %247 = fadd float %245, %246 %248 = fsub float %13, %176 %249 = fsub float %14, %176 %250 = fsub float %15, %176 %251 = fsub float %16, %176 %252 = fsub float %17, %178 %253 = fsub float %18, %178 %254 = fsub float %19, %178 %255 = fsub float %20, %178 %256 = fsub float %21, %180 %257 = fsub float %22, %180 %258 = fsub float %23, %180 %259 = fsub float %24, %180 %260 = fmul float %248, %248 %261 = fmul float %249, %249 %262 = fmul float %250, %250 %263 = fmul float %251, %251 %264 = fmul float %252, %252 %265 = fadd float %264, %260 %266 = fmul float %253, %253 %267 = fadd float %266, %261 %268 = fmul float %254, %254 %269 = fadd float %268, %262 %270 = fmul float %255, %255 %271 = fadd float %270, %263 %272 = fmul float %256, %256 %273 = fadd float %272, %265 %274 = fmul float %257, %257 %275 = fadd float %274, %267 %276 = fmul float %258, %258 %277 = fadd float %276, %269 %278 = fmul float %259, %259 %279 = fadd float %278, %271 %280 = fmul float %248, %202 %281 = fmul float %249, %202 %282 = fmul float %250, %202 %283 = fmul float %251, %202 %284 = fmul float %252, %203 %285 = fadd float %284, %280 %286 = fmul float %253, %203 %287 = fadd float %286, %281 %288 = fmul float %254, %203 %289 = fadd float %288, %282 %290 = fmul float %255, %203 %291 = fadd float %290, %283 %292 = fmul float %256, %204 %293 = fadd float %292, %285 %294 = fmul float %257, %204 %295 = fadd float %294, %287 %296 = fmul float %258, %204 %297 = fadd float %296, %289 %298 = fmul float %259, %204 %299 = fadd float %298, %291 %300 = call float @llvm.AMDGPU.rsq.clamped.f32(float %273) %301 = call float @llvm.AMDGPU.rsq.clamped.f32(float %275) %302 = call float @llvm.AMDGPU.rsq.clamped.f32(float %277) %303 = call float @llvm.AMDGPU.rsq.clamped.f32(float %279) %304 = fmul float %293, %300 %305 = fmul float %295, %301 %306 = fmul float %297, %302 %307 = fmul float %299, %303 %308 = call float @llvm.maxnum.f32(float %304, float 0.000000e+00) %309 = call float @llvm.maxnum.f32(float %305, float 0.000000e+00) %310 = call float @llvm.maxnum.f32(float %306, float 0.000000e+00) %311 = call float @llvm.maxnum.f32(float %307, float 0.000000e+00) %312 = fmul float %273, %25 %313 = fadd float %312, 1.000000e+00 %314 = fmul float %275, %26 %315 = fadd float %314, 1.000000e+00 %316 = fmul float %277, %27 %317 = fadd float %316, 1.000000e+00 %318 = fmul float %279, %28 %319 = fadd float %318, 1.000000e+00 %320 = fdiv float 1.000000e+00, %313 %321 = fdiv float 1.000000e+00, %315 %322 = fdiv float 1.000000e+00, %317 %323 = fdiv float 1.000000e+00, %319 %324 = fmul float %308, %320 %325 = fmul float %309, %321 %326 = fmul float %310, %322 %327 = fmul float %311, %323 %328 = fmul float %203, %203 %329 = fmul float %202, %202 %330 = fsub float %329, %328 %331 = fmul float %65, %330 %332 = fadd float %331, %233 %333 = fmul float %66, %330 %334 = fadd float %333, %240 %335 = fmul float %67, %330 %336 = fadd float %335, %247 %337 = fadd float %332, %210 %338 = fadd float %334, %216 %339 = fadd float %336, %222 %340 = fmul float %29, %324 %341 = fmul float %30, %324 %342 = fmul float %31, %324 %343 = fmul float %32, %325 %344 = fadd float %343, %340 %345 = fmul float %33, %325 %346 = fadd float %345, %341 %347 = fmul float %34, %325 %348 = fadd float %347, %342 %349 = fmul float %35, %326 %350 = fadd float %349, %344 %351 = fmul float %36, %326 %352 = fadd float %351, %346 %353 = fmul float %37, %326 %354 = fadd float %353, %348 %355 = fmul float %38, %327 %356 = fadd float %355, %350 %357 = fmul float %39, %327 %358 = fadd float %357, %352 %359 = fmul float %40, %327 %360 = fadd float %359, %354 %361 = fadd float %337, %356 %362 = fadd float %338, %358 %363 = fadd float %339, %360 %364 = fmul float %130, %91 %365 = fadd float %364, %93 %366 = fmul float %131, %92 %367 = fadd float %366, %94 %368 = fmul float %157, %89 %369 = fadd float %368, %90 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %365, float %367, float %369, float %202) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %203, float %204, float %361, float %362) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %363, float %362, float %363, float %327) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %153, float %155, float %157, float %159) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[8:11], v0, s[8:11], 0 idxen ; E00C2000 80020800 s_load_dwordx4 s[76:79], s[2:3], 0x0 ; C0A60300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s33, s[76:79], 0x0 ; C210CD00 s_buffer_load_dword s26, s[76:79], 0x1 ; C20D4D01 s_buffer_load_dword s8, s[76:79], 0x2 ; C2044D02 s_buffer_load_dword s4, s[76:79], 0x3 ; C2024D03 s_buffer_load_dword s37, s[76:79], 0x4 ; C212CD04 s_buffer_load_dword s27, s[76:79], 0x5 ; C20DCD05 s_buffer_load_dword s11, s[76:79], 0x6 ; C205CD06 s_buffer_load_dword s5, s[76:79], 0x7 ; C202CD07 s_buffer_load_dword s38, s[76:79], 0x8 ; C2134D08 s_buffer_load_dword s31, s[76:79], 0x9 ; C20FCD09 s_buffer_load_dword s18, s[76:79], 0xa ; C2094D0A s_buffer_load_dword s6, s[76:79], 0xb ; C2034D0B s_buffer_load_dword s39, s[76:79], 0xc ; C213CD0C s_buffer_load_dword s32, s[76:79], 0xd ; C2104D0D s_buffer_load_dword s19, s[76:79], 0xe ; C209CD0E s_buffer_load_dword s0, s[76:79], 0xf ; C2004D0F s_buffer_load_dword s34, s[76:79], 0x10 ; C2114D10 s_buffer_load_dword s35, s[76:79], 0x11 ; C211CD11 s_buffer_load_dword s36, s[76:79], 0x12 ; C2124D12 s_buffer_load_dword s28, s[76:79], 0x14 ; C20E4D14 s_buffer_load_dword s29, s[76:79], 0x15 ; C20ECD15 s_buffer_load_dword s30, s[76:79], 0x16 ; C20F4D16 s_buffer_load_dword s14, s[76:79], 0x18 ; C2074D18 s_buffer_load_dword s15, s[76:79], 0x19 ; C207CD19 s_buffer_load_dword s16, s[76:79], 0x1a ; C2084D1A s_buffer_load_dword s1, s[76:79], 0x1c ; C200CD1C s_buffer_load_dword s2, s[76:79], 0x1d ; C2014D1D s_buffer_load_dword s3, s[76:79], 0x1e ; C201CD1E s_buffer_load_dword s12, s[76:79], 0x20 ; C2064D20 s_buffer_load_dword s20, s[76:79], 0x21 ; C20A4D21 s_buffer_load_dword s9, s[76:79], 0x22 ; C204CD22 s_buffer_load_dword s7, s[76:79], 0x23 ; C203CD23 s_buffer_load_dword s21, s[76:79], 0x24 ; C20ACD24 s_buffer_load_dword s23, s[76:79], 0x25 ; C20BCD25 s_buffer_load_dword s13, s[76:79], 0x26 ; C206CD26 s_buffer_load_dword s10, s[76:79], 0x27 ; C2054D27 s_buffer_load_dword s24, s[76:79], 0x28 ; C20C4D28 s_buffer_load_dword s25, s[76:79], 0x29 ; C20CCD29 s_buffer_load_dword s22, s[76:79], 0x2a ; C20B4D2A s_buffer_load_dword s17, s[76:79], 0x2b ; C208CD2B s_buffer_load_dword s46, s[76:79], 0x2c ; C2174D2C s_buffer_load_dword s51, s[76:79], 0x2d ; C219CD2D s_buffer_load_dword s41, s[76:79], 0x2e ; C214CD2E s_buffer_load_dword s40, s[76:79], 0x2f ; C2144D2F s_buffer_load_dword s49, s[76:79], 0x30 ; C218CD30 s_buffer_load_dword s53, s[76:79], 0x31 ; C21ACD31 s_buffer_load_dword s48, s[76:79], 0x32 ; C2184D32 s_buffer_load_dword s42, s[76:79], 0x33 ; C2154D33 s_buffer_load_dword s52, s[76:79], 0x34 ; C21A4D34 s_buffer_load_dword s54, s[76:79], 0x35 ; C21B4D35 s_buffer_load_dword s50, s[76:79], 0x36 ; C2194D36 s_buffer_load_dword s47, s[76:79], 0x37 ; C217CD37 s_buffer_load_dword s43, s[76:79], 0x38 ; C215CD38 s_buffer_load_dword s44, s[76:79], 0x39 ; C2164D39 s_buffer_load_dword s45, s[76:79], 0x3a ; C216CD3A s_buffer_load_dword s69, s[76:79], 0x3c ; C222CD3C s_buffer_load_dword s70, s[76:79], 0x3d ; C2234D3D s_buffer_load_dword s71, s[76:79], 0x3e ; C223CD3E s_buffer_load_dword s63, s[76:79], 0x40 ; C21FCD40 s_buffer_load_dword s64, s[76:79], 0x41 ; C2204D41 s_buffer_load_dword s65, s[76:79], 0x42 ; C220CD42 s_buffer_load_dword s59, s[76:79], 0x44 ; C21DCD44 s_buffer_load_dword s60, s[76:79], 0x45 ; C21E4D45 s_buffer_load_dword s61, s[76:79], 0x46 ; C21ECD46 s_buffer_load_dword s57, s[76:79], 0x48 ; C21CCD48 s_buffer_load_dword s55, s[76:79], 0x49 ; C21BCD49 s_buffer_load_dword s56, s[76:79], 0x4a ; C21C4D4A s_buffer_load_dword s74, s[76:79], 0x4c ; C2254D4C s_buffer_load_dword s68, s[76:79], 0x4d ; C2224D4D s_buffer_load_dword s62, s[76:79], 0x4e ; C21F4D4E s_buffer_load_dword s75, s[76:79], 0x50 ; C225CD50 s_buffer_load_dword s72, s[76:79], 0x51 ; C2244D51 s_buffer_load_dword s66, s[76:79], 0x52 ; C2214D52 s_buffer_load_dword s80, s[76:79], 0x54 ; C2284D54 s_buffer_load_dword s73, s[76:79], 0x55 ; C224CD55 s_buffer_load_dword s67, s[76:79], 0x56 ; C221CD56 s_buffer_load_dword s58, s[76:79], 0x5e ; C21D4D5E s_buffer_load_dword s81, s[76:79], 0x5f ; C228CD5F s_buffer_load_dword s82, s[76:79], 0x60 ; C2294D60 s_buffer_load_dword s83, s[76:79], 0x61 ; C229CD61 s_buffer_load_dword s84, s[76:79], 0x62 ; C22A4D62 s_buffer_load_dword s85, s[76:79], 0x63 ; C22ACD63 s_buffer_load_dword s86, s[76:79], 0x64 ; C22B4D64 s_buffer_load_dword s87, s[76:79], 0x65 ; C22BCD65 s_buffer_load_dword s88, s[76:79], 0x66 ; C22C4D66 s_buffer_load_dword s89, s[76:79], 0x67 ; C22CCD67 s_buffer_load_dword s90, s[76:79], 0x68 ; C22D4D68 s_buffer_load_dword s91, s[76:79], 0x69 ; C22DCD69 s_buffer_load_dword s92, s[76:79], 0x6a ; C22E4D6A s_buffer_load_dword s93, s[76:79], 0x6b ; C22ECD6B s_buffer_load_dword s94, s[76:79], 0x6c ; C22F4D6C s_buffer_load_dword s95, s[76:79], 0x6d ; C22FCD6D s_buffer_load_dword s96, s[76:79], 0x6e ; C2304D6E s_buffer_load_dword s97, s[76:79], 0x6f ; C230CD6F s_buffer_load_dword s98, s[76:79], 0x70 ; C2314D70 s_buffer_load_dword s99, s[76:79], 0x71 ; C231CD71 s_buffer_load_dword s100, s[76:79], 0x72 ; C2324D72 s_buffer_load_dword s76, s[76:79], 0x73 ; C2264D73 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s84 ; 7E000254 v_mov_b32_e32 v10, s85 ; 7E140255 v_mov_b32_e32 v11, s81 ; 7E160251 v_mul_f32_e32 v12, s86, v1 ; 10180256 v_mul_f32_e32 v13, s87, v1 ; 101A0257 v_mul_f32_e32 v14, s88, v1 ; 101C0258 v_mul_f32_e32 v15, s89, v1 ; 101E0259 v_mul_f32_e32 v16, s69, v1 ; 10200245 v_mul_f32_e32 v17, s70, v1 ; 10220246 v_mul_f32_e32 v1, s71, v1 ; 10020247 v_mul_f32_e32 v18, s74, v5 ; 10240A4A v_mul_f32_e32 v19, s75, v5 ; 10260A4B v_mul_f32_e32 v5, s80, v5 ; 100A0A50 v_mac_f32_e32 v0, s82, v8 ; 3E001052 v_mac_f32_e32 v10, s83, v9 ; 3E141253 v_mac_f32_e32 v12, s90, v2 ; 3E18045A v_mac_f32_e32 v13, s91, v2 ; 3E1A045B v_mac_f32_e32 v14, s92, v2 ; 3E1C045C v_mac_f32_e32 v15, s93, v2 ; 3E1E045D v_mac_f32_e32 v16, s63, v2 ; 3E20043F v_mac_f32_e32 v17, s64, v2 ; 3E220440 v_mac_f32_e32 v1, s65, v2 ; 3E020441 v_mac_f32_e32 v18, s68, v6 ; 3E240C44 v_mac_f32_e32 v19, s72, v6 ; 3E260C48 v_mac_f32_e32 v5, s73, v6 ; 3E0A0C49 v_mac_f32_e32 v12, s94, v3 ; 3E18065E v_mac_f32_e32 v13, s95, v3 ; 3E1A065F v_mac_f32_e32 v14, s96, v3 ; 3E1C0660 v_mac_f32_e32 v15, s97, v3 ; 3E1E0661 v_mac_f32_e32 v16, s59, v3 ; 3E20063B v_mac_f32_e32 v17, s60, v3 ; 3E22063C v_mac_f32_e32 v1, s61, v3 ; 3E02063D v_mac_f32_e32 v18, s62, v7 ; 3E240E3E v_mac_f32_e32 v19, s66, v7 ; 3E260E42 v_mac_f32_e32 v5, s67, v7 ; 3E0A0E43 v_mac_f32_e32 v12, s98, v4 ; 3E180862 v_mac_f32_e32 v13, s99, v4 ; 3E1A0863 v_mac_f32_e32 v14, s100, v4 ; 3E1C0864 v_mac_f32_e32 v15, s76, v4 ; 3E1E084C v_mac_f32_e32 v16, s57, v4 ; 3E200839 v_mul_f32_e32 v2, v18, v18 ; 10042512 v_mac_f32_e32 v2, v19, v19 ; 3E042713 v_mac_f32_e32 v2, v5, v5 ; 3E040B05 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 v_mac_f32_e32 v17, s55, v4 ; 3E220837 v_mac_f32_e32 v1, s56, v4 ; 3E020838 v_mac_f32_e32 v11, s58, v14 ; 3E161C3A v_mul_f32_e32 v3, v2, v19 ; 10062702 v_mul_f32_e32 v4, v2, v5 ; 10080B02 v_mul_f32_e32 v5, v4, v3 ; 100A0704 v_mul_f32_e32 v6, s51, v5 ; 100C0A33 v_mul_f32_e32 v7, s53, v5 ; 100E0A35 v_mul_f32_e32 v5, s54, v5 ; 100A0A36 v_mul_f32_e32 v2, v2, v18 ; 10042502 v_mul_f32_e32 v8, v3, v2 ; 10100503 v_mac_f32_e32 v6, s46, v8 ; 3E0C102E v_mac_f32_e32 v7, s49, v8 ; 3E0E1031 v_mac_f32_e32 v5, s52, v8 ; 3E0A1034 v_mul_f32_e32 v8, v4, v4 ; 10100904 v_mac_f32_e32 v6, s41, v8 ; 3E0C1029 v_mac_f32_e32 v7, s48, v8 ; 3E0E1030 v_mac_f32_e32 v5, s50, v8 ; 3E0A1032 v_mul_f32_e32 v8, v2, v4 ; 10100902 v_mac_f32_e32 v6, s40, v8 ; 3E0C1028 v_mac_f32_e32 v7, s42, v8 ; 3E0E102A exp 15, 32, 0, 0, 0, v0, v10, v11, v2 ; F800020F 020B0A00 v_mac_f32_e32 v5, s47, v8 ; 3E0A102F s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v3, v3 ; 10000703 v_mad_f32 v0, v2, v2, -v0 ; D2820000 84020502 v_mac_f32_e32 v6, s43, v0 ; 3E0C002B v_mac_f32_e32 v7, s44, v0 ; 3E0E002C v_mac_f32_e32 v5, s45, v0 ; 3E0A002D v_sub_f32_e32 v0, s33, v16 ; 08002021 v_sub_f32_e32 v8, s37, v17 ; 08102225 v_sub_f32_e32 v9, s38, v1 ; 08120226 v_mul_f32_e32 v10, v0, v0 ; 10140100 v_mac_f32_e32 v10, v8, v8 ; 3E141108 v_mac_f32_e32 v10, v9, v9 ; 3E141309 v_mad_f32 v11, v10, s39, 1.0 ; D282000B 03C84F0A v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A v_mul_f32_e32 v0, v2, v0 ; 10000102 v_mac_f32_e32 v0, v3, v8 ; 3E001103 v_rcp_f32_e32 v8, v11 ; 7E10550B v_mac_f32_e32 v0, v4, v9 ; 3E001304 v_mul_f32_e32 v0, v10, v0 ; 1000010A v_max_f32_e32 v0, 0, v0 ; 20000080 v_mul_f32_e32 v0, v8, v0 ; 10000108 v_mul_f32_e32 v8, s34, v0 ; 10100022 v_mul_f32_e32 v9, s35, v0 ; 10120023 v_mul_f32_e32 v0, s36, v0 ; 10000024 v_sub_f32_e32 v10, s26, v16 ; 0814201A v_sub_f32_e32 v11, s27, v17 ; 0816221B v_sub_f32_e32 v18, s31, v1 ; 0824021F v_mul_f32_e32 v19, v10, v10 ; 1026150A v_mac_f32_e32 v19, v11, v11 ; 3E26170B v_mac_f32_e32 v19, v18, v18 ; 3E262512 v_mad_f32 v20, v19, s32, 1.0 ; D2820014 03C84113 v_rsq_clamp_f32_e32 v19, v19 ; 7E265913 v_mul_f32_e32 v10, v2, v10 ; 10141502 v_mac_f32_e32 v10, v3, v11 ; 3E141703 v_rcp_f32_e32 v11, v20 ; 7E165514 v_mac_f32_e32 v10, v4, v18 ; 3E142504 v_mul_f32_e32 v10, v19, v10 ; 10141513 v_max_f32_e32 v10, 0, v10 ; 20141480 v_mul_f32_e32 v10, v11, v10 ; 1014150B v_mac_f32_e32 v8, s28, v10 ; 3E10141C v_mac_f32_e32 v9, s29, v10 ; 3E12141D v_mac_f32_e32 v0, s30, v10 ; 3E00141E v_sub_f32_e32 v10, s8, v16 ; 08142008 v_sub_f32_e32 v11, s11, v17 ; 0816220B v_sub_f32_e32 v18, s18, v1 ; 08240212 v_mul_f32_e32 v19, v10, v10 ; 1026150A v_mac_f32_e32 v19, v11, v11 ; 3E26170B v_mac_f32_e32 v19, v18, v18 ; 3E262512 v_mad_f32 v20, v19, s19, 1.0 ; D2820014 03C82713 v_rsq_clamp_f32_e32 v19, v19 ; 7E265913 v_mul_f32_e32 v10, v2, v10 ; 10141502 v_mac_f32_e32 v10, v3, v11 ; 3E141703 v_rcp_f32_e32 v11, v20 ; 7E165514 v_mac_f32_e32 v10, v4, v18 ; 3E142504 v_mul_f32_e32 v10, v19, v10 ; 10141513 v_max_f32_e32 v10, 0, v10 ; 20141480 v_mul_f32_e32 v10, v11, v10 ; 1014150B v_mac_f32_e32 v8, s14, v10 ; 3E10140E v_mac_f32_e32 v9, s15, v10 ; 3E12140F v_mac_f32_e32 v0, s16, v10 ; 3E001410 v_sub_f32_e32 v10, s4, v16 ; 08142004 v_sub_f32_e32 v11, s5, v17 ; 08162205 v_sub_f32_e32 v1, s6, v1 ; 08020206 v_mul_f32_e32 v16, s20, v3 ; 10200614 v_mac_f32_e32 v16, s12, v2 ; 3E20040C v_mul_f32_e32 v17, s23, v3 ; 10220617 v_mac_f32_e32 v17, s21, v2 ; 3E220415 v_mul_f32_e32 v18, s25, v3 ; 10240619 v_mac_f32_e32 v18, s24, v2 ; 3E240418 v_mac_f32_e32 v16, s9, v4 ; 3E200809 v_mac_f32_e32 v17, s13, v4 ; 3E22080D v_mac_f32_e32 v18, s22, v4 ; 3E240816 v_add_f32_e32 v16, s7, v16 ; 06202007 v_add_f32_e32 v17, s10, v17 ; 0622220A v_add_f32_e32 v18, s17, v18 ; 06242411 v_mul_f32_e32 v2, v2, v10 ; 10041502 v_mul_f32_e32 v10, v10, v10 ; 1014150A v_mac_f32_e32 v10, v11, v11 ; 3E14170B v_mac_f32_e32 v10, v1, v1 ; 3E140301 v_mad_f32 v19, v10, s0, 1.0 ; D2820013 03C8010A v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A v_mac_f32_e32 v2, v3, v11 ; 3E041703 v_rcp_f32_e32 v11, v19 ; 7E165513 v_mac_f32_e32 v2, v4, v1 ; 3E040304 v_mul_f32_e32 v1, v10, v2 ; 1002050A v_max_f32_e32 v1, 0, v1 ; 20020280 v_mul_f32_e32 v1, v11, v1 ; 1002030B v_mac_f32_e32 v8, s1, v1 ; 3E100201 v_mac_f32_e32 v9, s2, v1 ; 3E120202 v_mac_f32_e32 v0, s3, v1 ; 3E000203 v_add_f32_e32 v2, v16, v6 ; 06040D10 v_add_f32_e32 v6, v17, v7 ; 060C0F11 v_add_f32_e32 v2, v8, v2 ; 06040508 v_add_f32_e32 v6, v9, v6 ; 060C0D09 exp 15, 33, 0, 0, 0, v3, v4, v2, v6 ; F800021F 06020403 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v2, v18, v5 ; 06040B12 v_add_f32_e32 v0, v0, v2 ; 06000500 exp 15, 34, 0, 0, 0, v0, v6, v0, v1 ; F800022F 01000600 exp 15, 12, 0, 1, 0, v12, v13, v14, v15 ; F80008CF 0F0E0D0C s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 104 VGPRS: 24 Code Size: 1160 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0..3] DCL CONST[5] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { 2.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].x, IN[0].wwww 1: MOV TEMP[0].yz, IN[1].yxyy 2: MOV TEMP[1].xy, IN[1].zwzz 3: MOV TEMP[1].z, IN[2].xxxx 4: MOV TEMP[2].xy, IN[0].xyyy 5: TEX TEMP[2], TEMP[2], SAMP[0], 2D 6: ADD TEMP[3].x, IMM[0].xxxx, -CONST[3].wwww 7: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xxxx 8: FSLT TEMP[4].x, TEMP[2].wwww, CONST[5].xxxx 9: AND TEMP[4].x, TEMP[4].xxxx, IMM[0].yyyy 10: KILL_IF -TEMP[4].xxxx 11: MUL TEMP[4].xyz, TEMP[3].xyzz, CONST[2].xyzz 12: DP3 TEMP[0].x, TEMP[0].xyzz, CONST[0].xyzz 13: MAX TEMP[0].x, IMM[0].zzzz, TEMP[0].xxxx 14: MUL TEMP[0].xyz, TEMP[4].xyzz, TEMP[0].xxxx 15: MAD TEMP[0].xyz, TEMP[3].xyzz, TEMP[1].xyzz, TEMP[0].xyzz 16: MOV TEMP[1].w, TEMP[2].wwww 17: MOV_SAT TEMP[2].x, IN[0].zzzz 18: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[0].xyzz, CONST[1].xyzz 19: MOV OUT[0], TEMP[1] 20: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %35 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %36 = load <32 x i8>, <32 x i8> addrspace(2)* %35, align 32, !tbaa !0 %37 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %48 = bitcast float %39 to i32 %49 = bitcast float %40 to i32 %50 = insertelement <2 x i32> undef, i32 %48, i32 0 %51 = insertelement <2 x i32> %50, i32 %49, i32 1 %52 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %51, <32 x i8> %36, <16 x i8> %38, i32 2) %53 = extractelement <4 x float> %52, i32 0 %54 = extractelement <4 x float> %52, i32 1 %55 = extractelement <4 x float> %52, i32 2 %56 = extractelement <4 x float> %52, i32 3 %57 = fsub float 2.000000e+00, %33 %58 = fmul float %53, %57 %59 = fmul float %54, %57 %60 = fmul float %55, %57 %61 = fcmp olt float %56, %34 %62 = select i1 %61, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %62) %63 = fmul float %58, %30 %64 = fmul float %59, %31 %65 = fmul float %60, %32 %66 = fmul float %42, %24 %67 = fmul float %43, %25 %68 = fadd float %67, %66 %69 = fmul float %44, %26 %70 = fadd float %68, %69 %71 = call float @llvm.maxnum.f32(float %70, float 0.000000e+00) %72 = fmul float %63, %71 %73 = fmul float %64, %71 %74 = fmul float %65, %71 %75 = fmul float %58, %45 %76 = fadd float %75, %72 %77 = fmul float %59, %46 %78 = fadd float %77, %73 %79 = fmul float %60, %47 %80 = fadd float %79, %74 %81 = call float @llvm.AMDIL.clamp.(float %41, float 0.000000e+00, float 1.000000e+00) %82 = call float @llvm.AMDGPU.lrp(float %81, float %76, float %27) %83 = call float @llvm.AMDGPU.lrp(float %81, float %78, float %28) %84 = call float @llvm.AMDGPU.lrp(float %81, float %80, float %29) %85 = call i32 @llvm.SI.packf16(float %82, float %83) %86 = bitcast i32 %85 to float %87 = call i32 @llvm.SI.packf16(float %84, float %56) %88 = bitcast i32 %87 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %86, float %88, float %86, float %88) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v0, v0, 0, 2, [m0] ; C8000800 v_interp_p2_f32 v0, [v0], v1, 0, 2, [m0] ; C8010801 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800F00 00430A02 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108 s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 s_buffer_load_dword s9, s[0:3], 0xa ; C204810A s_buffer_load_dword s10, s[0:3], 0x0 ; C2050100 s_buffer_load_dword s11, s[0:3], 0x1 ; C2058101 s_buffer_load_dword s12, s[0:3], 0x2 ; C2060102 s_buffer_load_dword s13, s[0:3], 0x4 ; C2068104 s_buffer_load_dword s0, s[0:3], 0x5 ; C2000105 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_cmp_gt_f32_e32 vcc, s5, v13 ; 7C081A05 v_cndmask_b32_e64 v1, 0, -1.0, vcc ; D2000001 01A9E680 v_sub_f32_e64 v2, 2.0, s4 ; D2080002 000008F4 v_mul_f32_e32 v3, v2, v10 ; 10061502 v_mul_f32_e32 v10, v2, v11 ; 10141702 v_mul_f32_e32 v2, v2, v12 ; 10041902 v_cmpx_le_f32_e32 vcc, 0, v1 ; 7C260280 v_mul_f32_e32 v1, s10, v5 ; 10020A0A v_mac_f32_e32 v1, s11, v6 ; 3E020C0B v_mac_f32_e32 v1, s12, v7 ; 3E020E0C v_mul_f32_e32 v5, s9, v2 ; 100A0409 v_max_f32_e32 v1, 0, v1 ; 20020280 v_mul_f32_e32 v5, v1, v5 ; 100A0B01 v_mac_f32_e32 v5, v0, v2 ; 3E0A0500 v_add_f32_e64 v0, 0, v4 clamp ; D2060800 00020880 v_sub_f32_e32 v2, 1.0, v0 ; 080400F2 v_mul_f32_e32 v4, s6, v2 ; 10080406 v_mac_f32_e32 v4, v5, v0 ; 3E080105 v_cvt_pkrtz_f16_f32_e32 v4, v4, v13 ; 5E081B04 v_mul_f32_e32 v5, s7, v3 ; 100A0607 v_mul_f32_e32 v5, v1, v5 ; 100A0B01 v_mac_f32_e32 v5, v8, v3 ; 3E0A0708 v_mul_f32_e32 v3, s8, v10 ; 10061408 v_mul_f32_e32 v1, v1, v3 ; 10020701 v_mac_f32_e32 v1, v9, v10 ; 3E021509 v_mul_f32_e32 v3, s13, v2 ; 1006040D v_mac_f32_e32 v3, v5, v0 ; 3E060105 v_mul_f32_e32 v2, s0, v2 ; 10040400 v_mac_f32_e32 v2, v1, v0 ; 3E040101 v_cvt_pkrtz_f16_f32_e32 v0, v3, v2 ; 5E000503 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 296 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL CONST[0..3] DCL TEMP[0], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV OUT[0], TEMP[0] 5: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float %13, %33 %38 = fmul float %14, %33 %39 = fmul float %15, %33 %40 = fmul float %16, %33 %41 = fmul float %17, %34 %42 = fadd float %41, %37 %43 = fmul float %18, %34 %44 = fadd float %43, %38 %45 = fmul float %19, %34 %46 = fadd float %45, %39 %47 = fmul float %20, %34 %48 = fadd float %47, %40 %49 = fmul float %21, %35 %50 = fadd float %49, %42 %51 = fmul float %22, %35 %52 = fadd float %51, %44 %53 = fmul float %23, %35 %54 = fadd float %53, %46 %55 = fmul float %24, %35 %56 = fadd float %55, %48 %57 = fmul float %25, %36 %58 = fadd float %57, %50 %59 = fmul float %26, %36 %60 = fadd float %59, %52 %61 = fmul float %27, %36 %62 = fadd float %61, %54 %63 = fmul float %28, %36 %64 = fadd float %63, %56 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %60, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v0 ; 10080004 v_mac_f32_e32 v4, s8, v1 ; 3E080208 v_mul_f32_e32 v5, s5, v0 ; 100A0005 v_mac_f32_e32 v5, s9, v1 ; 3E0A0209 v_mul_f32_e32 v6, s6, v0 ; 100C0006 v_mac_f32_e32 v6, s10, v1 ; 3E0C020A v_mul_f32_e32 v0, s7, v0 ; 10000007 v_mac_f32_e32 v0, s11, v1 ; 3E00020B v_mac_f32_e32 v4, s12, v2 ; 3E08040C v_mac_f32_e32 v5, s13, v2 ; 3E0A040D v_mac_f32_e32 v6, s14, v2 ; 3E0C040E v_mac_f32_e32 v0, s15, v2 ; 3E00040F v_mac_f32_e32 v4, s16, v3 ; 3E080610 v_mac_f32_e32 v5, s17, v3 ; 3E0A0611 v_mac_f32_e32 v6, s18, v3 ; 3E0C0612 v_mac_f32_e32 v0, s0, v3 ; 3E000600 exp 15, 12, 0, 1, 0, v4, v5, v6, v0 ; F80008CF 00060504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 172 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL OUT[0], COLOR IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxx 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %23 = bitcast i32 %22 to float %24 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %25 = bitcast i32 %24 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25) ret void } ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080 exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 20 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL OUT[6], GENERIC[5] DCL OUT[7], GENERIC[6] DCL CONST[0..19] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[5], IN[0].xxxx 1: MAD TEMP[0], CONST[6], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[7], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0].xyz, CONST[8], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[16], IN[0].xxxx 5: MAD TEMP[1], CONST[17], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[18], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1], CONST[19], IN[0].wwww, TEMP[1] 8: MAD TEMP[2].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww 9: FSEQ TEMP[3].x, CONST[15].xxxx, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].xy, IN[2].xyxx 12: ELSE :0 13: MOV TEMP[3].xy, IN[3].xyxx 14: ENDIF 15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[14].xyyy, CONST[14].zwww 16: MOV TEMP[2].zw, TEMP[3].yyxy 17: MOV TEMP[3].x, CONST[9].xxxx 18: MOV TEMP[3].y, CONST[10].xxxx 19: MOV TEMP[3].z, CONST[11].xxxx 20: MOV TEMP[4].x, CONST[9].yyyy 21: MOV TEMP[4].y, CONST[10].yyyy 22: MOV TEMP[4].z, CONST[11].yyyy 23: MOV TEMP[5].x, CONST[9].zzzz 24: MOV TEMP[5].y, CONST[10].zzzz 25: MOV TEMP[5].z, CONST[11].zzzz 26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz 29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 30: RSQ TEMP[4].x, TEMP[4].xxxx 31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 32: MUL TEMP[4].xyz, CONST[5].xyzz, IN[4].xxxx 33: MAD TEMP[4].xyz, CONST[6].xyzz, IN[4].yyyy, TEMP[4].xyzz 34: MAD TEMP[4].xyz, CONST[7].xyzz, IN[4].zzzz, TEMP[4].xyzz 35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 36: RSQ TEMP[5].x, TEMP[5].xxxx 37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx 39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz 40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww 41: MOV TEMP[4].xyz, TEMP[4].xyzx 42: MOV TEMP[5].xyz, TEMP[5].xyzx 43: MOV TEMP[6].xyz, TEMP[3].xyzx 44: MUL TEMP[7], TEMP[3].xyzz, TEMP[3].yzzx 45: DP4 TEMP[8].x, CONST[1], TEMP[7] 46: DP4 TEMP[9].x, CONST[2], TEMP[7] 47: MOV TEMP[8].y, TEMP[9].xxxx 48: DP4 TEMP[7].x, CONST[3], TEMP[7] 49: MOV TEMP[8].z, TEMP[7].xxxx 50: MUL TEMP[7].x, TEMP[3].yyyy, TEMP[3].yyyy 51: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[7].xxxx 52: MAD TEMP[3].xyz, CONST[4].xyzz, TEMP[3].xxxx, TEMP[8].xyzz 53: ADD TEMP[7].xyz, TEMP[0].xyzz, -CONST[0].xyzz 54: MOV TEMP[7].yzw, TEMP[7].yxyz 55: MOV TEMP[7].x, TEMP[1].zzzz 56: MOV TEMP[0].xyz, TEMP[0].xyzx 57: MOV OUT[7], TEMP[0] 58: MOV OUT[1], TEMP[2] 59: MOV OUT[3], TEMP[5] 60: MOV OUT[2], TEMP[4] 61: MOV OUT[4], TEMP[6] 62: MOV OUT[5], TEMP[3] 63: MOV OUT[0], TEMP[1] 64: MOV OUT[6], TEMP[7] 65: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %5, %7 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = extractelement <4 x float> %83, i32 3 %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 %90 = add i32 %5, %7 %91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %90) %92 = extractelement <4 x float> %91, i32 0 %93 = extractelement <4 x float> %91, i32 1 %94 = extractelement <4 x float> %91, i32 2 %95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0 %97 = add i32 %5, %7 %98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97) %99 = extractelement <4 x float> %98, i32 0 %100 = extractelement <4 x float> %98, i32 1 %101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0 %103 = add i32 %5, %7 %104 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %103) %105 = extractelement <4 x float> %104, i32 0 %106 = extractelement <4 x float> %104, i32 1 %107 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !tbaa !0 %109 = add i32 %5, %7 %110 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %108, i32 0, i32 %109) %111 = extractelement <4 x float> %110, i32 0 %112 = extractelement <4 x float> %110, i32 1 %113 = extractelement <4 x float> %110, i32 2 %114 = extractelement <4 x float> %110, i32 3 %115 = fmul float %31, %84 %116 = fmul float %32, %84 %117 = fmul float %33, %84 %118 = fmul float %34, %84 %119 = fmul float %35, %85 %120 = fadd float %119, %115 %121 = fmul float %36, %85 %122 = fadd float %121, %116 %123 = fmul float %37, %85 %124 = fadd float %123, %117 %125 = fmul float %38, %85 %126 = fadd float %125, %118 %127 = fmul float %39, %86 %128 = fadd float %127, %120 %129 = fmul float %40, %86 %130 = fadd float %129, %122 %131 = fmul float %41, %86 %132 = fadd float %131, %124 %133 = fmul float %42, %86 %134 = fadd float %133, %126 %135 = fmul float %43, %87 %136 = fadd float %135, %128 %137 = fmul float %44, %87 %138 = fadd float %137, %130 %139 = fmul float %45, %87 %140 = fadd float %139, %132 %141 = fmul float %64, %84 %142 = fmul float %65, %84 %143 = fmul float %66, %84 %144 = fmul float %67, %84 %145 = fmul float %68, %85 %146 = fadd float %145, %141 %147 = fmul float %69, %85 %148 = fadd float %147, %142 %149 = fmul float %70, %85 %150 = fadd float %149, %143 %151 = fmul float %71, %85 %152 = fadd float %151, %144 %153 = fmul float %72, %86 %154 = fadd float %153, %146 %155 = fmul float %73, %86 %156 = fadd float %155, %148 %157 = fmul float %74, %86 %158 = fadd float %157, %150 %159 = fmul float %75, %86 %160 = fadd float %159, %152 %161 = fmul float %76, %87 %162 = fadd float %161, %154 %163 = fmul float %77, %87 %164 = fadd float %163, %156 %165 = fmul float %78, %87 %166 = fadd float %165, %158 %167 = fmul float %79, %87 %168 = fadd float %167, %160 %169 = fmul float %99, %55 %170 = fadd float %169, %57 %171 = fmul float %100, %56 %172 = fadd float %171, %58 %173 = fcmp oeq float %63, 0.000000e+00 %. = select i1 %173, float %99, float %105 %.40 = select i1 %173, float %100, float %106 %174 = fmul float %., %59 %175 = fadd float %174, %61 %176 = fmul float %.40, %60 %177 = fadd float %176, %62 %178 = fmul float %46, %92 %179 = fmul float %49, %92 %180 = fmul float %52, %92 %181 = fmul float %47, %93 %182 = fadd float %181, %178 %183 = fmul float %50, %93 %184 = fadd float %183, %179 %185 = fmul float %53, %93 %186 = fadd float %185, %180 %187 = fmul float %48, %94 %188 = fadd float %187, %182 %189 = fmul float %51, %94 %190 = fadd float %189, %184 %191 = fmul float %54, %94 %192 = fadd float %191, %186 %193 = fmul float %188, %188 %194 = fmul float %190, %190 %195 = fadd float %194, %193 %196 = fmul float %192, %192 %197 = fadd float %195, %196 %198 = call float @llvm.AMDGPU.rsq.clamped.f32(float %197) %199 = fmul float %188, %198 %200 = fmul float %190, %198 %201 = fmul float %192, %198 %202 = fmul float %31, %111 %203 = fmul float %32, %111 %204 = fmul float %33, %111 %205 = fmul float %35, %112 %206 = fadd float %205, %202 %207 = fmul float %36, %112 %208 = fadd float %207, %203 %209 = fmul float %37, %112 %210 = fadd float %209, %204 %211 = fmul float %39, %113 %212 = fadd float %211, %206 %213 = fmul float %40, %113 %214 = fadd float %213, %208 %215 = fmul float %41, %113 %216 = fadd float %215, %210 %217 = fmul float %212, %212 %218 = fmul float %214, %214 %219 = fadd float %218, %217 %220 = fmul float %216, %216 %221 = fadd float %219, %220 %222 = call float @llvm.AMDGPU.rsq.clamped.f32(float %221) %223 = fmul float %212, %222 %224 = fmul float %214, %222 %225 = fmul float %216, %222 %226 = fmul float %201, %224 %227 = fmul float %199, %225 %228 = fmul float %200, %223 %229 = fmul float %200, %225 %230 = fsub float %229, %226 %231 = fmul float %201, %223 %232 = fsub float %231, %227 %233 = fmul float %199, %224 %234 = fsub float %233, %228 %235 = fmul float %230, %114 %236 = fmul float %232, %114 %237 = fmul float %234, %114 %238 = fmul float %199, %200 %239 = fmul float %200, %201 %240 = fmul float %201, %201 %241 = fmul float %201, %199 %242 = fmul float %16, %238 %243 = fmul float %17, %239 %244 = fadd float %242, %243 %245 = fmul float %18, %240 %246 = fadd float %244, %245 %247 = fmul float %19, %241 %248 = fadd float %246, %247 %249 = fmul float %20, %238 %250 = fmul float %21, %239 %251 = fadd float %249, %250 %252 = fmul float %22, %240 %253 = fadd float %251, %252 %254 = fmul float %23, %241 %255 = fadd float %253, %254 %256 = fmul float %24, %238 %257 = fmul float %25, %239 %258 = fadd float %256, %257 %259 = fmul float %26, %240 %260 = fadd float %258, %259 %261 = fmul float %27, %241 %262 = fadd float %260, %261 %263 = fmul float %200, %200 %264 = fmul float %199, %199 %265 = fsub float %264, %263 %266 = fmul float %28, %265 %267 = fadd float %266, %248 %268 = fmul float %29, %265 %269 = fadd float %268, %255 %270 = fmul float %30, %265 %271 = fadd float %270, %262 %272 = fsub float %136, %13 %273 = fsub float %138, %14 %274 = fsub float %140, %15 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %170, float %172, float %175, float %177) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %223, float %224, float %225, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %235, float %236, float %237, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %199, float %200, float %201, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %267, float %269, float %271, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %166, float %272, float %273, float %274) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %136, float %138, float %140, float %134) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %162, float %164, float %166, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908 s_load_dwordx4 s[16:19], s[8:9], 0xc ; C088090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s27, s[28:31], 0x20 ; C20D9D20 buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[11:14], v0, s[16:19], 0 idxen ; E00C2000 80040B00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[13:16], v0, s[8:11], 0 idxen ; E00C2000 80020D00 s_buffer_load_dword s32, s[28:31], 0x21 ; C2101D21 s_buffer_load_dword s33, s[28:31], 0x22 ; C2109D22 s_buffer_load_dword s34, s[28:31], 0x24 ; C2111D24 s_buffer_load_dword s35, s[28:31], 0x25 ; C2119D25 s_buffer_load_dword s1, s[28:31], 0x10 ; C2009D10 s_buffer_load_dword s2, s[28:31], 0x11 ; C2011D11 s_buffer_load_dword s0, s[28:31], 0x12 ; C2001D12 s_buffer_load_dword s18, s[28:31], 0x14 ; C2091D14 s_buffer_load_dword s17, s[28:31], 0x15 ; C2089D15 s_buffer_load_dword s36, s[28:31], 0x26 ; C2121D26 s_buffer_load_dword s37, s[28:31], 0x28 ; C2129D28 s_buffer_load_dword s38, s[28:31], 0x29 ; C2131D29 s_buffer_load_dword s39, s[28:31], 0x2a ; C2139D2A s_buffer_load_dword s40, s[28:31], 0x2c ; C2141D2C s_buffer_load_dword s20, s[28:31], 0x16 ; C20A1D16 s_buffer_load_dword s41, s[28:31], 0x17 ; C2149D17 s_buffer_load_dword s26, s[28:31], 0x18 ; C20D1D18 s_buffer_load_dword s25, s[28:31], 0x19 ; C20C9D19 s_buffer_load_dword s21, s[28:31], 0x1a ; C20A9D1A s_buffer_load_dword s42, s[28:31], 0x1b ; C2151D1B s_buffer_load_dword s23, s[28:31], 0x1c ; C20B9D1C s_buffer_load_dword s24, s[28:31], 0x1d ; C20C1D1D s_buffer_load_dword s22, s[28:31], 0x1e ; C20B1D1E s_buffer_load_dword s43, s[28:31], 0x1f ; C2159D1F s_buffer_load_dword s44, s[28:31], 0x2d ; C2161D2D s_buffer_load_dword s45, s[28:31], 0x2e ; C2169D2E s_buffer_load_dword s46, s[28:31], 0x34 ; C2171D34 s_buffer_load_dword s47, s[28:31], 0x35 ; C2179D35 s_buffer_load_dword s3, s[28:31], 0x36 ; C2019D36 s_buffer_load_dword s4, s[28:31], 0x3c ; C2021D3C s_buffer_load_dword s48, s[28:31], 0x40 ; C2181D40 s_buffer_load_dword s49, s[28:31], 0x41 ; C2189D41 s_buffer_load_dword s50, s[28:31], 0x42 ; C2191D42 s_buffer_load_dword s51, s[28:31], 0x43 ; C2199D43 s_buffer_load_dword s6, s[28:31], 0x37 ; C2031D37 s_buffer_load_dword s52, s[28:31], 0x38 ; C21A1D38 s_buffer_load_dword s53, s[28:31], 0x39 ; C21A9D39 s_buffer_load_dword s8, s[28:31], 0x3a ; C2041D3A s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s3 ; 7E000203 s_buffer_load_dword s10, s[28:31], 0x3b ; C2051D3B v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880 s_buffer_load_dword s3, s[28:31], 0x0 ; C2019D00 s_buffer_load_dword s4, s[28:31], 0x1 ; C2021D01 s_buffer_load_dword s5, s[28:31], 0x2 ; C2029D02 s_buffer_load_dword s9, s[28:31], 0x4 ; C2049D04 v_mov_b32_e32 v17, s6 ; 7E220206 s_buffer_load_dword s14, s[28:31], 0x5 ; C2071D05 s_buffer_load_dword s7, s[28:31], 0x6 ; C2039D06 s_buffer_load_dword s6, s[28:31], 0x7 ; C2031D07 v_mov_b32_e32 v18, s8 ; 7E240208 s_buffer_load_dword s12, s[28:31], 0x8 ; C2061D08 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v19, s10 ; 7E26020A s_buffer_load_dword s16, s[28:31], 0x9 ; C2081D09 s_buffer_load_dword s10, s[28:31], 0xa ; C2051D0A s_buffer_load_dword s8, s[28:31], 0xb ; C2041D0B s_buffer_load_dword s15, s[28:31], 0xc ; C2079D0C s_buffer_load_dword s19, s[28:31], 0xd ; C2099D0D s_buffer_load_dword s13, s[28:31], 0xe ; C2069D0E s_buffer_load_dword s11, s[28:31], 0xf ; C2059D0F s_buffer_load_dword s54, s[28:31], 0x44 ; C21B1D44 s_buffer_load_dword s55, s[28:31], 0x45 ; C21B9D45 s_buffer_load_dword s56, s[28:31], 0x46 ; C21C1D46 s_buffer_load_dword s57, s[28:31], 0x47 ; C21C9D47 s_buffer_load_dword s58, s[28:31], 0x48 ; C21D1D48 s_buffer_load_dword s59, s[28:31], 0x49 ; C21D9D49 s_buffer_load_dword s60, s[28:31], 0x4a ; C21E1D4A s_buffer_load_dword s61, s[28:31], 0x4b ; C21E9D4B s_buffer_load_dword s62, s[28:31], 0x4c ; C21F1D4C s_buffer_load_dword s63, s[28:31], 0x4d ; C21F9D4D s_buffer_load_dword s64, s[28:31], 0x4e ; C2201D4E s_buffer_load_dword s28, s[28:31], 0x4f ; C20E1D4F v_mul_f32_e32 v20, s41, v2 ; 10280429 v_mac_f32_e32 v20, s42, v3 ; 3E28062A v_mac_f32_e32 v20, s43, v4 ; 3E28082B v_mac_f32_e32 v0, s46, v9 ; 3E00122E v_mac_f32_e32 v17, s47, v10 ; 3E22142F v_mul_f32_e32 v21, s48, v2 ; 102A0430 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v21, s54, v3 ; 3E2A0636 v_mac_f32_e32 v21, s58, v4 ; 3E2A083A v_mac_f32_e32 v21, s62, v5 ; 3E2A0A3E v_mul_f32_e32 v22, s49, v2 ; 102C0431 v_mac_f32_e32 v22, s55, v3 ; 3E2C0637 v_mac_f32_e32 v22, s59, v4 ; 3E2C083B v_mac_f32_e32 v22, s63, v5 ; 3E2C0A3F v_mul_f32_e32 v23, s50, v2 ; 102E0432 v_mac_f32_e32 v23, s56, v3 ; 3E2E0638 v_mac_f32_e32 v23, s60, v4 ; 3E2E083C v_mac_f32_e32 v23, s64, v5 ; 3E2E0A40 v_mul_f32_e32 v24, s51, v2 ; 10300433 v_mac_f32_e32 v24, s57, v3 ; 3E300639 v_mac_f32_e32 v24, s61, v4 ; 3E30083D v_mac_f32_e32 v24, s28, v5 ; 3E300A1C v_cndmask_b32_e32 v9, v11, v9 ; 0012130B v_cndmask_b32_e32 v10, v12, v10 ; 0014150C v_mul_f32_e32 v11, s34, v6 ; 10160C22 v_mac_f32_e32 v11, s35, v7 ; 3E160E23 v_mul_f32_e32 v12, s37, v6 ; 10180C25 v_mac_f32_e32 v12, s38, v7 ; 3E180E26 v_mul_f32_e32 v6, s40, v6 ; 100C0C28 v_mac_f32_e32 v6, s44, v7 ; 3E0C0E2C v_mac_f32_e32 v11, s36, v8 ; 3E161024 v_mac_f32_e32 v12, s39, v8 ; 3E181027 v_mac_f32_e32 v6, s45, v8 ; 3E0C102D v_mul_f32_e32 v7, s18, v2 ; 100E0412 v_mac_f32_e32 v7, s26, v3 ; 3E0E061A v_mac_f32_e32 v7, s23, v4 ; 3E0E0817 v_mac_f32_e32 v7, s27, v5 ; 3E0E0A1B v_mul_f32_e32 v8, s17, v2 ; 10100411 v_mac_f32_e32 v8, s25, v3 ; 3E100619 v_mac_f32_e32 v8, s24, v4 ; 3E100818 v_mac_f32_e32 v8, s32, v5 ; 3E100A20 v_mul_f32_e32 v2, s20, v2 ; 10040414 v_mac_f32_e32 v2, s21, v3 ; 3E040615 v_mac_f32_e32 v2, s22, v4 ; 3E040816 v_mac_f32_e32 v2, s33, v5 ; 3E040A21 v_mac_f32_e32 v18, s52, v9 ; 3E241234 v_mac_f32_e32 v19, s53, v10 ; 3E261435 exp 15, 32, 0, 0, 0, v0, v17, v18, v19 ; F800020F 13121100 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s18, v13 ; 10001A12 v_mac_f32_e32 v0, s26, v14 ; 3E001C1A v_mul_f32_e32 v3, s17, v13 ; 10061A11 v_mac_f32_e32 v3, s25, v14 ; 3E061C19 v_mul_f32_e32 v4, s20, v13 ; 10081A14 v_mac_f32_e32 v4, s21, v14 ; 3E081C15 v_mac_f32_e32 v0, s23, v15 ; 3E001E17 v_mac_f32_e32 v3, s24, v15 ; 3E061E18 v_mac_f32_e32 v4, s22, v15 ; 3E081E16 v_mul_f32_e32 v5, v11, v11 ; 100A170B v_mac_f32_e32 v5, v12, v12 ; 3E0A190C v_mac_f32_e32 v5, v6, v6 ; 3E0A0D06 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mul_f32_e32 v9, v0, v0 ; 10120100 v_mac_f32_e32 v9, v3, v3 ; 3E120703 v_mac_f32_e32 v9, v4, v4 ; 3E120904 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mul_f32_e32 v10, v5, v11 ; 10141705 v_mul_f32_e32 v11, v5, v12 ; 10161905 v_mul_f32_e32 v5, v5, v6 ; 100A0D05 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v3, v9, v3 ; 10060709 v_mul_f32_e32 v4, v9, v4 ; 10080909 v_mul_f32_e32 v6, v3, v5 ; 100C0B03 v_mad_f32 v6, v11, v4, -v6 ; D2820006 841A090B v_mul_f32_e32 v9, v4, v10 ; 10121504 v_mad_f32 v9, v5, v0, -v9 ; D2820009 84260105 v_mul_f32_e32 v12, v0, v11 ; 10181700 v_mad_f32 v12, v10, v3, -v12 ; D282000C 8432070A v_mul_f32_e32 v6, v16, v6 ; 100C0D10 v_mul_f32_e32 v9, v16, v9 ; 10121310 v_mul_f32_e32 v12, v16, v12 ; 10181910 exp 15, 33, 0, 0, 0, v0, v3, v4, v1 ; F800021F 01040300 exp 15, 34, 0, 0, 0, v6, v9, v12, v1 ; F800022F 010C0906 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v5, v11 ; 10001705 v_mul_f32_e32 v3, s14, v0 ; 1006000E v_mul_f32_e32 v4, s16, v0 ; 10080010 v_mul_f32_e32 v0, s19, v0 ; 10000013 v_mul_f32_e32 v6, v11, v10 ; 100C150B v_mac_f32_e32 v3, s9, v6 ; 3E060C09 v_mac_f32_e32 v4, s12, v6 ; 3E080C0C v_mac_f32_e32 v0, s15, v6 ; 3E000C0F v_mul_f32_e32 v6, v5, v5 ; 100C0B05 v_mac_f32_e32 v3, s7, v6 ; 3E060C07 v_mac_f32_e32 v4, s10, v6 ; 3E080C0A v_mac_f32_e32 v0, s13, v6 ; 3E000C0D v_mul_f32_e32 v6, v10, v5 ; 100C0B0A v_mac_f32_e32 v3, s6, v6 ; 3E060C06 v_mac_f32_e32 v4, s8, v6 ; 3E080C08 v_mac_f32_e32 v0, s11, v6 ; 3E000C0B v_mul_f32_e32 v6, v11, v11 ; 100C170B v_mad_f32 v6, v10, v10, -v6 ; D2820006 841A150A v_mac_f32_e32 v3, s1, v6 ; 3E060C01 v_mac_f32_e32 v4, s2, v6 ; 3E080C02 v_mac_f32_e32 v0, s0, v6 ; 3E000C00 v_subrev_f32_e32 v6, s3, v7 ; 0A0C0E03 v_subrev_f32_e32 v9, s4, v8 ; 0A121004 v_subrev_f32_e32 v12, s5, v2 ; 0A180405 exp 15, 35, 0, 0, 0, v10, v11, v5, v1 ; F800023F 01050B0A exp 15, 36, 0, 0, 0, v3, v4, v0, v1 ; F800024F 01000403 exp 15, 37, 0, 0, 0, v23, v6, v9, v12 ; F800025F 0C090617 exp 15, 38, 0, 0, 0, v7, v8, v2, v20 ; F800026F 14020807 exp 15, 12, 0, 1, 0, v21, v22, v23, v24 ; F80008CF 18171615 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 28 Code Size: 892 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL IN[6], GENERIC[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SVIEW[0], CUBE, FLOAT DCL SVIEW[1], CUBE, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL CONST[0..5] DCL CONST[8..19] DCL CONST[22..24] DCL CONST[26] DCL TEMP[0..17], LOCAL IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[1] FLT32 { 0.5000, 0.7500, 7.0000, 1.0000} IMM[2] FLT32 { 10.0000, 0.9680, 0.0300, 0.0001} 0: MOV TEMP[0].x, IN[1].xxxx 1: MOV TEMP[0].y, IN[2].xxxx 2: MOV TEMP[0].z, IN[3].xxxx 3: MOV TEMP[1].x, IN[1].yyyy 4: MOV TEMP[1].y, IN[2].yyyy 5: MOV TEMP[1].z, IN[3].yyyy 6: MOV TEMP[2].x, IN[1].zzzz 7: MOV TEMP[2].y, IN[2].zzzz 8: MOV TEMP[2].z, IN[3].zzzz 9: MOV TEMP[3].xy, IN[0].xyyy 10: TEX TEMP[3].yw, TEMP[3], SAMP[3], 2D 11: MAD TEMP[3].xy, TEMP[3].wyyy, IMM[0].xxxx, IMM[0].yyyy 12: MUL TEMP[3].xy, TEMP[3].xyyy, CONST[22].xxxx 13: DP2 TEMP[4].x, TEMP[3].xyyy, TEMP[3].xyyy 14: MOV_SAT TEMP[4].x, TEMP[4].xxxx 15: ADD TEMP[4].x, IMM[0].zzzz, -TEMP[4].xxxx 16: SQRT TEMP[4].x, TEMP[4].xxxx 17: MOV TEMP[3].z, TEMP[4].xxxx 18: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[0].xyzz 19: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[1].xyzz 20: MOV TEMP[0].y, TEMP[1].xxxx 21: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[2].xyzz 22: MOV TEMP[0].z, TEMP[1].xxxx 23: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 24: RSQ TEMP[1].x, TEMP[1].xxxx 25: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 26: DP3 TEMP[1].x, IN[5].yzww, IN[5].yzww 27: RSQ TEMP[1].x, TEMP[1].xxxx 28: MUL TEMP[1].xyz, IN[5].yzww, TEMP[1].xxxx 29: MOV TEMP[2].xy, IN[0].xyyy 30: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D 31: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[2].xyzz 32: LRP TEMP[3].xyz, CONST[23].xxxx, TEMP[2].xyzz, CONST[16].xyzz 33: MUL TEMP[4].x, CONST[23].xxxx, CONST[16].wwww 34: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx 35: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx 36: MOV TEMP[5].xy, IN[0].xyyy 37: TEX TEMP[5].y, TEMP[5], SAMP[4], 2D 38: ADD TEMP[6].x, IMM[0].zzzz, -CONST[26].xxxx 39: MAD TEMP[5].x, TEMP[5].yyyy, CONST[26].xxxx, TEMP[6].xxxx 40: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz 41: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx 42: MOV TEMP[7].xyz, IMM[0].wwww 43: MOV TEMP[8].w, IMM[0].zzzz 44: MOV TEMP[8].xyz, TEMP[0].xyzx 45: DP4 TEMP[9].x, CONST[1], TEMP[8] 46: DP4 TEMP[10].x, CONST[2], TEMP[8] 47: MOV TEMP[9].y, TEMP[10].xxxx 48: DP4 TEMP[8].x, CONST[3], TEMP[8] 49: MOV TEMP[9].z, TEMP[8].xxxx 50: ADD TEMP[8].xyz, IN[4].xyzz, TEMP[9].xyzz 51: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx 52: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[1].xyzz 53: MUL TEMP[9].xyz, TEMP[9].xxxx, TEMP[0].xyzz 54: MUL TEMP[9].xyz, IMM[0].xxxx, TEMP[9].xyzz 55: ADD TEMP[9].xyz, TEMP[1].xyzz, -TEMP[9].xyzz 56: MOV TEMP[10].xyz, TEMP[9].xyzx 57: FSLT TEMP[11].x, IMM[0].wwww, CONST[10].wwww 58: UIF TEMP[11].xxxx :0 59: DP3 TEMP[11].x, TEMP[9].xyzz, TEMP[9].xyzz 60: RSQ TEMP[11].x, TEMP[11].xxxx 61: MUL TEMP[11].xyz, TEMP[9].xyzz, TEMP[11].xxxx 62: MOV TEMP[12].xyz, -IN[6].xyzx 63: ADD TEMP[13].xyz, CONST[8].xyzz, TEMP[12].xyzz 64: RCP TEMP[14].x, TEMP[11].xxxx 65: RCP TEMP[14].y, TEMP[11].yyyy 66: RCP TEMP[14].z, TEMP[11].zzzz 67: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz 68: ADD TEMP[12].xyz, CONST[9].xyzz, TEMP[12].xyzz 69: RCP TEMP[14].x, TEMP[11].xxxx 70: RCP TEMP[14].y, TEMP[11].yyyy 71: RCP TEMP[14].z, TEMP[11].zzzz 72: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz 73: FSLT TEMP[14].xyz, IMM[0].wwww, TEMP[11].xyzz 74: UIF TEMP[14].xxxx :0 75: MOV TEMP[15].x, TEMP[13].xxxx 76: ELSE :0 77: MOV TEMP[15].x, TEMP[12].xxxx 78: ENDIF 79: UIF TEMP[14].yyyy :0 80: MOV TEMP[16].x, TEMP[13].yyyy 81: ELSE :0 82: MOV TEMP[16].x, TEMP[12].yyyy 83: ENDIF 84: UIF TEMP[14].zzzz :0 85: MOV TEMP[13].x, TEMP[13].zzzz 86: ELSE :0 87: MOV TEMP[13].x, TEMP[12].zzzz 88: ENDIF 89: ADD TEMP[12].xyz, CONST[8].xyzz, CONST[9].xyzz 90: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[1].xxxx 91: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx 92: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx 93: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[10].xyzz 94: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[6].xyzz 95: MAD TEMP[11].xyz, TEMP[11].xyzz, TEMP[13].xxxx, TEMP[14].xyzz 96: ADD TEMP[10].xyz, TEMP[11].xyzz, -TEMP[12].xyzz 97: ENDIF 98: ADD TEMP[11].x, IMM[0].zzzz, -CONST[24].xxxx 99: POW TEMP[11].x, TEMP[11].xxxx, IMM[1].yyyy 100: MUL TEMP[11].x, TEMP[11].xxxx, IMM[1].zzzz 101: MOV TEMP[10].xyz, TEMP[10].xyzz 102: MOV TEMP[10].w, TEMP[11].xxxx 103: TXL TEMP[10], TEMP[10], SAMP[0], CUBE 104: POW TEMP[11].x, TEMP[10].wwww, CONST[11].yyyy 105: MUL TEMP[11].x, CONST[11].xxxx, TEMP[11].xxxx 106: MUL TEMP[10].xyz, TEMP[11].xxxx, TEMP[10].xyzz 107: FSLT TEMP[11].x, CONST[9].wwww, IMM[1].wwww 108: UIF TEMP[11].xxxx :0 109: MOV TEMP[11].xyz, TEMP[9].xyzx 110: FSLT TEMP[12].x, IMM[0].wwww, CONST[14].wwww 111: UIF TEMP[12].xxxx :0 112: DP3 TEMP[12].x, TEMP[9].xyzz, TEMP[9].xyzz 113: RSQ TEMP[12].x, TEMP[12].xxxx 114: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[12].xxxx 115: MOV TEMP[12].xyz, -IN[6].xyzx 116: ADD TEMP[13].xyz, CONST[12].xyzz, TEMP[12].xyzz 117: RCP TEMP[14].x, TEMP[9].xxxx 118: RCP TEMP[14].y, TEMP[9].yyyy 119: RCP TEMP[14].z, TEMP[9].zzzz 120: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz 121: ADD TEMP[12].xyz, CONST[13].xyzz, TEMP[12].xyzz 122: RCP TEMP[14].x, TEMP[9].xxxx 123: RCP TEMP[14].y, TEMP[9].yyyy 124: RCP TEMP[14].z, TEMP[9].zzzz 125: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz 126: FSLT TEMP[14].xyz, IMM[0].wwww, TEMP[9].xyzz 127: UIF TEMP[14].xxxx :0 128: MOV TEMP[15].x, TEMP[13].xxxx 129: ELSE :0 130: MOV TEMP[15].x, TEMP[12].xxxx 131: ENDIF 132: UIF TEMP[14].yyyy :0 133: MOV TEMP[16].x, TEMP[13].yyyy 134: ELSE :0 135: MOV TEMP[16].x, TEMP[12].yyyy 136: ENDIF 137: UIF TEMP[14].zzzz :0 138: MOV TEMP[13].x, TEMP[13].zzzz 139: ELSE :0 140: MOV TEMP[13].x, TEMP[12].zzzz 141: ENDIF 142: ADD TEMP[12].xyz, CONST[12].xyzz, CONST[13].xyzz 143: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[1].xxxx 144: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx 145: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx 146: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[14].xyzz 147: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[6].xyzz 148: MAD TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xxxx, TEMP[14].xyzz 149: ADD TEMP[11].xyz, TEMP[9].xyzz, -TEMP[12].xyzz 150: ENDIF 151: ADD TEMP[9].x, IMM[0].zzzz, -CONST[24].xxxx 152: POW TEMP[9].x, TEMP[9].xxxx, IMM[1].yyyy 153: MUL TEMP[9].x, TEMP[9].xxxx, IMM[1].zzzz 154: MOV TEMP[11].xyz, TEMP[11].xyzz 155: MOV TEMP[11].w, TEMP[9].xxxx 156: TXL TEMP[9], TEMP[11], SAMP[1], CUBE 157: POW TEMP[11].x, TEMP[9].wwww, CONST[15].yyyy 158: MUL TEMP[11].x, CONST[15].xxxx, TEMP[11].xxxx 159: MUL TEMP[9].xyz, TEMP[11].xxxx, TEMP[9].xyzz 160: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[10].xyzz, TEMP[9].xyzz 161: ELSE :0 162: MOV TEMP[7].xyz, TEMP[10].xyzx 163: ENDIF 164: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx 165: MOV TEMP[1].xyz, -TEMP[1].xyzx 166: ADD TEMP[5].x, IMM[0].zzzz, -CONST[24].xxxx 167: ADD TEMP[9].xyz, CONST[0].xyzz, TEMP[1].xyzz 168: DP3 TEMP[10].x, TEMP[9].xyzz, TEMP[9].xyzz 169: RSQ TEMP[10].x, TEMP[10].xxxx 170: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xxxx 171: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz 172: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx 173: DP3 TEMP[10].x, CONST[0].xyzz, TEMP[9].xyzz 174: MAX TEMP[10].x, IMM[0].wwww, TEMP[10].xxxx 175: MUL TEMP[11].x, TEMP[5].xxxx, TEMP[5].xxxx 176: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].wwww 177: ADD TEMP[12].x, IMM[0].zzzz, -TEMP[5].xxxx 178: MAD TEMP[12].x, TEMP[12].xxxx, IMM[2].yyyy, IMM[2].zzzz 179: LG2 TEMP[12].x, TEMP[12].xxxx 180: RCP TEMP[12].x, TEMP[12].xxxx 181: MUL TEMP[12].x, IMM[2].xxxx, TEMP[12].xxxx 182: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[12].xxxx 183: ADD TEMP[13].x, IMM[0].zzzz, -TEMP[6].xxxx 184: ADD TEMP[14].x, IMM[0].zzzz, -TEMP[1].xxxx 185: MUL TEMP[15].x, IMM[0].xxxx, TEMP[10].xxxx 186: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx 187: MAD TEMP[5].x, TEMP[15].xxxx, TEMP[5].xxxx, IMM[1].xxxx 188: ADD TEMP[10].x, IMM[0].zzzz, -TEMP[10].xxxx 189: ADD TEMP[15].x, IMM[0].zzzz, -TEMP[1].xxxx 190: ADD TEMP[4].x, IMM[0].zzzz, -TEMP[4].xxxx 191: ADD TEMP[4].x, CONST[24].xxxx, TEMP[4].xxxx 192: MOV_SAT TEMP[4].x, TEMP[4].xxxx 193: MUL TEMP[16].x, TEMP[15].xxxx, TEMP[15].xxxx 194: MUL TEMP[17].x, TEMP[15].xxxx, TEMP[15].xxxx 195: MUL TEMP[15].x, TEMP[17].xxxx, TEMP[15].xxxx 196: MUL TEMP[15].x, TEMP[16].xxxx, TEMP[15].xxxx 197: LRP TEMP[4].xyz, TEMP[15].xxxx, TEMP[4].xxxx, TEMP[3].xyzz 198: LRP TEMP[15].x, TEMP[6].xxxx, IMM[0].zzzz, TEMP[11].xxxx 199: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, TEMP[11].xxxx 200: MAD TEMP[1].x, TEMP[15].xxxx, TEMP[1].xxxx, IMM[2].wwww 201: RCP TEMP[1].x, TEMP[1].xxxx 202: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[9].xyzz 203: MAX TEMP[9].x, IMM[0].wwww, TEMP[9].xxxx 204: POW TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx 205: ADD TEMP[11].x, TEMP[12].xxxx, IMM[0].zzzz 206: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].yyyy 207: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx 208: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[9].xxxx 209: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx 210: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx 211: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx 212: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[17].xyzz 213: ADD TEMP[9].xyz, IMM[0].zzzz, -TEMP[3].xyzz 214: MUL TEMP[11].x, TEMP[10].xxxx, TEMP[10].xxxx 215: MUL TEMP[12].x, TEMP[10].xxxx, TEMP[10].xxxx 216: MUL TEMP[10].x, TEMP[12].xxxx, TEMP[10].xxxx 217: MUL TEMP[10].x, TEMP[11].xxxx, TEMP[10].xxxx 218: MAD TEMP[3].xyz, TEMP[9].xyzz, TEMP[10].xxxx, TEMP[3].xyzz 219: ADD TEMP[9].x, TEMP[5].xxxx, IMM[0].yyyy 220: MUL TEMP[10].x, TEMP[13].xxxx, TEMP[13].xxxx 221: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[13].xxxx 222: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[13].xxxx 223: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 224: MAD TEMP[9].x, TEMP[9].xxxx, TEMP[10].xxxx, IMM[0].zzzz 225: ADD TEMP[5].x, TEMP[5].xxxx, IMM[0].yyyy 226: MUL TEMP[10].x, TEMP[14].xxxx, TEMP[14].xxxx 227: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx 228: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx 229: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 230: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[10].xxxx, IMM[0].zzzz 231: MUL TEMP[5].x, TEMP[9].xxxx, TEMP[5].xxxx 232: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 233: MAD TEMP[5].xyz, CONST[17].xyzz, TEMP[5].xxxx, TEMP[8].xyzz 234: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz 235: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz 236: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz 237: MOV TEMP[0].xyz, TEMP[0].xyzx 238: MAD TEMP[1].x, IN[5].xxxx, CONST[5].zzzz, CONST[5].wwww 239: MOV_SAT TEMP[1].x, TEMP[1].xxxx 240: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz 241: MOV TEMP[0].xyz, TEMP[0].xyzx 242: MOV TEMP[0].w, IMM[0].zzzz 243: MOV OUT[0], TEMP[0] 244: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300) %76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312) %79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368) %81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384) %82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 416) %83 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %84 = load <32 x i8>, <32 x i8> addrspace(2)* %83, align 32, !tbaa !0 %85 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0 %87 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %88 = bitcast <8 x i32> addrspace(2)* %87 to <32 x i8> addrspace(2)* %89 = load <32 x i8>, <32 x i8> addrspace(2)* %88, align 32, !tbaa !0 %90 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %91 = bitcast <4 x i32> addrspace(2)* %90 to <16 x i8> addrspace(2)* %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !tbaa !0 %93 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %94 = bitcast <8 x i32> addrspace(2)* %93 to <32 x i8> addrspace(2)* %95 = load <32 x i8>, <32 x i8> addrspace(2)* %94, align 32, !tbaa !0 %96 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %97 = bitcast <4 x i32> addrspace(2)* %96 to <16 x i8> addrspace(2)* %98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0 %99 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %100 = bitcast <8 x i32> addrspace(2)* %99 to <32 x i8> addrspace(2)* %101 = load <32 x i8>, <32 x i8> addrspace(2)* %100, align 32, !tbaa !0 %102 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %103 = bitcast <4 x i32> addrspace(2)* %102 to <16 x i8> addrspace(2)* %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0 %105 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %106 = bitcast <8 x i32> addrspace(2)* %105 to <32 x i8> addrspace(2)* %107 = load <32 x i8>, <32 x i8> addrspace(2)* %106, align 32, !tbaa !0 %108 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %109 = bitcast <4 x i32> addrspace(2)* %108 to <16 x i8> addrspace(2)* %110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0 %111 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %113 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %114 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %115 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %116 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %117 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %119 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %123 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %124 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %125 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %126 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %127 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %128 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7) %129 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %130 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %131 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %132 = bitcast float %111 to i32 %133 = bitcast float %112 to i32 %134 = insertelement <2 x i32> undef, i32 %132, i32 0 %135 = insertelement <2 x i32> %134, i32 %133, i32 1 %136 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %135, <32 x i8> %101, <16 x i8> %104, i32 2) %137 = extractelement <4 x float> %136, i32 1 %138 = extractelement <4 x float> %136, i32 3 %139 = fmul float %138, 2.000000e+00 %140 = fadd float %139, -1.000000e+00 %141 = fmul float %137, 2.000000e+00 %142 = fadd float %141, -1.000000e+00 %143 = fmul float %140, %79 %144 = fmul float %142, %79 %145 = fmul float %143, %143 %146 = fmul float %144, %144 %147 = fadd float %145, %146 %148 = call float @llvm.AMDIL.clamp.(float %147, float 0.000000e+00, float 1.000000e+00) %149 = fsub float 1.000000e+00, %148 %150 = call float @llvm.sqrt.f32(float %149) %151 = fmul float %143, %113 %152 = fmul float %144, %116 %153 = fadd float %152, %151 %154 = fmul float %150, %119 %155 = fadd float %153, %154 %156 = fmul float %143, %114 %157 = fmul float %144, %117 %158 = fadd float %157, %156 %159 = fmul float %150, %120 %160 = fadd float %158, %159 %161 = fmul float %143, %115 %162 = fmul float %144, %118 %163 = fadd float %162, %161 %164 = fmul float %150, %121 %165 = fadd float %163, %164 %166 = fmul float %155, %155 %167 = fmul float %160, %160 %168 = fadd float %167, %166 %169 = fmul float %165, %165 %170 = fadd float %168, %169 %171 = call float @llvm.AMDGPU.rsq.clamped.f32(float %170) %172 = fmul float %155, %171 %173 = fmul float %160, %171 %174 = fmul float %165, %171 %175 = fmul float %126, %126 %176 = fmul float %127, %127 %177 = fadd float %176, %175 %178 = fmul float %128, %128 %179 = fadd float %177, %178 %180 = call float @llvm.AMDGPU.rsq.clamped.f32(float %179) %181 = fmul float %126, %180 %182 = fmul float %127, %180 %183 = fmul float %128, %180 %184 = bitcast float %111 to i32 %185 = bitcast float %112 to i32 %186 = insertelement <2 x i32> undef, i32 %184, i32 0 %187 = insertelement <2 x i32> %186, i32 %185, i32 1 %188 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %187, <32 x i8> %95, <16 x i8> %98, i32 2) %189 = extractelement <4 x float> %188, i32 0 %190 = extractelement <4 x float> %188, i32 1 %191 = extractelement <4 x float> %188, i32 2 %192 = fmul float %76, %189 %193 = fmul float %77, %190 %194 = fmul float %78, %191 %195 = call float @llvm.AMDGPU.lrp(float %80, float %192, float %66) %196 = call float @llvm.AMDGPU.lrp(float %80, float %193, float %67) %197 = call float @llvm.AMDGPU.lrp(float %80, float %194, float %68) %198 = fmul float %80, %69 %199 = fsub float %69, %198 %200 = fmul float %192, %199 %201 = fmul float %193, %199 %202 = fmul float %194, %199 %203 = bitcast float %111 to i32 %204 = bitcast float %112 to i32 %205 = insertelement <2 x i32> undef, i32 %203, i32 0 %206 = insertelement <2 x i32> %205, i32 %204, i32 1 %207 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %206, <32 x i8> %107, <16 x i8> %110, i32 2) %208 = extractelement <4 x float> %207, i32 1 %209 = fsub float 1.000000e+00, %82 %210 = fmul float %208, %82 %211 = fadd float %210, %209 %212 = fmul float %172, %24 %213 = fmul float %173, %25 %214 = fadd float %213, %212 %215 = fmul float %174, %26 %216 = fadd float %214, %215 %217 = call float @llvm.maxnum.f32(float %216, float 0.000000e+00) %218 = fmul float %27, %172 %219 = fmul float %28, %173 %220 = fadd float %218, %219 %221 = fmul float %29, %174 %222 = fadd float %220, %221 %223 = fadd float %222, %30 %224 = fmul float %31, %172 %225 = fmul float %32, %173 %226 = fadd float %224, %225 %227 = fmul float %33, %174 %228 = fadd float %226, %227 %229 = fadd float %228, %34 %230 = fmul float %35, %172 %231 = fmul float %36, %173 %232 = fadd float %230, %231 %233 = fmul float %37, %174 %234 = fadd float %232, %233 %235 = fadd float %234, %38 %236 = fadd float %122, %223 %237 = fadd float %123, %229 %238 = fadd float %124, %235 %239 = fmul float %236, %211 %240 = fmul float %237, %211 %241 = fmul float %238, %211 %242 = fmul float %172, %181 %243 = fmul float %173, %182 %244 = fadd float %243, %242 %245 = fmul float %174, %183 %246 = fadd float %244, %245 %247 = fmul float %246, %172 %248 = fmul float %246, %173 %249 = fmul float %246, %174 %250 = fmul float %247, 2.000000e+00 %251 = fmul float %248, 2.000000e+00 %252 = fmul float %249, 2.000000e+00 %253 = fsub float %181, %250 %254 = fsub float %182, %251 %255 = fsub float %183, %252 %256 = fcmp ogt float %51, 0.000000e+00 br i1 %256, label %IF, label %ENDIF IF: ; preds = %main_body %257 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %258 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %259 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %260 = fmul float %253, %253 %261 = fmul float %254, %254 %262 = fadd float %261, %260 %263 = fmul float %255, %255 %264 = fadd float %262, %263 %265 = call float @llvm.AMDGPU.rsq.clamped.f32(float %264) %266 = fmul float %253, %265 %267 = fmul float %254, %265 %268 = fmul float %255, %265 %269 = fsub float %44, %129 %270 = fsub float %45, %130 %271 = fsub float %46, %131 %272 = fdiv float 1.000000e+00, %266 %273 = fdiv float 1.000000e+00, %267 %274 = fdiv float 1.000000e+00, %268 %275 = fmul float %269, %272 %276 = fmul float %270, %273 %277 = fmul float %271, %274 %278 = fsub float %47, %129 %279 = fsub float %48, %130 %280 = fsub float %49, %131 %281 = fdiv float 1.000000e+00, %266 %282 = fdiv float 1.000000e+00, %267 %283 = fdiv float 1.000000e+00, %268 %284 = fmul float %278, %281 %285 = fmul float %279, %282 %286 = fmul float %280, %283 %287 = fcmp ogt float %266, 0.000000e+00 %288 = fcmp ogt float %267, 0.000000e+00 %289 = fcmp ogt float %268, 0.000000e+00 %. = select i1 %287, float %275, float %284 %temp64.0 = select i1 %288, float %276, float %285 %.96 = select i1 %289, float %277, float %286 %290 = fadd float %44, %47 %291 = fadd float %45, %48 %292 = fadd float %46, %49 %293 = fmul float %290, 5.000000e-01 %294 = fmul float %291, 5.000000e-01 %295 = fmul float %292, 5.000000e-01 %296 = call float @llvm.minnum.f32(float %., float %temp64.0) %297 = call float @llvm.minnum.f32(float %296, float %.96) %298 = fsub float %293, %259 %299 = fsub float %294, %258 %300 = fsub float %295, %257 %301 = fadd float %298, %129 %302 = fadd float %299, %130 %303 = fadd float %300, %131 %304 = fmul float %266, %297 %305 = fadd float %304, %301 %306 = fmul float %267, %297 %307 = fadd float %306, %302 %308 = fmul float %268, %297 %309 = fadd float %308, %303 %310 = fsub float %305, %293 %311 = fsub float %307, %294 %312 = fsub float %309, %295 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp40.0 = phi float [ %310, %IF ], [ %253, %main_body ] %temp41.0 = phi float [ %311, %IF ], [ %254, %main_body ] %temp42.0 = phi float [ %312, %IF ], [ %255, %main_body ] %313 = fsub float 1.000000e+00, %81 %314 = call float @llvm.pow.f32(float %313, float 7.500000e-01) %315 = fmul float %314, 7.000000e+00 %316 = insertelement <4 x float> undef, float %temp40.0, i32 0 %317 = insertelement <4 x float> %316, float %temp41.0, i32 1 %318 = insertelement <4 x float> %317, float %temp42.0, i32 2 %319 = insertelement <4 x float> %318, float %315, i32 3 %320 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %319) %321 = extractelement <4 x float> %320, i32 0 %322 = extractelement <4 x float> %320, i32 1 %323 = extractelement <4 x float> %320, i32 2 %324 = extractelement <4 x float> %320, i32 3 %325 = call float @llvm.fabs.f32(float %323) %326 = fdiv float 1.000000e+00, %325 %327 = fmul float %321, %326 %328 = fadd float %327, 1.500000e+00 %329 = fmul float %322, %326 %330 = fadd float %329, 1.500000e+00 %331 = bitcast float %330 to i32 %332 = bitcast float %328 to i32 %333 = bitcast float %324 to i32 %334 = bitcast float %315 to i32 %335 = insertelement <4 x i32> undef, i32 %331, i32 0 %336 = insertelement <4 x i32> %335, i32 %332, i32 1 %337 = insertelement <4 x i32> %336, i32 %333, i32 2 %338 = insertelement <4 x i32> %337, i32 %334, i32 3 %339 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %338, <32 x i8> %84, <16 x i8> %86, i32 4) %340 = extractelement <4 x float> %339, i32 0 %341 = extractelement <4 x float> %339, i32 1 %342 = extractelement <4 x float> %339, i32 2 %343 = extractelement <4 x float> %339, i32 3 %344 = call float @llvm.pow.f32(float %343, float %53) %345 = fmul float %52, %344 %346 = fmul float %345, %340 %347 = fmul float %345, %341 %348 = fmul float %345, %342 %349 = fcmp olt float %50, 0x3FEFFFEB00000000 br i1 %349, label %IF82, label %ENDIF81 IF82: ; preds = %ENDIF %350 = fcmp ogt float %63, 0.000000e+00 br i1 %350, label %IF85, label %ENDIF84 ENDIF81: ; preds = %ENDIF, %ENDIF84 %temp28.0 = phi float [ %578, %ENDIF84 ], [ %346, %ENDIF ] %temp29.0 = phi float [ %579, %ENDIF84 ], [ %347, %ENDIF ] %temp30.0 = phi float [ %580, %ENDIF84 ], [ %348, %ENDIF ] %351 = fmul float %temp28.0, %211 %352 = fmul float %temp29.0, %211 %353 = fmul float %temp30.0, %211 %354 = fsub float 1.000000e+00, %81 %355 = fsub float %24, %181 %356 = fsub float %25, %182 %357 = fsub float %26, %183 %358 = fmul float %355, %355 %359 = fmul float %356, %356 %360 = fadd float %359, %358 %361 = fmul float %357, %357 %362 = fadd float %360, %361 %363 = call float @llvm.AMDGPU.rsq.clamped.f32(float %362) %364 = fmul float %355, %363 %365 = fmul float %356, %363 %366 = fmul float %357, %363 %367 = fmul float %181, %172 %368 = fsub float -0.000000e+00, %367 %369 = fmul float %182, %173 %370 = fsub float %368, %369 %371 = fmul float %183, %174 %372 = fsub float %370, %371 %373 = call float @llvm.maxnum.f32(float %372, float 0.000000e+00) %374 = fmul float %24, %364 %375 = fmul float %25, %365 %376 = fadd float %375, %374 %377 = fmul float %26, %366 %378 = fadd float %376, %377 %379 = call float @llvm.maxnum.f32(float %378, float 0.000000e+00) %380 = fmul float %354, %354 %381 = fmul float %380, %75 %382 = fsub float 1.000000e+00, %354 %383 = fmul float %382, 0x3FEEF9DB20000000 %384 = fadd float %383, 0x3F9EB851E0000000 %385 = call float @llvm.log2.f32(float %384) %386 = fdiv float 1.000000e+00, %385 %387 = fmul float %386, 1.000000e+01 %388 = fmul float %387, %387 %389 = fsub float 1.000000e+00, %217 %390 = fsub float 1.000000e+00, %373 %391 = fmul float %379, 2.000000e+00 %392 = fmul float %379, %354 %393 = fmul float %391, %392 %394 = fadd float %393, 5.000000e-01 %395 = fsub float 1.000000e+00, %379 %396 = fsub float 1.000000e+00, %373 %397 = fsub float 1.000000e+00, %199 %398 = fadd float %81, %397 %399 = call float @llvm.AMDIL.clamp.(float %398, float 0.000000e+00, float 1.000000e+00) %400 = fmul float %396, %396 %401 = fmul float %396, %396 %402 = fmul float %401, %396 %403 = fmul float %400, %402 %404 = call float @llvm.AMDGPU.lrp(float %403, float %399, float %195) %405 = call float @llvm.AMDGPU.lrp(float %403, float %399, float %196) %406 = call float @llvm.AMDGPU.lrp(float %403, float %399, float %197) %407 = call float @llvm.AMDGPU.lrp(float %217, float 1.000000e+00, float %381) %408 = call float @llvm.AMDGPU.lrp(float %373, float 1.000000e+00, float %381) %409 = fmul float %407, %408 %410 = fadd float %409, 0x3F1A36E2E0000000 %411 = fdiv float 1.000000e+00, %410 %412 = fmul float %172, %364 %413 = fmul float %173, %365 %414 = fadd float %413, %412 %415 = fmul float %174, %366 %416 = fadd float %414, %415 %417 = call float @llvm.maxnum.f32(float %416, float 0.000000e+00) %418 = call float @llvm.pow.f32(float %417, float %388) %419 = fadd float %388, 1.000000e+00 %420 = fmul float %419, %74 %421 = fmul float %418, %420 %422 = fmul float %411, %421 %423 = fmul float %422, %217 %424 = fmul float %423, %73 %425 = call float @llvm.maxnum.f32(float %424, float 0.000000e+00) %426 = fmul float %425, %70 %427 = fmul float %425, %71 %428 = fmul float %425, %72 %429 = fsub float 1.000000e+00, %195 %430 = fsub float 1.000000e+00, %196 %431 = fsub float 1.000000e+00, %197 %432 = fmul float %395, %395 %433 = fmul float %395, %395 %434 = fmul float %433, %395 %435 = fmul float %432, %434 %436 = fmul float %429, %435 %437 = fadd float %436, %195 %438 = fmul float %430, %435 %439 = fadd float %438, %196 %440 = fmul float %431, %435 %441 = fadd float %440, %197 %442 = fadd float %394, -1.000000e+00 %443 = fmul float %389, %389 %444 = fmul float %389, %389 %445 = fmul float %444, %389 %446 = fmul float %443, %445 %447 = fmul float %442, %446 %448 = fadd float %447, 1.000000e+00 %449 = fadd float %394, -1.000000e+00 %450 = fmul float %390, %390 %451 = fmul float %390, %390 %452 = fmul float %451, %390 %453 = fmul float %450, %452 %454 = fmul float %449, %453 %455 = fadd float %454, 1.000000e+00 %456 = fmul float %448, %455 %457 = fmul float %456, %217 %458 = fmul float %70, %457 %459 = fadd float %458, %239 %460 = fmul float %71, %457 %461 = fadd float %460, %240 %462 = fmul float %72, %457 %463 = fadd float %462, %241 %464 = fmul float %200, %459 %465 = fmul float %201, %461 %466 = fmul float %202, %463 %467 = fmul float %426, %437 %468 = fadd float %467, %464 %469 = fmul float %427, %439 %470 = fadd float %469, %465 %471 = fmul float %428, %441 %472 = fadd float %471, %466 %473 = fmul float %351, %404 %474 = fadd float %473, %468 %475 = fmul float %352, %405 %476 = fadd float %475, %470 %477 = fmul float %353, %406 %478 = fadd float %477, %472 %479 = fmul float %125, %42 %480 = fadd float %479, %43 %481 = call float @llvm.AMDIL.clamp.(float %480, float 0.000000e+00, float 1.000000e+00) %482 = call float @llvm.AMDGPU.lrp(float %481, float %474, float %39) %483 = call float @llvm.AMDGPU.lrp(float %481, float %476, float %40) %484 = call float @llvm.AMDGPU.lrp(float %481, float %478, float %41) %485 = call i32 @llvm.SI.packf16(float %482, float %483) %486 = bitcast i32 %485 to float %487 = call i32 @llvm.SI.packf16(float %484, float 1.000000e+00) %488 = bitcast i32 %487 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %486, float %488, float %486, float %488) ret void IF85: ; preds = %IF82 %489 = fmul float %253, %253 %490 = fmul float %254, %254 %491 = fadd float %490, %489 %492 = fmul float %255, %255 %493 = fadd float %491, %492 %494 = call float @llvm.AMDGPU.rsq.clamped.f32(float %493) %495 = fmul float %253, %494 %496 = fmul float %254, %494 %497 = fmul float %255, %494 %498 = fsub float %54, %129 %499 = fsub float %55, %130 %500 = fsub float %56, %131 %501 = fdiv float 1.000000e+00, %495 %502 = fdiv float 1.000000e+00, %496 %503 = fdiv float 1.000000e+00, %497 %504 = fmul float %498, %501 %505 = fmul float %499, %502 %506 = fmul float %500, %503 %507 = fsub float %57, %129 %508 = fsub float %58, %130 %509 = fsub float %59, %131 %510 = fdiv float 1.000000e+00, %495 %511 = fdiv float 1.000000e+00, %496 %512 = fdiv float 1.000000e+00, %497 %513 = fmul float %507, %510 %514 = fmul float %508, %511 %515 = fmul float %509, %512 %516 = fcmp ogt float %495, 0.000000e+00 %517 = fcmp ogt float %496, 0.000000e+00 %518 = fcmp ogt float %497, 0.000000e+00 %.97 = select i1 %516, float %504, float %513 %temp64.1 = select i1 %517, float %505, float %514 %.98 = select i1 %518, float %506, float %515 %519 = fadd float %54, %57 %520 = fadd float %55, %58 %521 = fadd float %56, %59 %522 = fmul float %519, 5.000000e-01 %523 = fmul float %520, 5.000000e-01 %524 = fmul float %521, 5.000000e-01 %525 = call float @llvm.minnum.f32(float %.97, float %temp64.1) %526 = call float @llvm.minnum.f32(float %525, float %.98) %527 = fsub float %522, %60 %528 = fsub float %523, %61 %529 = fsub float %524, %62 %530 = fadd float %527, %129 %531 = fadd float %528, %130 %532 = fadd float %529, %131 %533 = fmul float %495, %526 %534 = fadd float %533, %530 %535 = fmul float %496, %526 %536 = fadd float %535, %531 %537 = fmul float %497, %526 %538 = fadd float %537, %532 %539 = fsub float %534, %522 %540 = fsub float %536, %523 %541 = fsub float %538, %524 br label %ENDIF84 ENDIF84: ; preds = %IF82, %IF85 %temp44.0 = phi float [ %539, %IF85 ], [ %253, %IF82 ] %temp45.0 = phi float [ %540, %IF85 ], [ %254, %IF82 ] %temp46.0 = phi float [ %541, %IF85 ], [ %255, %IF82 ] %542 = fsub float 1.000000e+00, %81 %543 = call float @llvm.pow.f32(float %542, float 7.500000e-01) %544 = fmul float %543, 7.000000e+00 %545 = insertelement <4 x float> undef, float %temp44.0, i32 0 %546 = insertelement <4 x float> %545, float %temp45.0, i32 1 %547 = insertelement <4 x float> %546, float %temp46.0, i32 2 %548 = insertelement <4 x float> %547, float %544, i32 3 %549 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %548) %550 = extractelement <4 x float> %549, i32 0 %551 = extractelement <4 x float> %549, i32 1 %552 = extractelement <4 x float> %549, i32 2 %553 = extractelement <4 x float> %549, i32 3 %554 = call float @llvm.fabs.f32(float %552) %555 = fdiv float 1.000000e+00, %554 %556 = fmul float %550, %555 %557 = fadd float %556, 1.500000e+00 %558 = fmul float %551, %555 %559 = fadd float %558, 1.500000e+00 %560 = bitcast float %559 to i32 %561 = bitcast float %557 to i32 %562 = bitcast float %553 to i32 %563 = bitcast float %544 to i32 %564 = insertelement <4 x i32> undef, i32 %560, i32 0 %565 = insertelement <4 x i32> %564, i32 %561, i32 1 %566 = insertelement <4 x i32> %565, i32 %562, i32 2 %567 = insertelement <4 x i32> %566, i32 %563, i32 3 %568 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %567, <32 x i8> %89, <16 x i8> %92, i32 4) %569 = extractelement <4 x float> %568, i32 0 %570 = extractelement <4 x float> %568, i32 1 %571 = extractelement <4 x float> %568, i32 2 %572 = extractelement <4 x float> %568, i32 3 %573 = call float @llvm.pow.f32(float %572, float %65) %574 = fmul float %64, %573 %575 = fmul float %574, %569 %576 = fmul float %574, %570 %577 = fmul float %574, %571 %578 = call float @llvm.AMDGPU.lrp(float %50, float %346, float %575) %579 = call float @llvm.AMDGPU.lrp(float %50, float %347, float %576) %580 = call float @llvm.AMDGPU.lrp(float %50, float %348, float %577) br label %ENDIF81 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v13, v0, 0, 0, [m0] ; C8340000 v_interp_p2_f32 v13, [v13], v1, 0, 0, [m0] ; C8350001 v_interp_p1_f32 v14, v0, 1, 0, [m0] ; C8380100 v_interp_p2_f32 v14, [v14], v1, 1, 0, [m0] ; C8390101 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800 v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 v_interp_p1_f32 v11, v0, 2, 2, [m0] ; C82C0A00 v_interp_p2_f32 v11, [v11], v1, 2, 2, [m0] ; C82D0A01 v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00 v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01 v_interp_p1_f32 v15, v0, 1, 3, [m0] ; C83C0D00 v_interp_p2_f32 v15, [v15], v1, 1, 3, [m0] ; C83D0D01 v_interp_p1_f32 v16, v0, 2, 3, [m0] ; C8400E00 v_interp_p2_f32 v16, [v16], v1, 2, 3, [m0] ; C8410E01 v_interp_p1_f32 v8, v0, 0, 4, [m0] ; C8201000 v_interp_p2_f32 v8, [v8], v1, 0, 4, [m0] ; C8211001 v_interp_p1_f32 v9, v0, 1, 4, [m0] ; C8241100 v_interp_p2_f32 v9, [v9], v1, 1, 4, [m0] ; C8251101 v_interp_p1_f32 v10, v0, 2, 4, [m0] ; C8281200 v_interp_p2_f32 v10, [v10], v1, 2, 4, [m0] ; C8291201 v_interp_p1_f32 v3, v0, 0, 5, [m0] ; C80C1400 v_interp_p2_f32 v3, [v3], v1, 0, 5, [m0] ; C80D1401 v_interp_p1_f32 v18, v0, 1, 5, [m0] ; C8481500 v_interp_p2_f32 v18, [v18], v1, 1, 5, [m0] ; C8491501 v_interp_p1_f32 v19, v0, 2, 5, [m0] ; C84C1600 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p2_f32 v19, [v19], v1, 2, 5, [m0] ; C84D1601 v_interp_p1_f32 v20, v0, 3, 5, [m0] ; C8501700 v_interp_p2_f32 v20, [v20], v1, 3, 5, [m0] ; C8511701 s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C s_load_dwordx8 s[20:27], s[6:7], 0x18 ; C0CA0718 v_interp_p1_f32 v21, v0, 0, 6, [m0] ; C8541800 v_interp_p2_f32 v21, [v21], v1, 0, 6, [m0] ; C8551801 v_interp_p1_f32 v17, v0, 1, 6, [m0] ; C8441900 v_interp_p2_f32 v17, [v17], v1, 1, 6, [m0] ; C8451901 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[8:11], 0x58 ; C2060958 v_interp_p1_f32 v22, v0, 2, 6, [m0] ; C8581A00 v_interp_p2_f32 v22, [v22], v1, 2, 6, [m0] ; C8591A01 s_load_dwordx4 s[16:19], s[4:5], 0x10 ; C0880510 s_load_dwordx8 s[32:39], s[6:7], 0x20 ; C0D00720 image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[20:27], s[0:3] ; F0800A00 0005000D s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4 s_buffer_load_dword s21, s[8:11], 0x5c ; C20A895C s_buffer_load_dword s0, s[8:11], 0x60 ; C2000960 v_mul_f32_e32 v1, s12, v1 ; 1002020C v_mul_f32_e32 v0, s12, v0 ; 1000000C v_mul_f32_e32 v2, v2, v1 ; 10040302 v_mac_f32_e32 v2, v6, v0 ; 3E040106 v_mul_f32_e32 v4, v4, v1 ; 10080304 v_mac_f32_e32 v4, v7, v0 ; 3E080107 v_mul_f32_e32 v7, v5, v1 ; 100E0305 v_mac_f32_e32 v7, v11, v0 ; 3E0E010B v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mac_f32_e32 v0, v1, v1 ; 3E000301 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v0, 1.0, v0 ; 080000F2 v_sqrt_f32_e32 v0, v0 ; 7E006700 v_mac_f32_e32 v2, v12, v0 ; 3E04010C v_mac_f32_e32 v4, v15, v0 ; 3E08010F v_mac_f32_e32 v7, v16, v0 ; 3E0E0110 v_mul_f32_e32 v0, v2, v2 ; 10000502 v_mac_f32_e32 v0, v4, v4 ; 3E000904 v_mac_f32_e32 v0, v7, v7 ; 3E000F07 v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_mul_f32_e32 v1, v18, v18 ; 10022512 v_mac_f32_e32 v1, v19, v19 ; 3E022713 v_mac_f32_e32 v1, v20, v20 ; 3E022914 v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 v_mul_f32_e32 v6, v0, v2 ; 100C0500 v_mul_f32_e32 v5, v0, v4 ; 100A0900 v_mul_f32_e32 v4, v0, v7 ; 10080F00 v_mul_f32_e32 v12, v1, v18 ; 10182501 v_mul_f32_e32 v11, v1, v19 ; 10162701 v_mul_f32_e32 v0, v12, v6 ; 10000D0C v_mac_f32_e32 v0, v11, v5 ; 3E000B0B v_mul_f32_e32 v7, v1, v20 ; 100E2901 v_mac_f32_e32 v0, v7, v4 ; 3E000907 v_mul_f32_e32 v2, v6, v0 ; 10040106 v_mac_f32_e32 v2, v6, v0 ; 3E040106 v_mul_f32_e32 v15, v5, v0 ; 101E0105 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710 v_mac_f32_e32 v15, v5, v0 ; 3E1E0105 v_mad_f32 v23, v18, v1, -v2 ; D2820017 840A0312 v_mad_f32 v24, v19, v1, -v15 ; D2820018 843E0313 s_buffer_load_dword s1, s[8:11], 0x4c ; C200894C s_buffer_load_dword s2, s[8:11], 0x4d ; C201094D s_buffer_load_dword s3, s[8:11], 0x4e ; C201894E v_mul_f32_e32 v2, v4, v0 ; 10040104 v_mac_f32_e32 v2, v4, v0 ; 3E040104 v_mad_f32 v25, v20, v1, -v2 ; D2820019 840A0314 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[24:31], s[12:15] ; F0800700 0066000D s_buffer_load_dword s13, s[8:11], 0x40 ; C2068940 s_buffer_load_dword s14, s[8:11], 0x41 ; C2070941 s_buffer_load_dword s15, s[8:11], 0x42 ; C2078942 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v18, s1, v0 ; 10240001 v_mul_f32_e32 v19, s2, v1 ; 10260202 v_mul_f32_e32 v20, s3, v2 ; 10280403 s_buffer_load_dword s12, s[8:11], 0x27 ; C2060927 s_buffer_load_dword s1, s[8:11], 0x2b ; C200892B s_buffer_load_dword s29, s[8:11], 0x2c ; C20E892C s_buffer_load_dword s30, s[8:11], 0x2d ; C20F092D v_sub_f32_e64 v0, 1.0, s21 ; D2080000 00002AF2 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s13, v0 ; 1004000D v_mul_f32_e32 v1, s14, v0 ; 1002000E v_mul_f32_e32 v0, s15, v0 ; 1000000F v_mac_f32_e32 v2, s21, v18 ; 3E042415 v_mov_b32_e32 v26, v23 ; 7E340317 v_mac_f32_e32 v1, s21, v19 ; 3E022615 v_mov_b32_e32 v27, v24 ; 7E360318 v_mac_f32_e32 v0, s21, v20 ; 3E002815 v_mov_b32_e32 v28, v25 ; 7E380319 v_cmp_lt_f32_e64 s[2:3], 0, s1 ; D0020002 00000280 image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[32:39], s[16:19] ; F0800F00 00880D0D s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[22:23], s[2:3] ; BE962402 s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E s_cbranch_execz BB0_2 ; BF880000 s_buffer_load_dword s1, s[8:11], 0x20 ; C2008920 s_buffer_load_dword s2, s[8:11], 0x21 ; C2010921 s_buffer_load_dword s3, s[8:11], 0x22 ; C2018922 s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924 s_buffer_load_dword s14, s[8:11], 0x25 ; C2070925 v_mul_f32_e32 v13, v23, v23 ; 101A2F17 v_mac_f32_e32 v13, v24, v24 ; 3E1A3118 v_mac_f32_e32 v13, v25, v25 ; 3E1A3319 v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D s_buffer_load_dword s15, s[8:11], 0x26 ; C2078926 s_buffer_load_dword s16, s[8:11], 0x28 ; C2080928 s_buffer_load_dword s17, s[8:11], 0x29 ; C2088929 s_buffer_load_dword s18, s[8:11], 0x2a ; C209092A v_mul_f32_e32 v15, v13, v23 ; 101E2F0D v_mul_f32_e32 v16, v13, v24 ; 1020310D v_mul_f32_e32 v13, v13, v25 ; 101A330D v_rcp_f32_e32 v26, v15 ; 7E34550F s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v27, s1, v21 ; 08362A01 v_sub_f32_e32 v28, s2, v17 ; 08382202 v_rcp_f32_e32 v29, v16 ; 7E3A5510 v_mul_f32_e32 v27, v26, v27 ; 1036371A v_sub_f32_e32 v30, s13, v21 ; 083C2A0D v_mul_f32_e32 v26, v26, v30 ; 10343D1A v_cmp_lt_f32_e32 vcc, 0, v15 ; 7C021E80 v_cndmask_b32_e32 v26, v26, v27 ; 0034371A v_rcp_f32_e32 v27, v13 ; 7E36550D v_mul_f32_e32 v28, v29, v28 ; 1038391D v_sub_f32_e32 v30, s14, v17 ; 083C220E v_mul_f32_e32 v29, v29, v30 ; 103A3D1D v_cmp_lt_f32_e32 vcc, 0, v16 ; 7C022080 v_cndmask_b32_e32 v28, v29, v28 ; 0038391D v_sub_f32_e32 v29, s3, v22 ; 083A2C03 v_mul_f32_e32 v29, v27, v29 ; 103A3B1B v_sub_f32_e32 v30, s15, v22 ; 083C2C0F v_mul_f32_e32 v27, v27, v30 ; 10363D1B v_cmp_lt_f32_e32 vcc, 0, v13 ; 7C021A80 v_cndmask_b32_e32 v27, v27, v29 ; 00363B1B v_min3_f32 v26, v26, v28, v27 ; D2A2001A 046E391A v_mov_b32_e32 v27, s13 ; 7E36020D v_add_f32_e32 v27, s1, v27 ; 06363601 v_mov_b32_e32 v28, s14 ; 7E38020E v_add_f32_e32 v28, s2, v28 ; 06383802 v_mov_b32_e32 v29, s15 ; 7E3A020F v_add_f32_e32 v29, s3, v29 ; 063A3A03 v_mad_f32 v30, 0.5, v27, -s16 ; D282001E 804236F0 v_add_f32_e32 v30, v21, v30 ; 063C3D15 v_mac_f32_e32 v30, v26, v15 ; 3E3C1F1A v_mad_f32 v15, 0.5, v28, -s17 ; D282000F 804638F0 v_add_f32_e32 v15, v17, v15 ; 061E1F11 v_mac_f32_e32 v15, v26, v16 ; 3E1E211A v_mad_f32 v16, 0.5, v29, -s18 ; D2820010 804A3AF0 v_add_f32_e32 v16, v22, v16 ; 06202116 v_mac_f32_e32 v16, v26, v13 ; 3E201B1A v_mad_f32 v26, 0.5, -v27, v30 ; D282001A 447A36F0 v_mad_f32 v27, 0.5, -v28, v15 ; D282001B 443E38F0 v_mad_f32 v28, 0.5, -v29, v16 ; D282001C 44423AF0 s_or_b64 exec, exec, s[22:23] ; 88FE167E s_buffer_load_dword s14, s[8:11], 0x17 ; C2070917 s_buffer_load_dword s15, s[8:11], 0x43 ; C2078943 s_buffer_load_dword s13, s[8:11], 0x68 ; C2068968 s_buffer_load_dword s1, s[8:11], 0x0 ; C2008900 s_buffer_load_dword s2, s[8:11], 0x1 ; C2010901 s_buffer_load_dword s3, s[8:11], 0x2 ; C2018902 s_buffer_load_dword s16, s[8:11], 0x4 ; C2080904 s_buffer_load_dword s17, s[8:11], 0x5 ; C2088905 s_buffer_load_dword s18, s[8:11], 0x6 ; C2090906 s_buffer_load_dword s20, s[8:11], 0x7 ; C20A0907 s_buffer_load_dword s19, s[8:11], 0x8 ; C2098908 s_buffer_load_dword s22, s[8:11], 0x9 ; C20B0909 s_buffer_load_dword s23, s[8:11], 0xa ; C20B890A s_buffer_load_dword s24, s[8:11], 0xb ; C20C090B s_buffer_load_dword s25, s[8:11], 0xc ; C20C890C s_buffer_load_dword s26, s[8:11], 0xd ; C20D090D s_buffer_load_dword s27, s[8:11], 0xe ; C20D890E s_buffer_load_dword s28, s[8:11], 0xf ; C20E090F v_sub_f32_e64 v13, 1.0, s0 ; D208000D 000000F2 v_log_f32_e32 v13, v13 ; 7E1A4F0D s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 v_mul_legacy_f32_e32 v13, 0x3f400000, v13 ; 0E1A1AFF 3F400000 v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_mul_f32_e32 v29, 0x40e00000, v13 ; 103A1AFF 40E00000 v_cubeid_f32 v33, v26, v27, v28 ; D2880021 0472371A v_cubema_f32 v32, v26, v27, v28 ; D28E0020 0472371A v_cubesc_f32 v31, v26, v27, v28 ; D28A001F 0472371A v_cubetc_f32 v30, v26, v27, v28 ; D28C001E 0472371A v_mov_b32_e32 v26, 0x3fc00000 ; 7E3402FF 3FC00000 v_rcp_f32_e64 v13, |v32| ; D354010D 00000120 v_mad_f32 v27, v13, v30, v26 ; D282001B 046A3D0D v_mac_f32_e32 v26, v13, v31 ; 3E343F0D v_mov_b32_e32 v28, v33 ; 7E380321 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[36:43], s[32:35] ; F0900F00 01091A1A s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v13, v29 ; 7E1A4F1D v_mul_legacy_f32_e32 v13, s30, v13 ; 0E1A1A1E v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_mul_f32_e32 v13, s29, v13 ; 101A1A1D v_mul_f32_e32 v16, v26, v13 ; 10201B1A v_mul_f32_e32 v15, v27, v13 ; 101E1B1B v_mul_f32_e32 v13, v28, v13 ; 101A1B1C v_mov_b32_e32 v27, s21 ; 7E360215 v_mov_b32_e32 v26, 0x3f7fff58 ; 7E3402FF 3F7FFF58 v_cmp_lt_f32_e32 vcc, s12, v26 ; 7C02340C s_and_saveexec_b64 s[30:31], vcc ; BE9E246A s_xor_b64 s[30:31], exec, s[30:31] ; 899E1E7E s_cbranch_execz BB0_6 ; BF880000 s_buffer_load_dword s32, s[8:11], 0x3b ; C210093B s_buffer_load_dword s21, s[8:11], 0x3c ; C20A893C s_buffer_load_dword s29, s[8:11], 0x3d ; C20E893D s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[32:33], 0, s32 ; D0020020 00004080 s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420 s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E s_cbranch_execz BB0_7 ; BF880000 s_buffer_load_dword s34, s[8:11], 0x36 ; C2110936 s_buffer_load_dword s35, s[8:11], 0x38 ; C2118938 s_buffer_load_dword s36, s[8:11], 0x39 ; C2120939 s_buffer_load_dword s37, s[8:11], 0x3a ; C212893A s_buffer_load_dword s38, s[8:11], 0x30 ; C2130930 s_buffer_load_dword s39, s[8:11], 0x31 ; C2138931 s_buffer_load_dword s40, s[8:11], 0x32 ; C2140932 s_buffer_load_dword s41, s[8:11], 0x34 ; C2148934 s_buffer_load_dword s42, s[8:11], 0x35 ; C2150935 v_mul_f32_e32 v26, v23, v23 ; 10342F17 v_mac_f32_e32 v26, v24, v24 ; 3E343118 v_mac_f32_e32 v26, v25, v25 ; 3E343319 v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v28, s34, v22 ; 08382C22 v_mov_b32_e32 v29, s34 ; 7E3A0222 v_sub_f32_e32 v30, s38, v21 ; 083C2A26 v_sub_f32_e32 v31, s39, v17 ; 083E2227 v_add_f32_e32 v29, s40, v29 ; 063A3A28 v_sub_f32_e32 v32, s40, v22 ; 08402C28 v_mad_f32 v33, 0.5, v29, -s37 ; D2820021 80963AF0 v_add_f32_e32 v22, v22, v33 ; 062C4316 v_mul_f32_e32 v23, v26, v23 ; 102E2F1A v_mul_f32_e32 v24, v26, v24 ; 1030311A v_mul_f32_e32 v25, v26, v25 ; 1032331A v_rcp_f32_e32 v26, v23 ; 7E345517 v_rcp_f32_e32 v33, v24 ; 7E425518 v_rcp_f32_e32 v34, v25 ; 7E445519 v_sub_f32_e32 v35, s41, v21 ; 08462A29 v_mov_b32_e32 v36, s41 ; 7E480229 v_add_f32_e32 v36, s38, v36 ; 06484826 v_mul_f32_e32 v30, v26, v30 ; 103C3D1A v_mul_f32_e32 v26, v26, v35 ; 1034471A v_mul_f32_e32 v31, v33, v31 ; 103E3F21 v_mul_f32_e32 v32, v34, v32 ; 10404122 v_mul_f32_e32 v28, v34, v28 ; 10383922 v_mad_f32 v34, 0.5, v36, -s35 ; D2820022 808E48F0 v_add_f32_e32 v21, v21, v34 ; 062A4515 v_sub_f32_e32 v34, s42, v17 ; 0844222A v_mov_b32_e32 v35, s42 ; 7E46022A v_mul_f32_e32 v33, v33, v34 ; 10424521 v_add_f32_e32 v34, s39, v35 ; 06444627 v_cmp_lt_f32_e32 vcc, 0, v23 ; 7C022E80 v_cndmask_b32_e32 v26, v26, v30 ; 00343D1A v_cmp_lt_f32_e32 vcc, 0, v24 ; 7C023080 v_cndmask_b32_e32 v30, v33, v31 ; 003C3F21 v_cmp_lt_f32_e32 vcc, 0, v25 ; 7C023280 v_cndmask_b32_e32 v28, v28, v32 ; 0038411C v_min3_f32 v26, v26, v30, v28 ; D2A2001A 04723D1A v_mad_f32 v28, 0.5, v34, -s36 ; D282001C 809244F0 v_add_f32_e32 v17, v17, v28 ; 06223911 v_mac_f32_e32 v21, v26, v23 ; 3E2A2F1A v_mac_f32_e32 v17, v26, v24 ; 3E22311A v_mac_f32_e32 v22, v26, v25 ; 3E2C331A v_mad_f32 v23, 0.5, -v36, v21 ; D2820017 445648F0 v_mad_f32 v24, 0.5, -v34, v17 ; D2820018 444644F0 v_mad_f32 v25, 0.5, -v29, v22 ; D2820019 445A3AF0 s_or_b64 exec, exec, s[32:33] ; 88FE207E v_sub_f32_e64 v17, 1.0, s0 ; D2080011 000000F2 v_log_f32_e32 v17, v17 ; 7E224F11 s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504 v_mul_legacy_f32_e32 v17, 0x3f400000, v17 ; 0E2222FF 3F400000 v_exp_f32_e32 v17, v17 ; 7E224B11 v_mul_f32_e32 v26, 0x40e00000, v17 ; 103422FF 40E00000 v_cubeid_f32 v31, v23, v24, v25 ; D288001F 04663117 v_cubema_f32 v30, v23, v24, v25 ; D28E001E 04663117 s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708 v_cubesc_f32 v29, v23, v24, v25 ; D28A001D 04663117 v_cubetc_f32 v28, v23, v24, v25 ; D28C001C 04663117 v_rcp_f32_e64 v17, |v30| ; D3540111 0000011E v_mov_b32_e32 v23, 0x3fc00000 ; 7E2E02FF 3FC00000 v_mad_f32 v24, v17, v28, v23 ; D2820018 045E3911 v_mac_f32_e32 v23, v17, v29 ; 3E2E3B11 v_mov_b32_e32 v25, v31 ; 7E32031F s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[36:43], s[32:35] ; F0900F00 01091517 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v17, v24 ; 7E224F18 v_sub_f32_e64 v24, 1.0, s12 ; D2080018 000018F2 v_mul_legacy_f32_e32 v17, s29, v17 ; 0E22221D v_exp_f32_e32 v17, v17 ; 7E224B11 v_mul_f32_e32 v17, s21, v17 ; 10222215 v_mul_f32_e32 v21, v21, v17 ; 102A2315 v_mul_f32_e32 v22, v22, v17 ; 102C2316 v_mul_f32_e32 v17, v23, v17 ; 10222317 v_mul_f32_e32 v21, v21, v24 ; 102A3115 v_mul_f32_e32 v22, v22, v24 ; 102C3116 v_mul_f32_e32 v17, v17, v24 ; 10223111 v_mac_f32_e32 v21, s12, v16 ; 3E2A200C v_mac_f32_e32 v22, s12, v15 ; 3E2C1E0C v_mac_f32_e32 v17, s12, v13 ; 3E221A0C v_mov_b32_e32 v13, v17 ; 7E1A0311 v_mov_b32_e32 v15, v22 ; 7E1E0316 v_mov_b32_e32 v16, v21 ; 7E200315 s_or_b64 exec, exec, s[30:31] ; 88FE1E7E v_mad_f32 v22, -v27, s15, s15 ; D2820016 203C1F1B v_mov_b32_e32 v17, s14 ; 7E22020E v_mul_f32_e32 v21, v22, v18 ; 102A2516 v_mul_f32_e32 v19, v22, v19 ; 10262716 v_mul_f32_e32 v18, v22, v20 ; 10242916 v_mul_f32_e32 v20, s17, v5 ; 10280A11 v_mac_f32_e32 v20, s16, v6 ; 3E280C10 v_mac_f32_e32 v20, s18, v4 ; 3E280812 v_add_f32_e32 v20, s20, v20 ; 06282814 v_add_f32_e32 v23, v20, v8 ; 062E1114 v_mul_f32_e32 v8, s22, v5 ; 10100A16 v_mac_f32_e32 v8, s19, v6 ; 3E100C13 v_mac_f32_e32 v8, s23, v4 ; 3E100817 v_add_f32_e32 v8, s24, v8 ; 06101018 v_add_f32_e32 v9, v8, v9 ; 06121308 v_mul_f32_e32 v8, s26, v5 ; 10100A1A v_mac_f32_e32 v8, s25, v6 ; 3E100C19 v_mac_f32_e32 v8, s27, v4 ; 3E10081B v_add_f32_e32 v8, s28, v8 ; 0610101C v_add_f32_e32 v10, v8, v10 ; 06141508 s_buffer_load_dword s6, s[8:11], 0x10 ; C2030910 s_buffer_load_dword s5, s[8:11], 0x11 ; C2028911 s_buffer_load_dword s4, s[8:11], 0x12 ; C2020912 s_buffer_load_dword s17, s[8:11], 0x16 ; C2088916 s_buffer_load_dword s14, s[8:11], 0x44 ; C2070944 s_buffer_load_dword s7, s[8:11], 0x45 ; C2038945 s_buffer_load_dword s12, s[8:11], 0x46 ; C2060946 s_buffer_load_dword s15, s[8:11], 0x48 ; C2078948 s_buffer_load_dword s16, s[8:11], 0x49 ; C2080949 s_buffer_load_dword s8, s[8:11], 0x4b ; C204094B v_sub_f32_e64 v20, 1.0, s13 ; D2080014 00001AF2 v_mac_f32_e32 v20, s13, v14 ; 3E281C0D v_mul_f32_e32 v8, s1, v6 ; 10100C01 v_mac_f32_e32 v8, s2, v5 ; 3E100A02 v_mac_f32_e32 v8, s3, v4 ; 3E100803 v_max_f32_e32 v8, 0, v8 ; 20101080 v_mul_f32_e32 v14, v20, v23 ; 101C2F14 v_mul_f32_e32 v9, v20, v9 ; 10121314 v_mul_f32_e32 v10, v20, v10 ; 10141514 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v17, s17, v3 ; 3E220611 v_mul_f32_e32 v3, v20, v16 ; 10062114 v_mul_f32_e32 v15, v20, v15 ; 101E1F14 v_mul_f32_e32 v13, v20, v13 ; 101A1B14 v_sub_f32_e32 v16, 1.0, v22 ; 08202CF2 v_add_f32_e32 v16, s0, v16 ; 06202000 v_sub_f32_e64 v20, 1.0, s0 ; D2080014 000000F2 v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080 v_sub_f32_e32 v22, s1, v12 ; 082C1801 v_sub_f32_e32 v23, s2, v11 ; 082E1602 v_mul_f32_e32 v24, v22, v22 ; 10302D16 v_mac_f32_e32 v24, v23, v23 ; 3E302F17 v_sub_f32_e32 v25, s3, v7 ; 08320E03 v_mac_f32_e32 v24, v25, v25 ; 3E303319 v_rsq_clamp_f32_e32 v24, v24 ; 7E305918 v_mul_f32_e32 v22, v24, v22 ; 102C2D18 v_mul_f32_e32 v23, v24, v23 ; 102E2F18 v_mul_f32_e32 v24, v24, v25 ; 10303318 v_mul_f32_e32 v12, v12, v6 ; 10180D0C v_mad_f32 v11, -v11, v5, -v12 ; D282000B A4320B0B v_mul_f32_e32 v6, v22, v6 ; 100C0D16 v_mac_f32_e32 v6, v23, v5 ; 3E0C0B17 v_mul_f32_e32 v5, s1, v22 ; 100A2C01 v_mac_f32_e32 v5, s2, v23 ; 3E0A2E02 v_mad_f32 v7, -v7, v4, v11 ; D2820007 242E0907 v_mac_f32_e32 v5, s3, v24 ; 3E0A3003 v_mac_f32_e32 v6, v24, v4 ; 3E0C0918 v_max_f32_e32 v4, 0, v5 ; 20080A80 v_sub_f32_e32 v5, 1.0, v4 ; 080A08F2 v_mul_f32_e32 v11, v5, v5 ; 10160B05 v_mul_f32_e32 v5, v5, v11 ; 100A1705 v_mul_f32_e32 v5, v5, v11 ; 100A1705 v_max_f32_e32 v7, 0, v7 ; 200E0E80 v_sub_f32_e32 v11, 1.0, v7 ; 08160EF2 v_mul_f32_e32 v12, v11, v11 ; 1018170B v_mul_f32_e32 v22, v11, v12 ; 102C190B v_mad_f32 v23, -v12, v22, 1.0 ; D2820017 23CA2D0C v_mul_f32_e32 v24, v2, v23 ; 10302F02 v_sub_f32_e32 v25, 1.0, v2 ; 083204F2 v_mac_f32_e32 v2, v5, v25 ; 3E043305 v_mul_f32_e32 v25, v1, v23 ; 10322F01 v_sub_f32_e32 v26, 1.0, v1 ; 083402F2 v_mac_f32_e32 v1, v5, v26 ; 3E023505 v_mul_f32_e32 v23, v0, v23 ; 102E2F00 v_sub_f32_e32 v26, 1.0, v0 ; 083400F2 v_mac_f32_e32 v0, v5, v26 ; 3E003505 v_sub_f32_e32 v5, 1.0, v20 ; 080A28F2 v_mov_b32_e32 v26, 0x3cf5c28f ; 7E3402FF 3CF5C28F v_madmk_f32_e32 v5, v5, v26, 0x3f77ced9 ; 400A3505 3F77CED9 v_add_f32_e32 v26, v4, v4 ; 06340904 v_mul_f32_e32 v4, v20, v4 ; 10080914 v_mad_f32 v4, v26, v4, 0.5 ; D2820004 03C2091A v_mul_f32_e32 v12, v22, v12 ; 10181916 v_mac_f32_e32 v24, v16, v12 ; 3E301910 v_mac_f32_e32 v25, v16, v12 ; 3E321910 v_mac_f32_e32 v23, v16, v12 ; 3E2E1910 v_mul_f32_e32 v16, v20, v20 ; 10202914 v_log_f32_e32 v5, v5 ; 7E0A4F05 v_mul_f32_e32 v16, s8, v16 ; 10202008 v_mul_f32_e32 v11, v16, v11 ; 10161710 v_mac_f32_e32 v11, 1.0, v7 ; 3E160EF2 v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_sub_f32_e32 v7, 1.0, v8 ; 080E10F2 v_mul_f32_e32 v16, v16, v7 ; 10200F10 v_mac_f32_e32 v16, 1.0, v8 ; 3E2010F2 v_max_f32_e32 v6, 0, v6 ; 200C0C80 v_log_f32_e32 v6, v6 ; 7E0C4F06 v_madak_f32_e32 v11, v16, v11, 0x38d1b717 ; 42161710 38D1B717 v_mul_f32_e32 v5, 0x41200000, v5 ; 100A0AFF 41200000 v_mul_f32_e32 v16, v5, v5 ; 10200B05 v_mul_legacy_f32_e32 v6, v16, v6 ; 0E0C0D10 v_rcp_f32_e32 v11, v11 ; 7E16550B v_mad_f32 v5, v5, v5, 1.0 ; D2820005 03CA0B05 v_mul_f32_e32 v5, s16, v5 ; 100A0A10 v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_mul_f32_e32 v5, v5, v6 ; 100A0D05 v_mul_f32_e32 v5, v5, v11 ; 100A1705 v_mul_f32_e32 v5, v8, v5 ; 100A0B08 v_mul_f32_e32 v5, s15, v5 ; 100A0A0F v_mul_f32_e32 v6, v7, v7 ; 100C0F07 v_mul_f32_e32 v7, v7, v6 ; 100E0D07 v_mul_f32_e32 v6, v7, v6 ; 100C0D07 v_add_f32_e32 v4, -1.0, v4 ; 060808F3 v_mad_f32 v6, v4, v6, 1.0 ; D2820006 03CA0D04 v_mad_f32 v4, v4, v12, 1.0 ; D2820004 03CA1904 v_mul_f32_e32 v4, v4, v6 ; 10080D04 v_mul_f32_e32 v4, v8, v4 ; 10080908 v_mac_f32_e32 v14, s14, v4 ; 3E1C080E v_mul_f32_e32 v6, v14, v21 ; 100C2B0E v_max_f32_e32 v5, 0, v5 ; 200A0A80 v_mul_f32_e32 v7, s14, v5 ; 100E0A0E v_mac_f32_e32 v6, v2, v7 ; 3E0C0F02 v_mac_f32_e32 v9, s7, v4 ; 3E120807 v_mac_f32_e32 v10, s12, v4 ; 3E14080C v_mul_f32_e32 v2, s7, v5 ; 10040A07 v_mul_f32_e32 v4, s12, v5 ; 10080A0C v_mul_f32_e32 v5, v9, v19 ; 100A2709 v_mul_f32_e32 v7, v10, v18 ; 100E250A v_mac_f32_e32 v5, v1, v2 ; 3E0A0501 v_mac_f32_e32 v7, v0, v4 ; 3E0E0900 v_mac_f32_e32 v6, v24, v3 ; 3E0C0718 v_mac_f32_e32 v5, v25, v15 ; 3E0A1F19 v_mac_f32_e32 v7, v23, v13 ; 3E0E1B17 v_add_f32_e64 v0, 0, v17 clamp ; D2060800 00022280 v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 v_mul_f32_e32 v2, s6, v1 ; 10040206 v_mac_f32_e32 v2, v6, v0 ; 3E040106 v_mul_f32_e32 v3, s5, v1 ; 10060205 v_mac_f32_e32 v3, v5, v0 ; 3E060105 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mac_f32_e32 v1, v7, v0 ; 3E020107 v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702 v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 40 Code Size: 2272 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL CONST[0..19] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[5], IN[0].xxxx 1: MAD TEMP[0], CONST[6], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[7], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0].xyz, CONST[8], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[16], IN[0].xxxx 5: MAD TEMP[1], CONST[17], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[18], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1], CONST[19], IN[0].wwww, TEMP[1] 8: MAD TEMP[2].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww 9: FSEQ TEMP[3].x, CONST[15].xxxx, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].xy, IN[2].xyxx 12: ELSE :0 13: MOV TEMP[3].xy, IN[3].xyxx 14: ENDIF 15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[14].xyyy, CONST[14].zwww 16: MOV TEMP[2].zw, TEMP[3].yyxy 17: MOV TEMP[3].x, CONST[9].xxxx 18: MOV TEMP[3].y, CONST[10].xxxx 19: MOV TEMP[3].z, CONST[11].xxxx 20: MOV TEMP[4].x, CONST[9].yyyy 21: MOV TEMP[4].y, CONST[10].yyyy 22: MOV TEMP[4].z, CONST[11].yyyy 23: MOV TEMP[5].x, CONST[9].zzzz 24: MOV TEMP[5].y, CONST[10].zzzz 25: MOV TEMP[5].z, CONST[11].zzzz 26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz 29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 30: RSQ TEMP[4].x, TEMP[4].xxxx 31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 32: MOV TEMP[4].xyz, TEMP[3].xyzx 33: MUL TEMP[5], TEMP[3].xyzz, TEMP[3].yzzx 34: DP4 TEMP[6].x, CONST[1], TEMP[5] 35: DP4 TEMP[7].x, CONST[2], TEMP[5] 36: MOV TEMP[6].y, TEMP[7].xxxx 37: DP4 TEMP[5].x, CONST[3], TEMP[5] 38: MOV TEMP[6].z, TEMP[5].xxxx 39: MUL TEMP[5].x, TEMP[3].yyyy, TEMP[3].yyyy 40: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[5].xxxx 41: MAD TEMP[3].xyz, CONST[4].xyzz, TEMP[3].xxxx, TEMP[6].xyzz 42: ADD TEMP[5].xyz, TEMP[0].xyzz, -CONST[0].xyzz 43: MOV TEMP[5].yzw, TEMP[5].yxyz 44: MOV TEMP[5].x, TEMP[1].zzzz 45: MOV TEMP[0].xyz, TEMP[0].xyzx 46: MOV OUT[5], TEMP[0] 47: MOV OUT[1], TEMP[2] 48: MOV OUT[2], TEMP[4] 49: MOV OUT[3], TEMP[3] 50: MOV OUT[0], TEMP[1] 51: MOV OUT[4], TEMP[5] 52: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %5, %7 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = extractelement <4 x float> %83, i32 3 %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 %90 = add i32 %5, %7 %91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %90) %92 = extractelement <4 x float> %91, i32 0 %93 = extractelement <4 x float> %91, i32 1 %94 = extractelement <4 x float> %91, i32 2 %95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0 %97 = add i32 %5, %7 %98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97) %99 = extractelement <4 x float> %98, i32 0 %100 = extractelement <4 x float> %98, i32 1 %101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0 %103 = add i32 %5, %7 %104 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %103) %105 = extractelement <4 x float> %104, i32 0 %106 = extractelement <4 x float> %104, i32 1 %107 = fmul float %31, %84 %108 = fmul float %32, %84 %109 = fmul float %33, %84 %110 = fmul float %34, %84 %111 = fmul float %35, %85 %112 = fadd float %111, %107 %113 = fmul float %36, %85 %114 = fadd float %113, %108 %115 = fmul float %37, %85 %116 = fadd float %115, %109 %117 = fmul float %38, %85 %118 = fadd float %117, %110 %119 = fmul float %39, %86 %120 = fadd float %119, %112 %121 = fmul float %40, %86 %122 = fadd float %121, %114 %123 = fmul float %41, %86 %124 = fadd float %123, %116 %125 = fmul float %42, %86 %126 = fadd float %125, %118 %127 = fmul float %43, %87 %128 = fadd float %127, %120 %129 = fmul float %44, %87 %130 = fadd float %129, %122 %131 = fmul float %45, %87 %132 = fadd float %131, %124 %133 = fmul float %64, %84 %134 = fmul float %65, %84 %135 = fmul float %66, %84 %136 = fmul float %67, %84 %137 = fmul float %68, %85 %138 = fadd float %137, %133 %139 = fmul float %69, %85 %140 = fadd float %139, %134 %141 = fmul float %70, %85 %142 = fadd float %141, %135 %143 = fmul float %71, %85 %144 = fadd float %143, %136 %145 = fmul float %72, %86 %146 = fadd float %145, %138 %147 = fmul float %73, %86 %148 = fadd float %147, %140 %149 = fmul float %74, %86 %150 = fadd float %149, %142 %151 = fmul float %75, %86 %152 = fadd float %151, %144 %153 = fmul float %76, %87 %154 = fadd float %153, %146 %155 = fmul float %77, %87 %156 = fadd float %155, %148 %157 = fmul float %78, %87 %158 = fadd float %157, %150 %159 = fmul float %79, %87 %160 = fadd float %159, %152 %161 = fmul float %99, %55 %162 = fadd float %161, %57 %163 = fmul float %100, %56 %164 = fadd float %163, %58 %165 = fcmp oeq float %63, 0.000000e+00 %. = select i1 %165, float %99, float %105 %.32 = select i1 %165, float %100, float %106 %166 = fmul float %., %59 %167 = fadd float %166, %61 %168 = fmul float %.32, %60 %169 = fadd float %168, %62 %170 = fmul float %46, %92 %171 = fmul float %49, %92 %172 = fmul float %52, %92 %173 = fmul float %47, %93 %174 = fadd float %173, %170 %175 = fmul float %50, %93 %176 = fadd float %175, %171 %177 = fmul float %53, %93 %178 = fadd float %177, %172 %179 = fmul float %48, %94 %180 = fadd float %179, %174 %181 = fmul float %51, %94 %182 = fadd float %181, %176 %183 = fmul float %54, %94 %184 = fadd float %183, %178 %185 = fmul float %180, %180 %186 = fmul float %182, %182 %187 = fadd float %186, %185 %188 = fmul float %184, %184 %189 = fadd float %187, %188 %190 = call float @llvm.AMDGPU.rsq.clamped.f32(float %189) %191 = fmul float %180, %190 %192 = fmul float %182, %190 %193 = fmul float %184, %190 %194 = fmul float %191, %192 %195 = fmul float %192, %193 %196 = fmul float %193, %193 %197 = fmul float %193, %191 %198 = fmul float %16, %194 %199 = fmul float %17, %195 %200 = fadd float %198, %199 %201 = fmul float %18, %196 %202 = fadd float %200, %201 %203 = fmul float %19, %197 %204 = fadd float %202, %203 %205 = fmul float %20, %194 %206 = fmul float %21, %195 %207 = fadd float %205, %206 %208 = fmul float %22, %196 %209 = fadd float %207, %208 %210 = fmul float %23, %197 %211 = fadd float %209, %210 %212 = fmul float %24, %194 %213 = fmul float %25, %195 %214 = fadd float %212, %213 %215 = fmul float %26, %196 %216 = fadd float %214, %215 %217 = fmul float %27, %197 %218 = fadd float %216, %217 %219 = fmul float %192, %192 %220 = fmul float %191, %191 %221 = fsub float %220, %219 %222 = fmul float %28, %221 %223 = fadd float %222, %204 %224 = fmul float %29, %221 %225 = fadd float %224, %211 %226 = fmul float %30, %221 %227 = fadd float %226, %218 %228 = fsub float %128, %13 %229 = fsub float %130, %14 %230 = fsub float %132, %15 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %162, float %164, float %167, float %169) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %191, float %192, float %193, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %223, float %225, float %227, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %158, float %228, float %229, float %230) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %128, float %130, float %132, float %126) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %154, float %156, float %158, float %160) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s18, s[20:23], 0x20 ; C2091520 buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00 s_buffer_load_dword s19, s[20:23], 0x21 ; C2099521 s_buffer_load_dword s24, s[20:23], 0x22 ; C20C1522 s_buffer_load_dword s25, s[20:23], 0x24 ; C20C9524 s_buffer_load_dword s26, s[20:23], 0x25 ; C20D1525 s_buffer_load_dword s27, s[20:23], 0x26 ; C20D9526 s_buffer_load_dword s28, s[20:23], 0x28 ; C20E1528 s_buffer_load_dword s29, s[20:23], 0x29 ; C20E9529 s_buffer_load_dword s30, s[20:23], 0x2a ; C20F152A s_buffer_load_dword s31, s[20:23], 0x2c ; C20F952C s_buffer_load_dword s32, s[20:23], 0x2d ; C210152D s_buffer_load_dword s33, s[20:23], 0x2e ; C210952E s_buffer_load_dword s34, s[20:23], 0x34 ; C2111534 s_buffer_load_dword s35, s[20:23], 0x35 ; C2119535 s_buffer_load_dword s5, s[20:23], 0x36 ; C2029536 s_buffer_load_dword s0, s[20:23], 0x0 ; C2001500 s_buffer_load_dword s1, s[20:23], 0x1 ; C2009501 s_buffer_load_dword s2, s[20:23], 0x2 ; C2011502 s_buffer_load_dword s6, s[20:23], 0x4 ; C2031504 s_buffer_load_dword s14, s[20:23], 0x5 ; C2071505 s_buffer_load_dword s4, s[20:23], 0x6 ; C2021506 s_buffer_load_dword s3, s[20:23], 0x7 ; C2019507 s_buffer_load_dword s12, s[20:23], 0x8 ; C2061508 s_buffer_load_dword s16, s[20:23], 0x9 ; C2081509 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s5 ; 7E000205 s_buffer_load_dword s7, s[20:23], 0xa ; C203950A s_buffer_load_dword s5, s[20:23], 0xb ; C202950B s_buffer_load_dword s15, s[20:23], 0xc ; C207950C s_buffer_load_dword s17, s[20:23], 0xd ; C208950D s_buffer_load_dword s13, s[20:23], 0xe ; C206950E s_buffer_load_dword s8, s[20:23], 0xf ; C204150F s_buffer_load_dword s36, s[20:23], 0x3c ; C212153C s_buffer_load_dword s37, s[20:23], 0x40 ; C2129540 s_buffer_load_dword s38, s[20:23], 0x41 ; C2131541 s_buffer_load_dword s39, s[20:23], 0x42 ; C2139542 s_buffer_load_dword s40, s[20:23], 0x43 ; C2141543 s_buffer_load_dword s9, s[20:23], 0x10 ; C2049510 s_buffer_load_dword s10, s[20:23], 0x11 ; C2051511 s_buffer_load_dword s11, s[20:23], 0x12 ; C2059512 s_buffer_load_dword s41, s[20:23], 0x14 ; C2149514 s_buffer_load_dword s42, s[20:23], 0x15 ; C2151515 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_eq_f32_e64 vcc, 0, s36 ; D004006A 00004880 s_buffer_load_dword s36, s[20:23], 0x16 ; C2121516 s_buffer_load_dword s43, s[20:23], 0x17 ; C2159517 s_buffer_load_dword s44, s[20:23], 0x18 ; C2161518 s_buffer_load_dword s45, s[20:23], 0x19 ; C2169519 s_buffer_load_dword s46, s[20:23], 0x1a ; C217151A s_buffer_load_dword s47, s[20:23], 0x37 ; C2179537 s_buffer_load_dword s48, s[20:23], 0x38 ; C2181538 s_buffer_load_dword s49, s[20:23], 0x39 ; C2189539 s_buffer_load_dword s50, s[20:23], 0x3a ; C219153A s_buffer_load_dword s51, s[20:23], 0x3b ; C219953B s_buffer_load_dword s52, s[20:23], 0x1b ; C21A151B s_buffer_load_dword s53, s[20:23], 0x1c ; C21A951C s_buffer_load_dword s54, s[20:23], 0x1d ; C21B151D s_buffer_load_dword s55, s[20:23], 0x1e ; C21B951E s_buffer_load_dword s56, s[20:23], 0x1f ; C21C151F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v13, s47 ; 7E1A022F s_buffer_load_dword s47, s[20:23], 0x44 ; C2179544 s_buffer_load_dword s57, s[20:23], 0x45 ; C21C9545 s_buffer_load_dword s58, s[20:23], 0x46 ; C21D1546 v_mov_b32_e32 v14, s50 ; 7E1C0232 s_buffer_load_dword s50, s[20:23], 0x47 ; C2191547 v_mov_b32_e32 v15, s51 ; 7E1E0233 s_buffer_load_dword s51, s[20:23], 0x48 ; C2199548 s_buffer_load_dword s59, s[20:23], 0x49 ; C21D9549 s_buffer_load_dword s60, s[20:23], 0x4a ; C21E154A s_buffer_load_dword s61, s[20:23], 0x4b ; C21E954B s_buffer_load_dword s62, s[20:23], 0x4c ; C21F154C s_buffer_load_dword s63, s[20:23], 0x4d ; C21F954D s_buffer_load_dword s64, s[20:23], 0x4e ; C220154E s_buffer_load_dword s20, s[20:23], 0x4f ; C20A154F v_mul_f32_e32 v16, s41, v2 ; 10200429 v_mac_f32_e32 v0, s34, v9 ; 3E001222 v_mul_f32_e32 v17, s42, v2 ; 1022042A v_mul_f32_e32 v18, s36, v2 ; 10240424 v_mul_f32_e32 v19, s43, v2 ; 1026042B v_mac_f32_e32 v13, s35, v10 ; 3E1A1423 v_mul_f32_e32 v20, s25, v6 ; 10280C19 v_mul_f32_e32 v21, s28, v6 ; 102A0C1C v_mul_f32_e32 v6, s31, v6 ; 100C0C1F v_mac_f32_e32 v16, s44, v3 ; 3E20062C v_mac_f32_e32 v17, s45, v3 ; 3E22062D v_mac_f32_e32 v18, s46, v3 ; 3E24062E v_mac_f32_e32 v20, s26, v7 ; 3E280E1A v_mac_f32_e32 v21, s29, v7 ; 3E2A0E1D v_cndmask_b32_e32 v9, v11, v9 ; 0012130B v_cndmask_b32_e32 v10, v12, v10 ; 0014150C v_mac_f32_e32 v6, s32, v7 ; 3E0C0E20 v_mac_f32_e32 v20, s27, v8 ; 3E28101B v_mac_f32_e32 v21, s30, v8 ; 3E2A101E v_mac_f32_e32 v6, s33, v8 ; 3E0C1021 v_mac_f32_e32 v19, s52, v3 ; 3E260634 v_mul_f32_e32 v7, s37, v2 ; 100E0425 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v7, s47, v3 ; 3E0E062F v_mul_f32_e32 v8, s38, v2 ; 10100426 v_mac_f32_e32 v8, s57, v3 ; 3E100639 v_mul_f32_e32 v11, s39, v2 ; 10160427 v_mac_f32_e32 v11, s58, v3 ; 3E16063A v_mul_f32_e32 v2, s40, v2 ; 10040428 v_mac_f32_e32 v2, s50, v3 ; 3E040632 v_mac_f32_e32 v16, s53, v4 ; 3E200835 v_mac_f32_e32 v17, s54, v4 ; 3E220836 v_mac_f32_e32 v18, s55, v4 ; 3E240837 v_mac_f32_e32 v19, s56, v4 ; 3E260838 v_mac_f32_e32 v7, s51, v4 ; 3E0E0833 v_mac_f32_e32 v8, s59, v4 ; 3E10083B v_mac_f32_e32 v11, s60, v4 ; 3E16083C v_mac_f32_e32 v2, s61, v4 ; 3E04083D v_mac_f32_e32 v16, s18, v5 ; 3E200A12 v_mac_f32_e32 v17, s19, v5 ; 3E220A13 v_mac_f32_e32 v18, s24, v5 ; 3E240A18 v_mac_f32_e32 v7, s62, v5 ; 3E0E0A3E v_mac_f32_e32 v8, s63, v5 ; 3E100A3F v_mac_f32_e32 v11, s64, v5 ; 3E160A40 v_mac_f32_e32 v2, s20, v5 ; 3E040A14 v_mul_f32_e32 v3, v20, v20 ; 10062914 v_mac_f32_e32 v3, v21, v21 ; 3E062B15 v_mac_f32_e32 v3, v6, v6 ; 3E060D06 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_mac_f32_e32 v14, s48, v9 ; 3E1C1230 v_mac_f32_e32 v15, s49, v10 ; 3E1E1431 exp 15, 32, 0, 0, 0, v0, v13, v14, v15 ; F800020F 0F0E0D00 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v3, v20 ; 10002903 v_mul_f32_e32 v4, v3, v21 ; 10082B03 v_mul_f32_e32 v3, v3, v6 ; 10060D03 v_mul_f32_e32 v5, v3, v4 ; 100A0903 v_mul_f32_e32 v6, s14, v5 ; 100C0A0E v_mul_f32_e32 v9, s16, v5 ; 10120A10 v_mul_f32_e32 v5, s17, v5 ; 100A0A11 v_mul_f32_e32 v10, v4, v0 ; 10140104 v_mac_f32_e32 v6, s6, v10 ; 3E0C1406 v_mac_f32_e32 v9, s12, v10 ; 3E12140C v_mac_f32_e32 v5, s15, v10 ; 3E0A140F v_mul_f32_e32 v10, v3, v3 ; 10140703 v_mac_f32_e32 v6, s4, v10 ; 3E0C1404 v_mac_f32_e32 v9, s7, v10 ; 3E121407 v_mac_f32_e32 v5, s13, v10 ; 3E0A140D v_mul_f32_e32 v10, v0, v3 ; 10140700 v_mac_f32_e32 v6, s3, v10 ; 3E0C1403 v_mac_f32_e32 v9, s5, v10 ; 3E121405 v_mac_f32_e32 v5, s8, v10 ; 3E0A1408 v_mul_f32_e32 v10, v4, v4 ; 10140904 v_mad_f32 v10, v0, v0, -v10 ; D282000A 842A0100 v_mac_f32_e32 v6, s9, v10 ; 3E0C1409 v_mac_f32_e32 v9, s10, v10 ; 3E12140A v_mac_f32_e32 v5, s11, v10 ; 3E0A140B v_subrev_f32_e32 v10, s0, v16 ; 0A142000 v_subrev_f32_e32 v12, s1, v17 ; 0A182201 v_subrev_f32_e32 v13, s2, v18 ; 0A1A2402 exp 15, 33, 0, 0, 0, v0, v4, v3, v1 ; F800021F 01030400 exp 15, 34, 0, 0, 0, v6, v9, v5, v1 ; F800022F 01050906 exp 15, 35, 0, 0, 0, v11, v10, v12, v13 ; F800023F 0D0C0A0B exp 15, 36, 0, 0, 0, v16, v17, v18, v19 ; F800024F 13121110 exp 15, 12, 0, 1, 0, v7, v8, v11, v2 ; F80008CF 020B0807 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 24 Code Size: 748 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SVIEW[0], CUBE, FLOAT DCL SVIEW[1], CUBE, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL CONST[0..5] DCL CONST[8..20] DCL CONST[23..24] DCL CONST[26] DCL TEMP[0..17], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, 0.5000} IMM[1] FLT32 { 0.7500, 7.0000, 1.0000, 10.0000} IMM[2] FLT32 { 0.9680, 0.0300, 0.0001, -1.0000} 0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx 3: DP3 TEMP[1].x, IN[3].yzww, IN[3].yzww 4: RSQ TEMP[1].x, TEMP[1].xxxx 5: MUL TEMP[1].xyz, IN[3].yzww, TEMP[1].xxxx 6: MOV TEMP[2].xy, IN[0].xyyy 7: TEX TEMP[2].x, TEMP[2], SAMP[3], 2D 8: MOV TEMP[3].xyz, IMM[0].xxxx 9: FSLT TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 10: UIF TEMP[2].xxxx :0 11: MUL TEMP[2].xyz, CONST[20].xyzz, CONST[19].xyzz 12: MOV TEMP[4].xy, IN[0].xyyy 13: TEX TEMP[4].xyz, TEMP[4], SAMP[2], 2D 14: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[4].xyzz 15: ELSE :0 16: MOV TEMP[2].xy, IN[0].xyyy 17: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D 18: MUL TEMP[3].xyz, CONST[19].xyzz, TEMP[2].xyzz 19: ENDIF 20: LRP TEMP[2].xyz, CONST[23].xxxx, TEMP[3].xyzz, CONST[16].xyzz 21: MUL TEMP[4].x, CONST[23].xxxx, CONST[16].wwww 22: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx 23: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 24: MOV TEMP[5].xy, IN[0].xyyy 25: TEX TEMP[5].y, TEMP[5], SAMP[4], 2D 26: ADD TEMP[6].x, IMM[0].xxxx, -CONST[26].xxxx 27: MAD TEMP[5].x, TEMP[5].yyyy, CONST[26].xxxx, TEMP[6].xxxx 28: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz 29: MAX TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx 30: MOV TEMP[7].xyz, IMM[0].yyyy 31: MOV TEMP[8].w, IMM[0].xxxx 32: MOV TEMP[8].xyz, TEMP[0].xyzx 33: DP4 TEMP[9].x, CONST[1], TEMP[8] 34: DP4 TEMP[10].x, CONST[2], TEMP[8] 35: MOV TEMP[9].y, TEMP[10].xxxx 36: DP4 TEMP[8].x, CONST[3], TEMP[8] 37: MOV TEMP[9].z, TEMP[8].xxxx 38: ADD TEMP[8].xyz, IN[2].xyzz, TEMP[9].xyzz 39: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx 40: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[1].xyzz 41: MUL TEMP[9].xyz, TEMP[9].xxxx, TEMP[0].xyzz 42: MUL TEMP[9].xyz, IMM[0].zzzz, TEMP[9].xyzz 43: ADD TEMP[9].xyz, TEMP[1].xyzz, -TEMP[9].xyzz 44: MOV TEMP[10].xyz, TEMP[9].xyzx 45: FSLT TEMP[11].x, IMM[0].yyyy, CONST[10].wwww 46: UIF TEMP[11].xxxx :0 47: DP3 TEMP[11].x, TEMP[9].xyzz, TEMP[9].xyzz 48: RSQ TEMP[11].x, TEMP[11].xxxx 49: MUL TEMP[11].xyz, TEMP[9].xyzz, TEMP[11].xxxx 50: MOV TEMP[12].xyz, -IN[4].xyzx 51: ADD TEMP[13].xyz, CONST[8].xyzz, TEMP[12].xyzz 52: RCP TEMP[14].x, TEMP[11].xxxx 53: RCP TEMP[14].y, TEMP[11].yyyy 54: RCP TEMP[14].z, TEMP[11].zzzz 55: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz 56: ADD TEMP[12].xyz, CONST[9].xyzz, TEMP[12].xyzz 57: RCP TEMP[14].x, TEMP[11].xxxx 58: RCP TEMP[14].y, TEMP[11].yyyy 59: RCP TEMP[14].z, TEMP[11].zzzz 60: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz 61: FSLT TEMP[14].xyz, IMM[0].yyyy, TEMP[11].xyzz 62: UIF TEMP[14].xxxx :0 63: MOV TEMP[15].x, TEMP[13].xxxx 64: ELSE :0 65: MOV TEMP[15].x, TEMP[12].xxxx 66: ENDIF 67: UIF TEMP[14].yyyy :0 68: MOV TEMP[16].x, TEMP[13].yyyy 69: ELSE :0 70: MOV TEMP[16].x, TEMP[12].yyyy 71: ENDIF 72: UIF TEMP[14].zzzz :0 73: MOV TEMP[13].x, TEMP[13].zzzz 74: ELSE :0 75: MOV TEMP[13].x, TEMP[12].zzzz 76: ENDIF 77: ADD TEMP[12].xyz, CONST[8].xyzz, CONST[9].xyzz 78: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[0].wwww 79: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx 80: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx 81: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[10].xyzz 82: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[4].xyzz 83: MAD TEMP[11].xyz, TEMP[11].xyzz, TEMP[13].xxxx, TEMP[14].xyzz 84: ADD TEMP[10].xyz, TEMP[11].xyzz, -TEMP[12].xyzz 85: ENDIF 86: ADD TEMP[11].x, IMM[0].xxxx, -CONST[24].xxxx 87: POW TEMP[11].x, TEMP[11].xxxx, IMM[1].xxxx 88: MUL TEMP[11].x, TEMP[11].xxxx, IMM[1].yyyy 89: MOV TEMP[10].xyz, TEMP[10].xyzz 90: MOV TEMP[10].w, TEMP[11].xxxx 91: TXL TEMP[10], TEMP[10], SAMP[0], CUBE 92: POW TEMP[11].x, TEMP[10].wwww, CONST[11].yyyy 93: MUL TEMP[11].x, CONST[11].xxxx, TEMP[11].xxxx 94: MUL TEMP[10].xyz, TEMP[11].xxxx, TEMP[10].xyzz 95: FSLT TEMP[11].x, CONST[9].wwww, IMM[1].zzzz 96: UIF TEMP[11].xxxx :0 97: MOV TEMP[11].xyz, TEMP[9].xyzx 98: FSLT TEMP[12].x, IMM[0].yyyy, CONST[14].wwww 99: UIF TEMP[12].xxxx :0 100: DP3 TEMP[12].x, TEMP[9].xyzz, TEMP[9].xyzz 101: RSQ TEMP[12].x, TEMP[12].xxxx 102: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[12].xxxx 103: MOV TEMP[12].xyz, -IN[4].xyzx 104: ADD TEMP[13].xyz, CONST[12].xyzz, TEMP[12].xyzz 105: RCP TEMP[14].x, TEMP[9].xxxx 106: RCP TEMP[14].y, TEMP[9].yyyy 107: RCP TEMP[14].z, TEMP[9].zzzz 108: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz 109: ADD TEMP[12].xyz, CONST[13].xyzz, TEMP[12].xyzz 110: RCP TEMP[14].x, TEMP[9].xxxx 111: RCP TEMP[14].y, TEMP[9].yyyy 112: RCP TEMP[14].z, TEMP[9].zzzz 113: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz 114: FSLT TEMP[14].xyz, IMM[0].yyyy, TEMP[9].xyzz 115: UIF TEMP[14].xxxx :0 116: MOV TEMP[15].x, TEMP[13].xxxx 117: ELSE :0 118: MOV TEMP[15].x, TEMP[12].xxxx 119: ENDIF 120: UIF TEMP[14].yyyy :0 121: MOV TEMP[16].x, TEMP[13].yyyy 122: ELSE :0 123: MOV TEMP[16].x, TEMP[12].yyyy 124: ENDIF 125: UIF TEMP[14].zzzz :0 126: MOV TEMP[13].x, TEMP[13].zzzz 127: ELSE :0 128: MOV TEMP[13].x, TEMP[12].zzzz 129: ENDIF 130: ADD TEMP[12].xyz, CONST[12].xyzz, CONST[13].xyzz 131: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[0].wwww 132: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx 133: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx 134: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[14].xyzz 135: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[4].xyzz 136: MAD TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xxxx, TEMP[14].xyzz 137: ADD TEMP[11].xyz, TEMP[9].xyzz, -TEMP[12].xyzz 138: ENDIF 139: ADD TEMP[9].x, IMM[0].xxxx, -CONST[24].xxxx 140: POW TEMP[9].x, TEMP[9].xxxx, IMM[1].xxxx 141: MUL TEMP[9].x, TEMP[9].xxxx, IMM[1].yyyy 142: MOV TEMP[11].xyz, TEMP[11].xyzz 143: MOV TEMP[11].w, TEMP[9].xxxx 144: TXL TEMP[9], TEMP[11], SAMP[1], CUBE 145: POW TEMP[11].x, TEMP[9].wwww, CONST[15].yyyy 146: MUL TEMP[11].x, CONST[15].xxxx, TEMP[11].xxxx 147: MUL TEMP[9].xyz, TEMP[11].xxxx, TEMP[9].xyzz 148: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[10].xyzz, TEMP[9].xyzz 149: ELSE :0 150: MOV TEMP[7].xyz, TEMP[10].xyzx 151: ENDIF 152: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx 153: MOV TEMP[1].xyz, -TEMP[1].xyzx 154: ADD TEMP[5].x, IMM[0].xxxx, -CONST[24].xxxx 155: ADD TEMP[9].xyz, CONST[0].xyzz, TEMP[1].xyzz 156: DP3 TEMP[10].x, TEMP[9].xyzz, TEMP[9].xyzz 157: RSQ TEMP[10].x, TEMP[10].xxxx 158: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xxxx 159: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz 160: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx 161: DP3 TEMP[10].x, CONST[0].xyzz, TEMP[9].xyzz 162: MAX TEMP[10].x, IMM[0].yyyy, TEMP[10].xxxx 163: MUL TEMP[11].x, TEMP[5].xxxx, TEMP[5].xxxx 164: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].wwww 165: ADD TEMP[12].x, IMM[0].xxxx, -TEMP[5].xxxx 166: MAD TEMP[12].x, TEMP[12].xxxx, IMM[2].xxxx, IMM[2].yyyy 167: LG2 TEMP[12].x, TEMP[12].xxxx 168: RCP TEMP[12].x, TEMP[12].xxxx 169: MUL TEMP[12].x, IMM[1].wwww, TEMP[12].xxxx 170: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[12].xxxx 171: ADD TEMP[13].x, IMM[0].xxxx, -TEMP[6].xxxx 172: ADD TEMP[14].x, IMM[0].xxxx, -TEMP[1].xxxx 173: MUL TEMP[15].x, IMM[0].zzzz, TEMP[10].xxxx 174: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx 175: MAD TEMP[5].x, TEMP[15].xxxx, TEMP[5].xxxx, IMM[0].wwww 176: ADD TEMP[10].x, IMM[0].xxxx, -TEMP[10].xxxx 177: ADD TEMP[15].x, IMM[0].xxxx, -TEMP[1].xxxx 178: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx 179: ADD TEMP[4].x, CONST[24].xxxx, TEMP[4].xxxx 180: MOV_SAT TEMP[4].x, TEMP[4].xxxx 181: MUL TEMP[16].x, TEMP[15].xxxx, TEMP[15].xxxx 182: MUL TEMP[17].x, TEMP[15].xxxx, TEMP[15].xxxx 183: MUL TEMP[15].x, TEMP[17].xxxx, TEMP[15].xxxx 184: MUL TEMP[15].x, TEMP[16].xxxx, TEMP[15].xxxx 185: LRP TEMP[4].xyz, TEMP[15].xxxx, TEMP[4].xxxx, TEMP[2].xyzz 186: LRP TEMP[15].x, TEMP[6].xxxx, IMM[0].xxxx, TEMP[11].xxxx 187: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx, TEMP[11].xxxx 188: MAD TEMP[1].x, TEMP[15].xxxx, TEMP[1].xxxx, IMM[2].zzzz 189: RCP TEMP[1].x, TEMP[1].xxxx 190: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[9].xyzz 191: MAX TEMP[9].x, IMM[0].yyyy, TEMP[9].xxxx 192: POW TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx 193: ADD TEMP[11].x, TEMP[12].xxxx, IMM[0].xxxx 194: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].yyyy 195: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx 196: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[9].xxxx 197: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx 198: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx 199: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx 200: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[17].xyzz 201: ADD TEMP[9].xyz, IMM[0].xxxx, -TEMP[2].xyzz 202: MUL TEMP[11].x, TEMP[10].xxxx, TEMP[10].xxxx 203: MUL TEMP[12].x, TEMP[10].xxxx, TEMP[10].xxxx 204: MUL TEMP[10].x, TEMP[12].xxxx, TEMP[10].xxxx 205: MUL TEMP[10].x, TEMP[11].xxxx, TEMP[10].xxxx 206: MAD TEMP[2].xyz, TEMP[9].xyzz, TEMP[10].xxxx, TEMP[2].xyzz 207: ADD TEMP[9].x, TEMP[5].xxxx, IMM[2].wwww 208: MUL TEMP[10].x, TEMP[13].xxxx, TEMP[13].xxxx 209: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[13].xxxx 210: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[13].xxxx 211: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 212: MAD TEMP[9].x, TEMP[9].xxxx, TEMP[10].xxxx, IMM[0].xxxx 213: ADD TEMP[5].x, TEMP[5].xxxx, IMM[2].wwww 214: MUL TEMP[10].x, TEMP[14].xxxx, TEMP[14].xxxx 215: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx 216: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx 217: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 218: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[10].xxxx, IMM[0].xxxx 219: MUL TEMP[5].x, TEMP[9].xxxx, TEMP[5].xxxx 220: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 221: MAD TEMP[5].xyz, CONST[17].xyzz, TEMP[5].xxxx, TEMP[8].xyzz 222: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[5].xyzz 223: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz, TEMP[3].xyzz 224: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz 225: MOV TEMP[0].xyz, TEMP[0].xyzx 226: MAD TEMP[1].x, IN[3].xxxx, CONST[5].zzzz, CONST[5].wwww 227: MOV_SAT TEMP[1].x, TEMP[1].xxxx 228: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz 229: MOV TEMP[0].xyz, TEMP[0].xyzx 230: MOV TEMP[0].w, IMM[0].xxxx 231: MOV OUT[0], TEMP[0] 232: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292) %78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300) %79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312) %82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368) %83 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384) %84 = call float @llvm.SI.load.const(<16 x i8> %23, i32 416) %85 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %86 = load <32 x i8>, <32 x i8> addrspace(2)* %85, align 32, !tbaa !0 %87 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 %89 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %90 = bitcast <8 x i32> addrspace(2)* %89 to <32 x i8> addrspace(2)* %91 = load <32 x i8>, <32 x i8> addrspace(2)* %90, align 32, !tbaa !0 %92 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %93 = bitcast <4 x i32> addrspace(2)* %92 to <16 x i8> addrspace(2)* %94 = load <16 x i8>, <16 x i8> addrspace(2)* %93, align 16, !tbaa !0 %95 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %96 = load <8 x i32>, <8 x i32> addrspace(2)* %95, align 32, !tbaa !0 %97 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %98 = load <4 x i32>, <4 x i32> addrspace(2)* %97, align 16, !tbaa !0 %99 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %100 = bitcast <8 x i32> addrspace(2)* %99 to <32 x i8> addrspace(2)* %101 = load <32 x i8>, <32 x i8> addrspace(2)* %100, align 32, !tbaa !0 %102 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %103 = bitcast <4 x i32> addrspace(2)* %102 to <16 x i8> addrspace(2)* %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0 %105 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %106 = bitcast <8 x i32> addrspace(2)* %105 to <32 x i8> addrspace(2)* %107 = load <32 x i8>, <32 x i8> addrspace(2)* %106, align 32, !tbaa !0 %108 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %109 = bitcast <4 x i32> addrspace(2)* %108 to <16 x i8> addrspace(2)* %110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0 %111 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %113 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %114 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %115 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %116 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %117 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %119 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %123 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %124 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %125 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %126 = fmul float %113, %113 %127 = fmul float %114, %114 %128 = fadd float %127, %126 %129 = fmul float %115, %115 %130 = fadd float %128, %129 %131 = call float @llvm.AMDGPU.rsq.clamped.f32(float %130) %132 = fmul float %113, %131 %133 = fmul float %114, %131 %134 = fmul float %115, %131 %135 = fmul float %120, %120 %136 = fmul float %121, %121 %137 = fadd float %136, %135 %138 = fmul float %122, %122 %139 = fadd float %137, %138 %140 = call float @llvm.AMDGPU.rsq.clamped.f32(float %139) %141 = fmul float %120, %140 %142 = fmul float %121, %140 %143 = fmul float %122, %140 %144 = bitcast float %111 to i32 %145 = bitcast float %112 to i32 %146 = insertelement <2 x i32> undef, i32 %144, i32 0 %147 = insertelement <2 x i32> %146, i32 %145, i32 1 %148 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %147, <32 x i8> %101, <16 x i8> %104, i32 2) %149 = extractelement <4 x float> %148, i32 0 %150 = fcmp ogt float %149, 0.000000e+00 br i1 %150, label %IF, label %ELSE IF: ; preds = %main_body %151 = call float @llvm.SI.load.const(<16 x i8> %23, i32 328) %152 = call float @llvm.SI.load.const(<16 x i8> %23, i32 324) %153 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320) %154 = fmul float %153, %79 %155 = fmul float %152, %80 %156 = fmul float %151, %81 %157 = bitcast float %111 to i32 %158 = bitcast float %112 to i32 %159 = insertelement <2 x i32> undef, i32 %157, i32 0 %160 = insertelement <2 x i32> %159, i32 %158, i32 1 %161 = bitcast <8 x i32> %96 to <32 x i8> %162 = bitcast <4 x i32> %98 to <16 x i8> %163 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %160, <32 x i8> %161, <16 x i8> %162, i32 2) %164 = extractelement <4 x float> %163, i32 0 %165 = extractelement <4 x float> %163, i32 1 %166 = extractelement <4 x float> %163, i32 2 %167 = fmul float %154, %164 %168 = fmul float %155, %165 %169 = fmul float %156, %166 br label %ENDIF ELSE: ; preds = %main_body %170 = bitcast float %111 to i32 %171 = bitcast float %112 to i32 %172 = insertelement <2 x i32> undef, i32 %170, i32 0 %173 = insertelement <2 x i32> %172, i32 %171, i32 1 %174 = bitcast <8 x i32> %96 to <32 x i8> %175 = bitcast <4 x i32> %98 to <16 x i8> %176 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %173, <32 x i8> %174, <16 x i8> %175, i32 2) %177 = extractelement <4 x float> %176, i32 0 %178 = extractelement <4 x float> %176, i32 1 %179 = extractelement <4 x float> %176, i32 2 %180 = fmul float %79, %177 %181 = fmul float %80, %178 %182 = fmul float %81, %179 br label %ENDIF ENDIF: ; preds = %ELSE, %IF %temp14.0 = phi float [ %169, %IF ], [ %182, %ELSE ] %temp13.0 = phi float [ %168, %IF ], [ %181, %ELSE ] %temp12.0 = phi float [ %167, %IF ], [ %180, %ELSE ] %183 = call float @llvm.AMDGPU.lrp(float %82, float %temp12.0, float %69) %184 = call float @llvm.AMDGPU.lrp(float %82, float %temp13.0, float %70) %185 = call float @llvm.AMDGPU.lrp(float %82, float %temp14.0, float %71) %186 = fmul float %82, %72 %187 = fsub float %72, %186 %188 = fmul float %temp12.0, %187 %189 = fmul float %temp13.0, %187 %190 = fmul float %temp14.0, %187 %191 = bitcast float %111 to i32 %192 = bitcast float %112 to i32 %193 = insertelement <2 x i32> undef, i32 %191, i32 0 %194 = insertelement <2 x i32> %193, i32 %192, i32 1 %195 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %194, <32 x i8> %107, <16 x i8> %110, i32 2) %196 = extractelement <4 x float> %195, i32 1 %197 = fsub float 1.000000e+00, %84 %198 = fmul float %196, %84 %199 = fadd float %198, %197 %200 = fmul float %132, %24 %201 = fmul float %133, %25 %202 = fadd float %201, %200 %203 = fmul float %134, %26 %204 = fadd float %202, %203 %205 = call float @llvm.maxnum.f32(float %204, float 0.000000e+00) %206 = fmul float %27, %132 %207 = fmul float %28, %133 %208 = fadd float %206, %207 %209 = fmul float %29, %134 %210 = fadd float %208, %209 %211 = fadd float %210, %30 %212 = fmul float %31, %132 %213 = fmul float %32, %133 %214 = fadd float %212, %213 %215 = fmul float %33, %134 %216 = fadd float %214, %215 %217 = fadd float %216, %34 %218 = fmul float %35, %132 %219 = fmul float %36, %133 %220 = fadd float %218, %219 %221 = fmul float %37, %134 %222 = fadd float %220, %221 %223 = fadd float %222, %38 %224 = fadd float %116, %211 %225 = fadd float %117, %217 %226 = fadd float %118, %223 %227 = fmul float %224, %199 %228 = fmul float %225, %199 %229 = fmul float %226, %199 %230 = fmul float %132, %141 %231 = fmul float %133, %142 %232 = fadd float %231, %230 %233 = fmul float %134, %143 %234 = fadd float %232, %233 %235 = fmul float %234, %132 %236 = fmul float %234, %133 %237 = fmul float %234, %134 %238 = fmul float %235, 2.000000e+00 %239 = fmul float %236, 2.000000e+00 %240 = fmul float %237, 2.000000e+00 %241 = fsub float %141, %238 %242 = fsub float %142, %239 %243 = fsub float %143, %240 %244 = fcmp ogt float %54, 0.000000e+00 br i1 %244, label %IF73, label %ENDIF72 IF73: ; preds = %ENDIF %245 = fmul float %241, %241 %246 = fmul float %242, %242 %247 = fadd float %246, %245 %248 = fmul float %243, %243 %249 = fadd float %247, %248 %250 = call float @llvm.AMDGPU.rsq.clamped.f32(float %249) %251 = fmul float %241, %250 %252 = fmul float %242, %250 %253 = fmul float %243, %250 %254 = fsub float %44, %123 %255 = fsub float %45, %124 %256 = fsub float %46, %125 %257 = fdiv float 1.000000e+00, %251 %258 = fdiv float 1.000000e+00, %252 %259 = fdiv float 1.000000e+00, %253 %260 = fmul float %254, %257 %261 = fmul float %255, %258 %262 = fmul float %256, %259 %263 = fsub float %47, %123 %264 = fsub float %48, %124 %265 = fsub float %49, %125 %266 = fdiv float 1.000000e+00, %251 %267 = fdiv float 1.000000e+00, %252 %268 = fdiv float 1.000000e+00, %253 %269 = fmul float %263, %266 %270 = fmul float %264, %267 %271 = fmul float %265, %268 %272 = fcmp ogt float %251, 0.000000e+00 %273 = fcmp ogt float %252, 0.000000e+00 %274 = fcmp ogt float %253, 0.000000e+00 %. = select i1 %272, float %260, float %269 %temp64.0 = select i1 %273, float %261, float %270 %.99 = select i1 %274, float %262, float %271 %275 = fadd float %44, %47 %276 = fadd float %45, %48 %277 = fadd float %46, %49 %278 = fmul float %275, 5.000000e-01 %279 = fmul float %276, 5.000000e-01 %280 = fmul float %277, 5.000000e-01 %281 = call float @llvm.minnum.f32(float %., float %temp64.0) %282 = call float @llvm.minnum.f32(float %281, float %.99) %283 = fsub float %278, %51 %284 = fsub float %279, %52 %285 = fsub float %280, %53 %286 = fadd float %283, %123 %287 = fadd float %284, %124 %288 = fadd float %285, %125 %289 = fmul float %251, %282 %290 = fadd float %289, %286 %291 = fmul float %252, %282 %292 = fadd float %291, %287 %293 = fmul float %253, %282 %294 = fadd float %293, %288 %295 = fsub float %290, %278 %296 = fsub float %292, %279 %297 = fsub float %294, %280 br label %ENDIF72 ENDIF72: ; preds = %ENDIF, %IF73 %temp40.0 = phi float [ %295, %IF73 ], [ %241, %ENDIF ] %temp41.0 = phi float [ %296, %IF73 ], [ %242, %ENDIF ] %temp42.0 = phi float [ %297, %IF73 ], [ %243, %ENDIF ] %298 = fsub float 1.000000e+00, %83 %299 = call float @llvm.pow.f32(float %298, float 7.500000e-01) %300 = fmul float %299, 7.000000e+00 %301 = insertelement <4 x float> undef, float %temp40.0, i32 0 %302 = insertelement <4 x float> %301, float %temp41.0, i32 1 %303 = insertelement <4 x float> %302, float %temp42.0, i32 2 %304 = insertelement <4 x float> %303, float %300, i32 3 %305 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %304) %306 = extractelement <4 x float> %305, i32 0 %307 = extractelement <4 x float> %305, i32 1 %308 = extractelement <4 x float> %305, i32 2 %309 = extractelement <4 x float> %305, i32 3 %310 = call float @llvm.fabs.f32(float %308) %311 = fdiv float 1.000000e+00, %310 %312 = fmul float %306, %311 %313 = fadd float %312, 1.500000e+00 %314 = fmul float %307, %311 %315 = fadd float %314, 1.500000e+00 %316 = bitcast float %315 to i32 %317 = bitcast float %313 to i32 %318 = bitcast float %309 to i32 %319 = bitcast float %300 to i32 %320 = insertelement <4 x i32> undef, i32 %316, i32 0 %321 = insertelement <4 x i32> %320, i32 %317, i32 1 %322 = insertelement <4 x i32> %321, i32 %318, i32 2 %323 = insertelement <4 x i32> %322, i32 %319, i32 3 %324 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %323, <32 x i8> %86, <16 x i8> %88, i32 4) %325 = extractelement <4 x float> %324, i32 0 %326 = extractelement <4 x float> %324, i32 1 %327 = extractelement <4 x float> %324, i32 2 %328 = extractelement <4 x float> %324, i32 3 %329 = call float @llvm.pow.f32(float %328, float %56) %330 = fmul float %55, %329 %331 = fmul float %330, %325 %332 = fmul float %330, %326 %333 = fmul float %330, %327 %334 = fcmp olt float %50, 0x3FEFFFEB00000000 br i1 %334, label %IF85, label %ENDIF84 IF85: ; preds = %ENDIF72 %335 = fcmp ogt float %66, 0.000000e+00 br i1 %335, label %IF88, label %ENDIF87 ENDIF84: ; preds = %ENDIF72, %ENDIF87 %temp28.0 = phi float [ %563, %ENDIF87 ], [ %331, %ENDIF72 ] %temp29.0 = phi float [ %564, %ENDIF87 ], [ %332, %ENDIF72 ] %temp30.0 = phi float [ %565, %ENDIF87 ], [ %333, %ENDIF72 ] %336 = fmul float %temp28.0, %199 %337 = fmul float %temp29.0, %199 %338 = fmul float %temp30.0, %199 %339 = fsub float 1.000000e+00, %83 %340 = fsub float %24, %141 %341 = fsub float %25, %142 %342 = fsub float %26, %143 %343 = fmul float %340, %340 %344 = fmul float %341, %341 %345 = fadd float %344, %343 %346 = fmul float %342, %342 %347 = fadd float %345, %346 %348 = call float @llvm.AMDGPU.rsq.clamped.f32(float %347) %349 = fmul float %340, %348 %350 = fmul float %341, %348 %351 = fmul float %342, %348 %352 = fmul float %141, %132 %353 = fsub float -0.000000e+00, %352 %354 = fmul float %142, %133 %355 = fsub float %353, %354 %356 = fmul float %143, %134 %357 = fsub float %355, %356 %358 = call float @llvm.maxnum.f32(float %357, float 0.000000e+00) %359 = fmul float %24, %349 %360 = fmul float %25, %350 %361 = fadd float %360, %359 %362 = fmul float %26, %351 %363 = fadd float %361, %362 %364 = call float @llvm.maxnum.f32(float %363, float 0.000000e+00) %365 = fmul float %339, %339 %366 = fmul float %365, %78 %367 = fsub float 1.000000e+00, %339 %368 = fmul float %367, 0x3FEEF9DB20000000 %369 = fadd float %368, 0x3F9EB851E0000000 %370 = call float @llvm.log2.f32(float %369) %371 = fdiv float 1.000000e+00, %370 %372 = fmul float %371, 1.000000e+01 %373 = fmul float %372, %372 %374 = fsub float 1.000000e+00, %205 %375 = fsub float 1.000000e+00, %358 %376 = fmul float %364, 2.000000e+00 %377 = fmul float %364, %339 %378 = fmul float %376, %377 %379 = fadd float %378, 5.000000e-01 %380 = fsub float 1.000000e+00, %364 %381 = fsub float 1.000000e+00, %358 %382 = fsub float 1.000000e+00, %187 %383 = fadd float %83, %382 %384 = call float @llvm.AMDIL.clamp.(float %383, float 0.000000e+00, float 1.000000e+00) %385 = fmul float %381, %381 %386 = fmul float %381, %381 %387 = fmul float %386, %381 %388 = fmul float %385, %387 %389 = call float @llvm.AMDGPU.lrp(float %388, float %384, float %183) %390 = call float @llvm.AMDGPU.lrp(float %388, float %384, float %184) %391 = call float @llvm.AMDGPU.lrp(float %388, float %384, float %185) %392 = call float @llvm.AMDGPU.lrp(float %205, float 1.000000e+00, float %366) %393 = call float @llvm.AMDGPU.lrp(float %358, float 1.000000e+00, float %366) %394 = fmul float %392, %393 %395 = fadd float %394, 0x3F1A36E2E0000000 %396 = fdiv float 1.000000e+00, %395 %397 = fmul float %132, %349 %398 = fmul float %133, %350 %399 = fadd float %398, %397 %400 = fmul float %134, %351 %401 = fadd float %399, %400 %402 = call float @llvm.maxnum.f32(float %401, float 0.000000e+00) %403 = call float @llvm.pow.f32(float %402, float %373) %404 = fadd float %373, 1.000000e+00 %405 = fmul float %404, %77 %406 = fmul float %403, %405 %407 = fmul float %396, %406 %408 = fmul float %407, %205 %409 = fmul float %408, %76 %410 = call float @llvm.maxnum.f32(float %409, float 0.000000e+00) %411 = fmul float %410, %73 %412 = fmul float %410, %74 %413 = fmul float %410, %75 %414 = fsub float 1.000000e+00, %183 %415 = fsub float 1.000000e+00, %184 %416 = fsub float 1.000000e+00, %185 %417 = fmul float %380, %380 %418 = fmul float %380, %380 %419 = fmul float %418, %380 %420 = fmul float %417, %419 %421 = fmul float %414, %420 %422 = fadd float %421, %183 %423 = fmul float %415, %420 %424 = fadd float %423, %184 %425 = fmul float %416, %420 %426 = fadd float %425, %185 %427 = fadd float %379, -1.000000e+00 %428 = fmul float %374, %374 %429 = fmul float %374, %374 %430 = fmul float %429, %374 %431 = fmul float %428, %430 %432 = fmul float %427, %431 %433 = fadd float %432, 1.000000e+00 %434 = fadd float %379, -1.000000e+00 %435 = fmul float %375, %375 %436 = fmul float %375, %375 %437 = fmul float %436, %375 %438 = fmul float %435, %437 %439 = fmul float %434, %438 %440 = fadd float %439, 1.000000e+00 %441 = fmul float %433, %440 %442 = fmul float %441, %205 %443 = fmul float %73, %442 %444 = fadd float %443, %227 %445 = fmul float %74, %442 %446 = fadd float %445, %228 %447 = fmul float %75, %442 %448 = fadd float %447, %229 %449 = fmul float %188, %444 %450 = fmul float %189, %446 %451 = fmul float %190, %448 %452 = fmul float %411, %422 %453 = fadd float %452, %449 %454 = fmul float %412, %424 %455 = fadd float %454, %450 %456 = fmul float %413, %426 %457 = fadd float %456, %451 %458 = fmul float %336, %389 %459 = fadd float %458, %453 %460 = fmul float %337, %390 %461 = fadd float %460, %455 %462 = fmul float %338, %391 %463 = fadd float %462, %457 %464 = fmul float %119, %42 %465 = fadd float %464, %43 %466 = call float @llvm.AMDIL.clamp.(float %465, float 0.000000e+00, float 1.000000e+00) %467 = call float @llvm.AMDGPU.lrp(float %466, float %459, float %39) %468 = call float @llvm.AMDGPU.lrp(float %466, float %461, float %40) %469 = call float @llvm.AMDGPU.lrp(float %466, float %463, float %41) %470 = call i32 @llvm.SI.packf16(float %467, float %468) %471 = bitcast i32 %470 to float %472 = call i32 @llvm.SI.packf16(float %469, float 1.000000e+00) %473 = bitcast i32 %472 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %471, float %473, float %471, float %473) ret void IF88: ; preds = %IF85 %474 = fmul float %241, %241 %475 = fmul float %242, %242 %476 = fadd float %475, %474 %477 = fmul float %243, %243 %478 = fadd float %476, %477 %479 = call float @llvm.AMDGPU.rsq.clamped.f32(float %478) %480 = fmul float %241, %479 %481 = fmul float %242, %479 %482 = fmul float %243, %479 %483 = fsub float %57, %123 %484 = fsub float %58, %124 %485 = fsub float %59, %125 %486 = fdiv float 1.000000e+00, %480 %487 = fdiv float 1.000000e+00, %481 %488 = fdiv float 1.000000e+00, %482 %489 = fmul float %483, %486 %490 = fmul float %484, %487 %491 = fmul float %485, %488 %492 = fsub float %60, %123 %493 = fsub float %61, %124 %494 = fsub float %62, %125 %495 = fdiv float 1.000000e+00, %480 %496 = fdiv float 1.000000e+00, %481 %497 = fdiv float 1.000000e+00, %482 %498 = fmul float %492, %495 %499 = fmul float %493, %496 %500 = fmul float %494, %497 %501 = fcmp ogt float %480, 0.000000e+00 %502 = fcmp ogt float %481, 0.000000e+00 %503 = fcmp ogt float %482, 0.000000e+00 %.100 = select i1 %501, float %489, float %498 %temp64.1 = select i1 %502, float %490, float %499 %.101 = select i1 %503, float %491, float %500 %504 = fadd float %57, %60 %505 = fadd float %58, %61 %506 = fadd float %59, %62 %507 = fmul float %504, 5.000000e-01 %508 = fmul float %505, 5.000000e-01 %509 = fmul float %506, 5.000000e-01 %510 = call float @llvm.minnum.f32(float %.100, float %temp64.1) %511 = call float @llvm.minnum.f32(float %510, float %.101) %512 = fsub float %507, %63 %513 = fsub float %508, %64 %514 = fsub float %509, %65 %515 = fadd float %512, %123 %516 = fadd float %513, %124 %517 = fadd float %514, %125 %518 = fmul float %480, %511 %519 = fadd float %518, %515 %520 = fmul float %481, %511 %521 = fadd float %520, %516 %522 = fmul float %482, %511 %523 = fadd float %522, %517 %524 = fsub float %519, %507 %525 = fsub float %521, %508 %526 = fsub float %523, %509 br label %ENDIF87 ENDIF87: ; preds = %IF85, %IF88 %temp44.0 = phi float [ %524, %IF88 ], [ %241, %IF85 ] %temp45.0 = phi float [ %525, %IF88 ], [ %242, %IF85 ] %temp46.0 = phi float [ %526, %IF88 ], [ %243, %IF85 ] %527 = fsub float 1.000000e+00, %83 %528 = call float @llvm.pow.f32(float %527, float 7.500000e-01) %529 = fmul float %528, 7.000000e+00 %530 = insertelement <4 x float> undef, float %temp44.0, i32 0 %531 = insertelement <4 x float> %530, float %temp45.0, i32 1 %532 = insertelement <4 x float> %531, float %temp46.0, i32 2 %533 = insertelement <4 x float> %532, float %529, i32 3 %534 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %533) %535 = extractelement <4 x float> %534, i32 0 %536 = extractelement <4 x float> %534, i32 1 %537 = extractelement <4 x float> %534, i32 2 %538 = extractelement <4 x float> %534, i32 3 %539 = call float @llvm.fabs.f32(float %537) %540 = fdiv float 1.000000e+00, %539 %541 = fmul float %535, %540 %542 = fadd float %541, 1.500000e+00 %543 = fmul float %536, %540 %544 = fadd float %543, 1.500000e+00 %545 = bitcast float %544 to i32 %546 = bitcast float %542 to i32 %547 = bitcast float %538 to i32 %548 = bitcast float %529 to i32 %549 = insertelement <4 x i32> undef, i32 %545, i32 0 %550 = insertelement <4 x i32> %549, i32 %546, i32 1 %551 = insertelement <4 x i32> %550, i32 %547, i32 2 %552 = insertelement <4 x i32> %551, i32 %548, i32 3 %553 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %552, <32 x i8> %91, <16 x i8> %94, i32 4) %554 = extractelement <4 x float> %553, i32 0 %555 = extractelement <4 x float> %553, i32 1 %556 = extractelement <4 x float> %553, i32 2 %557 = extractelement <4 x float> %553, i32 3 %558 = call float @llvm.pow.f32(float %557, float %68) %559 = fmul float %67, %558 %560 = fmul float %559, %554 %561 = fmul float %559, %555 %562 = fmul float %559, %556 %563 = call float @llvm.AMDGPU.lrp(float %50, float %331, float %560) %564 = call float @llvm.AMDGPU.lrp(float %50, float %332, float %561) %565 = call float @llvm.AMDGPU.lrp(float %50, float %333, float %562) br label %ENDIF84 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v17, v0, 0, 0, [m0] ; C8440000 v_interp_p2_f32 v17, [v17], v1, 0, 0, [m0] ; C8450001 v_interp_p1_f32 v18, v0, 1, 0, [m0] ; C8480100 v_interp_p2_f32 v18, [v18], v1, 1, 0, [m0] ; C8490101 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v10, v0, 1, 1, [m0] ; C8280500 v_interp_p2_f32 v10, [v10], v1, 1, 1, [m0] ; C8290501 v_interp_p1_f32 v13, v0, 2, 1, [m0] ; C8340600 v_interp_p2_f32 v13, [v13], v1, 2, 1, [m0] ; C8350601 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v2, v0, 0, 3, [m0] ; C8080C00 v_interp_p2_f32 v2, [v2], v1, 0, 3, [m0] ; C8090C01 v_interp_p1_f32 v23, v0, 1, 3, [m0] ; C85C0D00 v_interp_p2_f32 v23, [v23], v1, 1, 3, [m0] ; C85D0D01 v_interp_p1_f32 v24, v0, 2, 3, [m0] ; C8600E00 v_interp_p2_f32 v24, [v24], v1, 2, 3, [m0] ; C8610E01 v_interp_p1_f32 v25, v0, 3, 3, [m0] ; C8640F00 v_interp_p2_f32 v25, [v25], v1, 3, 3, [m0] ; C8650F01 v_interp_p1_f32 v21, v0, 0, 4, [m0] ; C8541000 v_interp_p2_f32 v21, [v21], v1, 0, 4, [m0] ; C8551001 v_interp_p1_f32 v16, v0, 1, 4, [m0] ; C8401100 v_interp_p2_f32 v16, [v16], v1, 1, 4, [m0] ; C8411101 v_interp_p1_f32 v22, v0, 2, 4, [m0] ; C8581200 v_mul_f32_e32 v0, v3, v3 ; 10000703 v_mac_f32_e32 v0, v10, v10 ; 3E00150A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x4c ; C204014C s_buffer_load_dword s12, s[0:3], 0x4d ; C206014D s_buffer_load_dword s14, s[0:3], 0x4e ; C207014E s_load_dwordx4 s[28:31], s[4:5], 0xc ; C08E050C s_load_dwordx8 s[32:39], s[6:7], 0x18 ; C0D00718 s_load_dwordx4 s[24:27], s[4:5], 0x8 ; C08C0508 s_load_dwordx8 s[16:23], s[6:7], 0x10 ; C0C80710 v_mac_f32_e32 v0, v13, v13 ; 3E001B0D v_rsq_clamp_f32_e32 v14, v0 ; 7E1C5900 v_mul_f32_e32 v0, v23, v23 ; 10002F17 v_mac_f32_e32 v0, v24, v24 ; 3E003118 v_mac_f32_e32 v0, v25, v25 ; 3E003319 v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_interp_p2_f32 v22, [v22], v1, 2, 4, [m0] ; C8591201 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[32:39], s[28:31] ; F0800100 00E80111 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_nlt_f32_e32 vcc, 0, v1 ; 7C1C0280 s_and_saveexec_b64 s[28:29], vcc ; BE9C246A s_xor_b64 s[28:29], exec, s[28:29] ; 899C1C7E image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[16:23], s[24:27] ; F0800700 00C41A11 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v4, s8, v26 ; 10083408 v_mul_f32_e32 v5, s12, v27 ; 100A360C v_mul_f32_e32 v6, s14, v28 ; 100C380E s_or_saveexec_b64 s[28:29], s[28:29] ; BE9C251C s_buffer_load_dword s9, s[0:3], 0x2b ; C204812B s_buffer_load_dword s10, s[0:3], 0x40 ; C2050140 s_buffer_load_dword s11, s[0:3], 0x41 ; C2058141 s_buffer_load_dword s13, s[0:3], 0x42 ; C2068142 s_buffer_load_dword s15, s[0:3], 0x5c ; C207815C s_waitcnt lgkmcnt(0) ; BF8C007F s_xor_b64 exec, exec, s[28:29] ; 89FE1C7E s_cbranch_execz BB0_4 ; BF880000 v_mov_b32_e32 v1, s8 ; 7E020208 v_mov_b32_e32 v4, s12 ; 7E08020C v_mov_b32_e32 v5, s14 ; 7E0A020E s_buffer_load_dword s30, s[0:3], 0x50 ; C20F0150 s_buffer_load_dword s31, s[0:3], 0x51 ; C20F8151 s_buffer_load_dword s32, s[0:3], 0x52 ; C2100152 image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[16:23], s[24:27] ; F0800700 00C41A11 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s30, v1 ; 1002021E v_mul_f32_e32 v6, s31, v4 ; 100C081F v_mul_f32_e32 v11, s32, v5 ; 10160A20 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v4, v26, v1 ; 1008031A v_mul_f32_e32 v5, v27, v6 ; 100A0D1B v_mul_f32_e32 v6, v28, v11 ; 100C171C s_or_b64 exec, exec, s[28:29] ; 88FE1C7E s_load_dwordx4 s[16:19], s[4:5], 0x10 ; C0880510 v_mul_f32_e32 v12, v14, v3 ; 1018070E v_mul_f32_e32 v11, v14, v10 ; 1016150E v_mul_f32_e32 v10, v14, v13 ; 10141B0E v_mul_f32_e32 v14, v0, v23 ; 101C2F00 s_load_dwordx8 s[20:27], s[6:7], 0x20 ; C0CA0720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[20:27], s[16:19] ; F0800F00 00851111 v_mul_f32_e32 v15, v0, v24 ; 101E3100 v_mul_f32_e32 v1, v14, v12 ; 1002190E v_mac_f32_e32 v1, v15, v11 ; 3E02170F v_mul_f32_e32 v13, v0, v25 ; 101A3300 v_mac_f32_e32 v1, v13, v10 ; 3E02150D v_mul_f32_e32 v3, v12, v1 ; 1006030C v_mac_f32_e32 v3, v12, v1 ; 3E06030C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v17, v11, v1 ; 1022030B v_mac_f32_e32 v17, v11, v1 ; 3E22030B v_mad_f32 v23, v23, v0, -v3 ; D2820017 840E0117 v_mad_f32 v24, v24, v0, -v17 ; D2820018 84460118 v_mul_f32_e32 v3, v10, v1 ; 1006030A v_mac_f32_e32 v3, v10, v1 ; 3E06030A s_buffer_load_dword s12, s[0:3], 0x27 ; C2060127 s_buffer_load_dword s29, s[0:3], 0x2c ; C20E812C s_buffer_load_dword s30, s[0:3], 0x2d ; C20F012D s_buffer_load_dword s8, s[0:3], 0x60 ; C2040160 v_mad_f32 v25, v25, v0, -v3 ; D2820019 840E0119 v_sub_f32_e64 v0, 1.0, s15 ; D2080000 00001EF2 v_mul_f32_e32 v3, s10, v0 ; 1006000A v_mul_f32_e32 v1, s11, v0 ; 1002000B v_mul_f32_e32 v0, s13, v0 ; 1000000D v_mac_f32_e32 v3, s15, v4 ; 3E06080F v_mov_b32_e32 v26, v23 ; 7E340317 v_mac_f32_e32 v1, s15, v5 ; 3E020A0F v_mov_b32_e32 v27, v24 ; 7E360318 v_mac_f32_e32 v0, s15, v6 ; 3E000C0F v_mov_b32_e32 v28, v25 ; 7E380319 v_cmp_lt_f32_e64 s[10:11], 0, s9 ; D002000A 00001280 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[20:21], s[10:11] ; BE94240A s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E s_cbranch_execz BB0_6 ; BF880000 s_buffer_load_dword s9, s[0:3], 0x26 ; C2048126 s_buffer_load_dword s10, s[0:3], 0x28 ; C2050128 s_buffer_load_dword s11, s[0:3], 0x29 ; C2058129 s_buffer_load_dword s13, s[0:3], 0x2a ; C206812A s_buffer_load_dword s14, s[0:3], 0x20 ; C2070120 s_buffer_load_dword s16, s[0:3], 0x21 ; C2080121 s_buffer_load_dword s17, s[0:3], 0x22 ; C2088122 s_buffer_load_dword s18, s[0:3], 0x24 ; C2090124 s_buffer_load_dword s19, s[0:3], 0x25 ; C2098125 v_mul_f32_e32 v17, v23, v23 ; 10222F17 v_mac_f32_e32 v17, v24, v24 ; 3E223118 v_mac_f32_e32 v17, v25, v25 ; 3E223319 v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v19, s9, v22 ; 08262C09 v_mov_b32_e32 v20, s9 ; 7E280209 v_sub_f32_e32 v26, s14, v21 ; 08342A0E v_sub_f32_e32 v27, s16, v16 ; 08362010 v_add_f32_e32 v20, s17, v20 ; 06282811 v_sub_f32_e32 v28, s17, v22 ; 08382C11 v_mad_f32 v29, 0.5, v20, -s13 ; D282001D 803628F0 v_add_f32_e32 v29, v22, v29 ; 063A3B16 v_mul_f32_e32 v30, v17, v23 ; 103C2F11 v_mul_f32_e32 v31, v17, v24 ; 103E3111 v_mul_f32_e32 v17, v17, v25 ; 10223311 v_rcp_f32_e32 v32, v30 ; 7E40551E v_rcp_f32_e32 v33, v31 ; 7E42551F v_rcp_f32_e32 v34, v17 ; 7E445511 v_sub_f32_e32 v35, s18, v21 ; 08462A12 v_mov_b32_e32 v36, s18 ; 7E480212 v_add_f32_e32 v36, s14, v36 ; 0648480E v_mul_f32_e32 v26, v32, v26 ; 10343520 v_mul_f32_e32 v27, v33, v27 ; 10363721 v_mul_f32_e32 v28, v34, v28 ; 10383922 v_mul_f32_e32 v32, v32, v35 ; 10404720 v_sub_f32_e32 v35, s19, v16 ; 08462013 v_mov_b32_e32 v37, s19 ; 7E4A0213 v_mul_f32_e32 v33, v33, v35 ; 10424721 v_mul_f32_e32 v19, v34, v19 ; 10262722 v_cmp_lt_f32_e32 vcc, 0, v30 ; 7C023C80 v_cndmask_b32_e32 v26, v32, v26 ; 00343520 v_cmp_lt_f32_e32 vcc, 0, v31 ; 7C023E80 v_cndmask_b32_e32 v27, v33, v27 ; 00363721 v_cmp_lt_f32_e32 vcc, 0, v17 ; 7C022280 v_cndmask_b32_e32 v19, v19, v28 ; 00263913 v_add_f32_e32 v28, s16, v37 ; 06384A10 v_min3_f32 v19, v26, v27, v19 ; D2A20013 044E371A v_mad_f32 v26, 0.5, v36, -s10 ; D282001A 802A48F0 v_mad_f32 v27, 0.5, v28, -s11 ; D282001B 802E38F0 v_add_f32_e32 v26, v21, v26 ; 06343515 v_add_f32_e32 v27, v16, v27 ; 06363710 v_mac_f32_e32 v26, v19, v30 ; 3E343D13 v_mac_f32_e32 v27, v19, v31 ; 3E363F13 v_mac_f32_e32 v29, v19, v17 ; 3E3A2313 v_mad_f32 v26, 0.5, -v36, v26 ; D282001A 446A48F0 v_mad_f32 v27, 0.5, -v28, v27 ; D282001B 446E38F0 v_mad_f32 v28, 0.5, -v20, v29 ; D282001C 447628F0 s_or_b64 exec, exec, s[20:21] ; 88FE147E s_buffer_load_dword s14, s[0:3], 0x17 ; C2070117 s_buffer_load_dword s16, s[0:3], 0x43 ; C2080143 s_buffer_load_dword s13, s[0:3], 0x68 ; C2068168 s_buffer_load_dword s9, s[0:3], 0x0 ; C2048100 s_buffer_load_dword s10, s[0:3], 0x1 ; C2050101 s_buffer_load_dword s11, s[0:3], 0x2 ; C2058102 s_buffer_load_dword s17, s[0:3], 0x4 ; C2088104 s_buffer_load_dword s18, s[0:3], 0x5 ; C2090105 s_buffer_load_dword s19, s[0:3], 0x6 ; C2098106 s_buffer_load_dword s21, s[0:3], 0x7 ; C20A8107 s_buffer_load_dword s20, s[0:3], 0x8 ; C20A0108 s_buffer_load_dword s22, s[0:3], 0x9 ; C20B0109 s_buffer_load_dword s23, s[0:3], 0xa ; C20B810A s_buffer_load_dword s24, s[0:3], 0xb ; C20C010B s_buffer_load_dword s25, s[0:3], 0xc ; C20C810C s_buffer_load_dword s26, s[0:3], 0xd ; C20D010D s_buffer_load_dword s27, s[0:3], 0xe ; C20D810E s_buffer_load_dword s28, s[0:3], 0xf ; C20E010F v_sub_f32_e64 v17, 1.0, s8 ; D2080011 000010F2 v_log_f32_e32 v17, v17 ; 7E224F11 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 v_mul_legacy_f32_e32 v17, 0x3f400000, v17 ; 0E2222FF 3F400000 v_exp_f32_e32 v17, v17 ; 7E224B11 v_mul_f32_e32 v29, 0x40e00000, v17 ; 103A22FF 40E00000 v_cubeid_f32 v33, v26, v27, v28 ; D2880021 0472371A v_cubema_f32 v32, v26, v27, v28 ; D28E0020 0472371A v_cubesc_f32 v31, v26, v27, v28 ; D28A001F 0472371A v_cubetc_f32 v30, v26, v27, v28 ; D28C001E 0472371A v_mov_b32_e32 v26, 0x3fc00000 ; 7E3402FF 3FC00000 v_rcp_f32_e64 v17, |v32| ; D3540111 00000120 v_mad_f32 v27, v17, v30, v26 ; D282001B 046A3D11 v_mac_f32_e32 v26, v17, v31 ; 3E343F11 v_mov_b32_e32 v28, v33 ; 7E380321 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[36:43], s[32:35] ; F0900F00 01091A1A s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v17, v29 ; 7E224F1D v_mul_legacy_f32_e32 v17, s30, v17 ; 0E22221E v_exp_f32_e32 v17, v17 ; 7E224B11 v_mul_f32_e32 v17, s29, v17 ; 1022221D v_mul_f32_e32 v20, v26, v17 ; 1028231A v_mul_f32_e32 v19, v27, v17 ; 1026231B v_mul_f32_e32 v17, v28, v17 ; 1022231C v_mov_b32_e32 v27, s15 ; 7E36020F v_mov_b32_e32 v26, 0x3f7fff58 ; 7E3402FF 3F7FFF58 v_cmp_lt_f32_e32 vcc, s12, v26 ; 7C02340C s_and_saveexec_b64 s[30:31], vcc ; BE9E246A s_xor_b64 s[30:31], exec, s[30:31] ; 899E1E7E s_cbranch_execz BB0_10 ; BF880000 s_buffer_load_dword s32, s[0:3], 0x3b ; C210013B s_buffer_load_dword s15, s[0:3], 0x3c ; C207813C s_buffer_load_dword s29, s[0:3], 0x3d ; C20E813D s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[32:33], 0, s32 ; D0020020 00004080 s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420 s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E s_cbranch_execz BB0_11 ; BF880000 s_buffer_load_dword s34, s[0:3], 0x36 ; C2110136 s_buffer_load_dword s35, s[0:3], 0x38 ; C2118138 s_buffer_load_dword s36, s[0:3], 0x39 ; C2120139 s_buffer_load_dword s37, s[0:3], 0x3a ; C212813A s_buffer_load_dword s38, s[0:3], 0x30 ; C2130130 s_buffer_load_dword s39, s[0:3], 0x31 ; C2138131 s_buffer_load_dword s40, s[0:3], 0x32 ; C2140132 s_buffer_load_dword s41, s[0:3], 0x34 ; C2148134 s_buffer_load_dword s42, s[0:3], 0x35 ; C2150135 v_mul_f32_e32 v26, v23, v23 ; 10342F17 v_mac_f32_e32 v26, v24, v24 ; 3E343118 v_mac_f32_e32 v26, v25, v25 ; 3E343319 v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v28, s34, v22 ; 08382C22 v_mov_b32_e32 v29, s34 ; 7E3A0222 v_sub_f32_e32 v30, s38, v21 ; 083C2A26 v_sub_f32_e32 v31, s39, v16 ; 083E2027 v_add_f32_e32 v29, s40, v29 ; 063A3A28 v_sub_f32_e32 v32, s40, v22 ; 08402C28 v_mad_f32 v33, 0.5, v29, -s37 ; D2820021 80963AF0 v_add_f32_e32 v22, v22, v33 ; 062C4316 v_mul_f32_e32 v23, v26, v23 ; 102E2F1A v_mul_f32_e32 v24, v26, v24 ; 1030311A v_mul_f32_e32 v25, v26, v25 ; 1032331A v_rcp_f32_e32 v26, v23 ; 7E345517 v_rcp_f32_e32 v33, v24 ; 7E425518 v_rcp_f32_e32 v34, v25 ; 7E445519 v_sub_f32_e32 v35, s41, v21 ; 08462A29 v_mov_b32_e32 v36, s41 ; 7E480229 v_add_f32_e32 v36, s38, v36 ; 06484826 v_mul_f32_e32 v30, v26, v30 ; 103C3D1A v_mul_f32_e32 v26, v26, v35 ; 1034471A v_mul_f32_e32 v31, v33, v31 ; 103E3F21 v_mul_f32_e32 v32, v34, v32 ; 10404122 v_mul_f32_e32 v28, v34, v28 ; 10383922 v_mad_f32 v34, 0.5, v36, -s35 ; D2820022 808E48F0 v_add_f32_e32 v21, v21, v34 ; 062A4515 v_sub_f32_e32 v34, s42, v16 ; 0844202A v_mov_b32_e32 v35, s42 ; 7E46022A v_mul_f32_e32 v33, v33, v34 ; 10424521 v_add_f32_e32 v34, s39, v35 ; 06444627 v_cmp_lt_f32_e32 vcc, 0, v23 ; 7C022E80 v_cndmask_b32_e32 v26, v26, v30 ; 00343D1A v_cmp_lt_f32_e32 vcc, 0, v24 ; 7C023080 v_cndmask_b32_e32 v30, v33, v31 ; 003C3F21 v_cmp_lt_f32_e32 vcc, 0, v25 ; 7C023280 v_cndmask_b32_e32 v28, v28, v32 ; 0038411C v_min3_f32 v26, v26, v30, v28 ; D2A2001A 04723D1A v_mad_f32 v28, 0.5, v34, -s36 ; D282001C 809244F0 v_add_f32_e32 v16, v16, v28 ; 06203910 v_mac_f32_e32 v21, v26, v23 ; 3E2A2F1A v_mac_f32_e32 v16, v26, v24 ; 3E20311A v_mac_f32_e32 v22, v26, v25 ; 3E2C331A v_mad_f32 v23, 0.5, -v36, v21 ; D2820017 445648F0 v_mad_f32 v24, 0.5, -v34, v16 ; D2820018 444244F0 v_mad_f32 v25, 0.5, -v29, v22 ; D2820019 445A3AF0 s_or_b64 exec, exec, s[32:33] ; 88FE207E v_sub_f32_e64 v16, 1.0, s8 ; D2080010 000010F2 v_log_f32_e32 v16, v16 ; 7E204F10 s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504 v_mul_legacy_f32_e32 v16, 0x3f400000, v16 ; 0E2020FF 3F400000 v_exp_f32_e32 v16, v16 ; 7E204B10 v_mul_f32_e32 v26, 0x40e00000, v16 ; 103420FF 40E00000 v_cubeid_f32 v31, v23, v24, v25 ; D288001F 04663117 v_cubema_f32 v30, v23, v24, v25 ; D28E001E 04663117 s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708 v_cubesc_f32 v29, v23, v24, v25 ; D28A001D 04663117 v_cubetc_f32 v28, v23, v24, v25 ; D28C001C 04663117 v_rcp_f32_e64 v16, |v30| ; D3540110 0000011E v_mov_b32_e32 v23, 0x3fc00000 ; 7E2E02FF 3FC00000 v_mad_f32 v24, v16, v28, v23 ; D2820018 045E3910 v_mac_f32_e32 v23, v16, v29 ; 3E2E3B10 v_mov_b32_e32 v25, v31 ; 7E32031F s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[36:43], s[32:35] ; F0900F00 01091517 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v16, v24 ; 7E204F18 v_sub_f32_e64 v24, 1.0, s12 ; D2080018 000018F2 v_mul_legacy_f32_e32 v16, s29, v16 ; 0E20201D v_exp_f32_e32 v16, v16 ; 7E204B10 v_mul_f32_e32 v16, s15, v16 ; 1020200F v_mul_f32_e32 v21, v21, v16 ; 102A2115 v_mul_f32_e32 v22, v22, v16 ; 102C2116 v_mul_f32_e32 v16, v23, v16 ; 10202117 v_mul_f32_e32 v21, v21, v24 ; 102A3115 v_mul_f32_e32 v22, v22, v24 ; 102C3116 v_mul_f32_e32 v16, v16, v24 ; 10203110 v_mac_f32_e32 v21, s12, v20 ; 3E2A280C v_mac_f32_e32 v22, s12, v19 ; 3E2C260C v_mac_f32_e32 v16, s12, v17 ; 3E20220C v_mov_b32_e32 v17, v16 ; 7E220310 v_mov_b32_e32 v19, v22 ; 7E260316 v_mov_b32_e32 v20, v21 ; 7E280315 s_or_b64 exec, exec, s[30:31] ; 88FE1E7E v_mad_f32 v22, -v27, s16, s16 ; D2820016 2040211B v_mov_b32_e32 v16, s14 ; 7E20020E v_mul_f32_e32 v21, v22, v4 ; 102A0916 v_mul_f32_e32 v5, v22, v5 ; 100A0B16 v_mul_f32_e32 v4, v22, v6 ; 10080D16 v_mul_f32_e32 v6, s18, v11 ; 100C1612 v_mac_f32_e32 v6, s17, v12 ; 3E0C1811 v_mac_f32_e32 v6, s19, v10 ; 3E0C1413 v_add_f32_e32 v6, s21, v6 ; 060C0C15 v_add_f32_e32 v7, v6, v7 ; 060E0F06 v_mul_f32_e32 v6, s22, v11 ; 100C1616 v_mac_f32_e32 v6, s20, v12 ; 3E0C1814 v_mac_f32_e32 v6, s23, v10 ; 3E0C1417 v_add_f32_e32 v6, s24, v6 ; 060C0C18 v_add_f32_e32 v8, v6, v8 ; 06101106 v_mul_f32_e32 v6, s26, v11 ; 100C161A v_mac_f32_e32 v6, s25, v12 ; 3E0C1819 v_mac_f32_e32 v6, s27, v10 ; 3E0C141B v_add_f32_e32 v6, s28, v6 ; 060C0C1C v_add_f32_e32 v24, v6, v9 ; 06301306 s_buffer_load_dword s6, s[0:3], 0x10 ; C2030110 s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_buffer_load_dword s17, s[0:3], 0x16 ; C2088116 s_buffer_load_dword s14, s[0:3], 0x44 ; C2070144 s_buffer_load_dword s7, s[0:3], 0x45 ; C2038145 s_buffer_load_dword s12, s[0:3], 0x46 ; C2060146 s_buffer_load_dword s15, s[0:3], 0x48 ; C2078148 s_buffer_load_dword s16, s[0:3], 0x49 ; C2080149 s_buffer_load_dword s0, s[0:3], 0x4b ; C200014B v_sub_f32_e64 v23, 1.0, s13 ; D2080017 00001AF2 v_mac_f32_e32 v23, s13, v18 ; 3E2E240D v_mul_f32_e32 v6, s9, v12 ; 100C1809 v_mac_f32_e32 v6, s10, v11 ; 3E0C160A v_mac_f32_e32 v6, s11, v10 ; 3E0C140B v_max_f32_e32 v6, 0, v6 ; 200C0C80 v_mul_f32_e32 v9, v23, v7 ; 10120F17 v_mul_f32_e32 v7, v23, v8 ; 100E1117 v_mul_f32_e32 v8, v23, v24 ; 10103117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v16, s17, v2 ; 3E200411 v_mul_f32_e32 v2, v23, v20 ; 10042917 v_mul_f32_e32 v18, v23, v19 ; 10242717 v_mul_f32_e32 v17, v23, v17 ; 10222317 v_sub_f32_e32 v19, 1.0, v22 ; 08262CF2 v_add_f32_e32 v19, s8, v19 ; 06262608 v_sub_f32_e64 v20, 1.0, s8 ; D2080014 000010F2 v_add_f32_e64 v19, 0, v19 clamp ; D2060813 00022680 v_sub_f32_e32 v22, s9, v14 ; 082C1C09 v_sub_f32_e32 v23, s10, v15 ; 082E1E0A v_mul_f32_e32 v24, v22, v22 ; 10302D16 v_mac_f32_e32 v24, v23, v23 ; 3E302F17 v_sub_f32_e32 v25, s11, v13 ; 08321A0B v_mac_f32_e32 v24, v25, v25 ; 3E303319 v_rsq_clamp_f32_e32 v24, v24 ; 7E305918 v_mul_f32_e32 v22, v24, v22 ; 102C2D18 v_mul_f32_e32 v23, v24, v23 ; 102E2F18 v_mul_f32_e32 v24, v24, v25 ; 10303318 v_mul_f32_e32 v14, v14, v12 ; 101C190E v_mad_f32 v14, -v15, v11, -v14 ; D282000E A43A170F v_mul_f32_e32 v12, v22, v12 ; 10181916 v_mac_f32_e32 v12, v23, v11 ; 3E181717 v_mul_f32_e32 v11, s9, v22 ; 10162C09 v_mac_f32_e32 v11, s10, v23 ; 3E162E0A v_mad_f32 v13, -v13, v10, v14 ; D282000D 243A150D v_mac_f32_e32 v11, s11, v24 ; 3E16300B v_mac_f32_e32 v12, v24, v10 ; 3E181518 v_max_f32_e32 v10, 0, v11 ; 20141680 v_sub_f32_e32 v11, 1.0, v10 ; 081614F2 v_mul_f32_e32 v14, v11, v11 ; 101C170B v_mul_f32_e32 v11, v11, v14 ; 10161D0B v_mul_f32_e32 v11, v11, v14 ; 10161D0B v_max_f32_e32 v13, 0, v13 ; 201A1A80 v_sub_f32_e32 v14, 1.0, v13 ; 081C1AF2 v_mul_f32_e32 v15, v14, v14 ; 101E1D0E v_mul_f32_e32 v22, v14, v15 ; 102C1F0E v_mad_f32 v23, -v15, v22, 1.0 ; D2820017 23CA2D0F v_mul_f32_e32 v24, v3, v23 ; 10302F03 v_sub_f32_e32 v25, 1.0, v3 ; 083206F2 v_mac_f32_e32 v3, v11, v25 ; 3E06330B v_mul_f32_e32 v25, v1, v23 ; 10322F01 v_sub_f32_e32 v26, 1.0, v1 ; 083402F2 v_mac_f32_e32 v1, v11, v26 ; 3E02350B v_mul_f32_e32 v23, v0, v23 ; 102E2F00 v_sub_f32_e32 v26, 1.0, v0 ; 083400F2 v_mac_f32_e32 v0, v11, v26 ; 3E00350B v_sub_f32_e32 v11, 1.0, v20 ; 081628F2 v_mov_b32_e32 v26, 0x3cf5c28f ; 7E3402FF 3CF5C28F v_madmk_f32_e32 v11, v11, v26, 0x3f77ced9 ; 4016350B 3F77CED9 v_add_f32_e32 v26, v10, v10 ; 0634150A v_mul_f32_e32 v10, v20, v10 ; 10141514 v_mad_f32 v10, v26, v10, 0.5 ; D282000A 03C2151A v_mul_f32_e32 v15, v22, v15 ; 101E1F16 v_mac_f32_e32 v24, v19, v15 ; 3E301F13 v_mac_f32_e32 v25, v19, v15 ; 3E321F13 v_mac_f32_e32 v23, v19, v15 ; 3E2E1F13 v_mul_f32_e32 v19, v20, v20 ; 10262914 v_log_f32_e32 v11, v11 ; 7E164F0B v_mul_f32_e32 v19, s0, v19 ; 10262600 v_mul_f32_e32 v14, v19, v14 ; 101C1D13 v_mac_f32_e32 v14, 1.0, v13 ; 3E1C1AF2 v_rcp_f32_e32 v11, v11 ; 7E16550B v_sub_f32_e32 v13, 1.0, v6 ; 081A0CF2 v_mul_f32_e32 v19, v19, v13 ; 10261B13 v_mac_f32_e32 v19, 1.0, v6 ; 3E260CF2 v_max_f32_e32 v12, 0, v12 ; 20181880 v_log_f32_e32 v12, v12 ; 7E184F0C v_madak_f32_e32 v14, v19, v14, 0x38d1b717 ; 421C1D13 38D1B717 v_mul_f32_e32 v11, 0x41200000, v11 ; 101616FF 41200000 v_mul_f32_e32 v19, v11, v11 ; 1026170B v_mul_legacy_f32_e32 v12, v19, v12 ; 0E181913 v_rcp_f32_e32 v14, v14 ; 7E1C550E v_mad_f32 v11, v11, v11, 1.0 ; D282000B 03CA170B v_mul_f32_e32 v11, s16, v11 ; 10161610 v_exp_f32_e32 v12, v12 ; 7E184B0C v_mul_f32_e32 v11, v11, v12 ; 1016190B v_mul_f32_e32 v11, v11, v14 ; 10161D0B v_mul_f32_e32 v11, v6, v11 ; 10161706 v_mul_f32_e32 v11, s15, v11 ; 1016160F v_mul_f32_e32 v12, v13, v13 ; 10181B0D v_mul_f32_e32 v13, v13, v12 ; 101A190D v_mul_f32_e32 v12, v13, v12 ; 1018190D v_add_f32_e32 v10, -1.0, v10 ; 061414F3 v_mad_f32 v12, v10, v12, 1.0 ; D282000C 03CA190A v_mad_f32 v10, v10, v15, 1.0 ; D282000A 03CA1F0A v_mul_f32_e32 v10, v10, v12 ; 1014190A v_mul_f32_e32 v6, v6, v10 ; 100C1506 v_mac_f32_e32 v9, s14, v6 ; 3E120C0E v_mul_f32_e32 v9, v9, v21 ; 10122B09 v_max_f32_e32 v10, 0, v11 ; 20141680 v_mul_f32_e32 v11, s14, v10 ; 1016140E v_mac_f32_e32 v9, v3, v11 ; 3E121703 v_mac_f32_e32 v7, s7, v6 ; 3E0E0C07 v_mac_f32_e32 v8, s12, v6 ; 3E100C0C v_mul_f32_e32 v3, s7, v10 ; 10061407 v_mul_f32_e32 v6, s12, v10 ; 100C140C v_mul_f32_e32 v5, v7, v5 ; 100A0B07 v_mul_f32_e32 v4, v8, v4 ; 10080908 v_mac_f32_e32 v5, v1, v3 ; 3E0A0701 v_mac_f32_e32 v4, v0, v6 ; 3E080D00 v_mac_f32_e32 v9, v24, v2 ; 3E120518 v_mac_f32_e32 v5, v25, v18 ; 3E0A2519 v_mac_f32_e32 v4, v23, v17 ; 3E082317 v_add_f32_e64 v0, 0, v16 clamp ; D2060800 00022080 v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 v_mul_f32_e32 v2, s6, v1 ; 10040206 v_mac_f32_e32 v2, v9, v0 ; 3E040109 v_mul_f32_e32 v3, s5, v1 ; 10060205 v_mac_f32_e32 v3, v5, v0 ; 3E060105 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mac_f32_e32 v1, v4, v0 ; 3E020104 v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702 v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 40 Code Size: 2236 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL OUT[6], GENERIC[5] DCL CONST[0..20] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[2], IN[0].xxxx 1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0].xyz, CONST[5], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[17], IN[0].xxxx 5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1] 8: MAD TEMP[2].xy, IN[2].xyyy, CONST[10].xyyy, CONST[10].zwww 9: FSEQ TEMP[3].x, CONST[12].xxxx, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].xy, IN[2].xyxx 12: ELSE :0 13: MOV TEMP[3].xy, IN[3].xyxx 14: ENDIF 15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[11].xyyy, CONST[11].zwww 16: MOV TEMP[2].zw, TEMP[3].yyxy 17: MOV TEMP[3].x, CONST[6].xxxx 18: MOV TEMP[3].y, CONST[7].xxxx 19: MOV TEMP[3].z, CONST[8].xxxx 20: MOV TEMP[4].x, CONST[6].yyyy 21: MOV TEMP[4].y, CONST[7].yyyy 22: MOV TEMP[4].z, CONST[8].yyyy 23: MOV TEMP[5].x, CONST[6].zzzz 24: MOV TEMP[5].y, CONST[7].zzzz 25: MOV TEMP[5].z, CONST[8].zzzz 26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz 29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 30: RSQ TEMP[4].x, TEMP[4].xxxx 31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 32: MUL TEMP[4].xyz, CONST[2].xyzz, IN[4].xxxx 33: MAD TEMP[4].xyz, CONST[3].xyzz, IN[4].yyyy, TEMP[4].xyzz 34: MAD TEMP[4].xyz, CONST[4].xyzz, IN[4].zzzz, TEMP[4].xyzz 35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 36: RSQ TEMP[5].x, TEMP[5].xxxx 37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx 39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz 40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww 41: MOV TEMP[4].xyz, TEMP[4].xyzx 42: MOV TEMP[5].xyz, TEMP[5].xyzx 43: MOV TEMP[3].xyz, TEMP[3].xyzx 44: MUL TEMP[6].xyz, TEMP[0].xyzz, CONST[1].wwww 45: ADD TEMP[6].xyz, CONST[1].xyzz, -TEMP[6].xyzz 46: MOV TEMP[4].w, TEMP[6].xxxx 47: MOV TEMP[5].w, TEMP[6].yyyy 48: MOV TEMP[3].w, TEMP[6].zzzz 49: MUL TEMP[6], CONST[2], IN[0].xxxx 50: MAD TEMP[6], CONST[3], IN[0].yyyy, TEMP[6] 51: MAD TEMP[6], CONST[4], IN[0].zzzz, TEMP[6] 52: MAD TEMP[6], CONST[5], IN[0].wwww, TEMP[6] 53: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[0].xyzz 54: MOV TEMP[0].yzw, TEMP[0].yxyz 55: MUL TEMP[7], CONST[13], TEMP[6].xxxx 56: MAD TEMP[7], CONST[14], TEMP[6].yyyy, TEMP[7] 57: MAD TEMP[7], CONST[15], TEMP[6].zzzz, TEMP[7] 58: MAD TEMP[6].xyz, CONST[16], TEMP[6].wwww, TEMP[7] 59: MOV TEMP[6].xyz, TEMP[6].xyzx 60: MOV TEMP[0].x, TEMP[1].zzzz 61: MOV OUT[1], TEMP[2] 62: MOV OUT[3], TEMP[5] 63: MOV OUT[2], TEMP[4] 64: MOV OUT[4], TEMP[3] 65: MOV OUT[0], TEMP[1] 66: MOV OUT[6], TEMP[6] 67: MOV OUT[5], TEMP[0] 68: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332) %82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0 %84 = add i32 %5, %7 %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %84) %86 = extractelement <4 x float> %85, i32 0 %87 = extractelement <4 x float> %85, i32 1 %88 = extractelement <4 x float> %85, i32 2 %89 = extractelement <4 x float> %85, i32 3 %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 %92 = add i32 %5, %7 %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %92) %94 = extractelement <4 x float> %93, i32 0 %95 = extractelement <4 x float> %93, i32 1 %96 = extractelement <4 x float> %93, i32 2 %97 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0 %99 = add i32 %5, %7 %100 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %98, i32 0, i32 %99) %101 = extractelement <4 x float> %100, i32 0 %102 = extractelement <4 x float> %100, i32 1 %103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0 %105 = add i32 %5, %7 %106 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %104, i32 0, i32 %105) %107 = extractelement <4 x float> %106, i32 0 %108 = extractelement <4 x float> %106, i32 1 %109 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0 %111 = add i32 %5, %7 %112 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %110, i32 0, i32 %111) %113 = extractelement <4 x float> %112, i32 0 %114 = extractelement <4 x float> %112, i32 1 %115 = extractelement <4 x float> %112, i32 2 %116 = extractelement <4 x float> %112, i32 3 %117 = fmul float %20, %86 %118 = fmul float %21, %86 %119 = fmul float %22, %86 %120 = fmul float %24, %87 %121 = fadd float %120, %117 %122 = fmul float %25, %87 %123 = fadd float %122, %118 %124 = fmul float %26, %87 %125 = fadd float %124, %119 %126 = fmul float %28, %88 %127 = fadd float %126, %121 %128 = fmul float %29, %88 %129 = fadd float %128, %123 %130 = fmul float %30, %88 %131 = fadd float %130, %125 %132 = fmul float %32, %89 %133 = fadd float %132, %127 %134 = fmul float %33, %89 %135 = fadd float %134, %129 %136 = fmul float %34, %89 %137 = fadd float %136, %131 %138 = fmul float %66, %86 %139 = fmul float %67, %86 %140 = fmul float %68, %86 %141 = fmul float %69, %86 %142 = fmul float %70, %87 %143 = fadd float %142, %138 %144 = fmul float %71, %87 %145 = fadd float %144, %139 %146 = fmul float %72, %87 %147 = fadd float %146, %140 %148 = fmul float %73, %87 %149 = fadd float %148, %141 %150 = fmul float %74, %88 %151 = fadd float %150, %143 %152 = fmul float %75, %88 %153 = fadd float %152, %145 %154 = fmul float %76, %88 %155 = fadd float %154, %147 %156 = fmul float %77, %88 %157 = fadd float %156, %149 %158 = fmul float %78, %89 %159 = fadd float %158, %151 %160 = fmul float %79, %89 %161 = fadd float %160, %153 %162 = fmul float %80, %89 %163 = fadd float %162, %155 %164 = fmul float %81, %89 %165 = fadd float %164, %157 %166 = fmul float %101, %45 %167 = fadd float %166, %47 %168 = fmul float %102, %46 %169 = fadd float %168, %48 %170 = fcmp oeq float %53, 0.000000e+00 %. = select i1 %170, float %101, float %107 %.32 = select i1 %170, float %102, float %108 %171 = fmul float %., %49 %172 = fadd float %171, %51 %173 = fmul float %.32, %50 %174 = fadd float %173, %52 %175 = fmul float %36, %94 %176 = fmul float %39, %94 %177 = fmul float %42, %94 %178 = fmul float %37, %95 %179 = fadd float %178, %175 %180 = fmul float %40, %95 %181 = fadd float %180, %176 %182 = fmul float %43, %95 %183 = fadd float %182, %177 %184 = fmul float %38, %96 %185 = fadd float %184, %179 %186 = fmul float %41, %96 %187 = fadd float %186, %181 %188 = fmul float %44, %96 %189 = fadd float %188, %183 %190 = fmul float %185, %185 %191 = fmul float %187, %187 %192 = fadd float %191, %190 %193 = fmul float %189, %189 %194 = fadd float %192, %193 %195 = call float @llvm.AMDGPU.rsq.clamped.f32(float %194) %196 = fmul float %185, %195 %197 = fmul float %187, %195 %198 = fmul float %189, %195 %199 = fmul float %20, %113 %200 = fmul float %21, %113 %201 = fmul float %22, %113 %202 = fmul float %24, %114 %203 = fadd float %202, %199 %204 = fmul float %25, %114 %205 = fadd float %204, %200 %206 = fmul float %26, %114 %207 = fadd float %206, %201 %208 = fmul float %28, %115 %209 = fadd float %208, %203 %210 = fmul float %29, %115 %211 = fadd float %210, %205 %212 = fmul float %30, %115 %213 = fadd float %212, %207 %214 = fmul float %209, %209 %215 = fmul float %211, %211 %216 = fadd float %215, %214 %217 = fmul float %213, %213 %218 = fadd float %216, %217 %219 = call float @llvm.AMDGPU.rsq.clamped.f32(float %218) %220 = fmul float %209, %219 %221 = fmul float %211, %219 %222 = fmul float %213, %219 %223 = fmul float %198, %221 %224 = fmul float %196, %222 %225 = fmul float %197, %220 %226 = fmul float %197, %222 %227 = fsub float %226, %223 %228 = fmul float %198, %220 %229 = fsub float %228, %224 %230 = fmul float %196, %221 %231 = fsub float %230, %225 %232 = fmul float %227, %116 %233 = fmul float %229, %116 %234 = fmul float %231, %116 %235 = fmul float %133, %19 %236 = fmul float %135, %19 %237 = fmul float %137, %19 %238 = fsub float %16, %235 %239 = fsub float %17, %236 %240 = fsub float %18, %237 %241 = fmul float %20, %86 %242 = fmul float %21, %86 %243 = fmul float %22, %86 %244 = fmul float %23, %86 %245 = fmul float %24, %87 %246 = fadd float %245, %241 %247 = fmul float %25, %87 %248 = fadd float %247, %242 %249 = fmul float %26, %87 %250 = fadd float %249, %243 %251 = fmul float %27, %87 %252 = fadd float %251, %244 %253 = fmul float %28, %88 %254 = fadd float %253, %246 %255 = fmul float %29, %88 %256 = fadd float %255, %248 %257 = fmul float %30, %88 %258 = fadd float %257, %250 %259 = fmul float %31, %88 %260 = fadd float %259, %252 %261 = fmul float %32, %89 %262 = fadd float %261, %254 %263 = fmul float %33, %89 %264 = fadd float %263, %256 %265 = fmul float %34, %89 %266 = fadd float %265, %258 %267 = fmul float %35, %89 %268 = fadd float %267, %260 %269 = fsub float %133, %13 %270 = fsub float %135, %14 %271 = fsub float %137, %15 %272 = fmul float %54, %262 %273 = fmul float %55, %262 %274 = fmul float %56, %262 %275 = fmul float %57, %264 %276 = fadd float %275, %272 %277 = fmul float %58, %264 %278 = fadd float %277, %273 %279 = fmul float %59, %264 %280 = fadd float %279, %274 %281 = fmul float %60, %266 %282 = fadd float %281, %276 %283 = fmul float %61, %266 %284 = fadd float %283, %278 %285 = fmul float %62, %266 %286 = fadd float %285, %280 %287 = fmul float %63, %268 %288 = fadd float %287, %282 %289 = fmul float %64, %268 %290 = fadd float %289, %284 %291 = fmul float %65, %268 %292 = fadd float %291, %286 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %167, float %169, float %172, float %174) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %220, float %221, float %222, float %238) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %232, float %233, float %234, float %239) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %196, float %197, float %198, float %240) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %163, float %269, float %270, float %271) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %288, float %290, float %292, float %268) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %159, float %161, float %163, float %165) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[10:13], v0, s[20:23], 0 idxen ; E00C2000 80050A00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[12:15], v0, s[8:11], 0 idxen ; E00C2000 80020C00 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x30 ; C2020130 s_buffer_load_dword s5, s[0:3], 0x2a ; C202812A s_buffer_load_dword s6, s[0:3], 0x28 ; C2030128 s_buffer_load_dword s7, s[0:3], 0x34 ; C2038134 s_buffer_load_dword s8, s[0:3], 0x35 ; C2040135 s_buffer_load_dword s9, s[0:3], 0x36 ; C2048136 s_buffer_load_dword s10, s[0:3], 0x38 ; C2050138 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880 v_cndmask_b32_e32 v0, v10, v8 ; 0000110A v_cndmask_b32_e32 v10, v11, v9 ; 0014130B v_mov_b32_e32 v11, s5 ; 7E160205 s_buffer_load_dword s4, s[0:3], 0x29 ; C2020129 s_buffer_load_dword s5, s[0:3], 0x2b ; C202812B v_mac_f32_e32 v11, s6, v8 ; 3E161006 s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118 s_buffer_load_dword s11, s[0:3], 0x19 ; C2058119 s_buffer_load_dword s12, s[0:3], 0x1c ; C206011C s_buffer_load_dword s13, s[0:3], 0x1d ; C206811D s_buffer_load_dword s14, s[0:3], 0x20 ; C2070120 s_buffer_load_dword s15, s[0:3], 0x2c ; C207812C s_buffer_load_dword s16, s[0:3], 0x2d ; C208012D s_buffer_load_dword s17, s[0:3], 0x2e ; C208812E s_buffer_load_dword s18, s[0:3], 0x2f ; C209012F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s5 ; 7E100205 s_buffer_load_dword s5, s[0:3], 0x21 ; C2028121 v_mac_f32_e32 v8, s4, v9 ; 3E101204 v_mul_f32_e32 v9, s6, v5 ; 10120A06 v_mac_f32_e32 v9, s11, v6 ; 3E120C0B s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_buffer_load_dword s6, s[0:3], 0x1e ; C203011E v_mul_f32_e32 v16, s12, v5 ; 10200A0C v_mac_f32_e32 v16, s13, v6 ; 3E200C0D v_mul_f32_e32 v5, s14, v5 ; 100A0A0E s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v5, s5, v6 ; 3E0A0C05 s_buffer_load_dword s5, s[0:3], 0x44 ; C2028144 s_buffer_load_dword s12, s[0:3], 0x48 ; C2060148 s_buffer_load_dword s13, s[0:3], 0x45 ; C2068145 s_buffer_load_dword s14, s[0:3], 0x49 ; C2070149 v_mac_f32_e32 v9, s4, v7 ; 3E120E04 s_buffer_load_dword s4, s[0:3], 0x46 ; C2020146 s_buffer_load_dword s19, s[0:3], 0x4a ; C209814A s_buffer_load_dword s20, s[0:3], 0x47 ; C20A0147 v_mac_f32_e32 v16, s6, v7 ; 3E200E06 v_mac_f32_e32 v5, s11, v7 ; 3E0A0E0B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s5, v1 ; 100C0205 v_mac_f32_e32 v6, s12, v2 ; 3E0C040C v_mul_f32_e32 v7, s13, v1 ; 100E020D v_mac_f32_e32 v7, s14, v2 ; 3E0E040E s_buffer_load_dword s5, s[0:3], 0x4b ; C202814B v_mul_f32_e32 v17, s4, v1 ; 10220204 v_mac_f32_e32 v17, s19, v2 ; 3E220413 v_mul_f32_e32 v18, s20, v1 ; 10240214 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_buffer_load_dword s6, s[0:3], 0xf ; C203010F s_buffer_load_dword s11, s[0:3], 0x4c ; C205814C s_buffer_load_dword s12, s[0:3], 0x4d ; C206014D s_buffer_load_dword s13, s[0:3], 0x4e ; C206814E s_buffer_load_dword s14, s[0:3], 0x4f ; C207014F s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v18, s5, v2 ; 3E240405 s_buffer_load_dword s5, s[0:3], 0xc ; C202810C s_buffer_load_dword s19, s[0:3], 0xd ; C209810D s_buffer_load_dword s20, s[0:3], 0xe ; C20A010E v_mul_f32_e32 v19, s4, v1 ; 10260204 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 v_mac_f32_e32 v19, s6, v2 ; 3E260406 v_mac_f32_e32 v6, s11, v3 ; 3E0C060B s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108 v_mac_f32_e32 v7, s12, v3 ; 3E0E060C v_mac_f32_e32 v17, s13, v3 ; 3E22060D s_buffer_load_dword s11, s[0:3], 0x10 ; C2058110 s_buffer_load_dword s12, s[0:3], 0x14 ; C2060114 v_mac_f32_e32 v18, s14, v3 ; 3E24060E s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v19, s4, v3 ; 3E260604 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A v_mul_f32_e32 v20, s6, v1 ; 10280206 v_mac_f32_e32 v20, s5, v2 ; 3E280405 s_buffer_load_dword s21, s[0:3], 0x12 ; C20A8112 v_mac_f32_e32 v20, s11, v3 ; 3E28060B v_mac_f32_e32 v20, s12, v4 ; 3E28080C s_buffer_load_dword s12, s[0:3], 0x15 ; C2060115 v_mul_f32_e32 v21, s13, v1 ; 102A020D v_mac_f32_e32 v21, s19, v2 ; 3E2A0413 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v21, s4, v3 ; 3E2A0604 s_buffer_load_dword s22, s[0:3], 0x16 ; C20B0116 s_buffer_load_dword s23, s[0:3], 0x50 ; C20B8150 s_buffer_load_dword s24, s[0:3], 0x51 ; C20C0151 s_buffer_load_dword s25, s[0:3], 0x52 ; C20C8152 s_buffer_load_dword s26, s[0:3], 0x53 ; C20D0153 s_buffer_load_dword s27, s[0:3], 0x17 ; C20D8117 v_mac_f32_e32 v21, s12, v4 ; 3E2A080C v_mul_f32_e32 v1, s14, v1 ; 1002020E v_mac_f32_e32 v1, s20, v2 ; 3E020414 v_mac_f32_e32 v1, s21, v3 ; 3E020615 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v1, s22, v4 ; 3E020816 v_mac_f32_e32 v6, s23, v4 ; 3E0C0817 v_mac_f32_e32 v7, s24, v4 ; 3E0E0818 v_mac_f32_e32 v17, s25, v4 ; 3E220819 v_mac_f32_e32 v18, s26, v4 ; 3E24081A v_mac_f32_e32 v19, s27, v4 ; 3E26081B v_mov_b32_e32 v2, s17 ; 7E040211 v_mac_f32_e32 v2, s15, v0 ; 3E04000F v_mov_b32_e32 v0, s18 ; 7E000212 v_mac_f32_e32 v0, s16, v10 ; 3E001410 exp 15, 32, 0, 0, 0, v11, v8, v2, v0 ; F800020F 0002080B s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s6, v12 ; 10001806 v_mac_f32_e32 v0, s5, v13 ; 3E001A05 v_mul_f32_e32 v2, s13, v12 ; 1004180D v_mac_f32_e32 v2, s19, v13 ; 3E041A13 v_mul_f32_e32 v3, s14, v12 ; 1006180E v_mac_f32_e32 v3, s20, v13 ; 3E061A14 v_mac_f32_e32 v0, s11, v14 ; 3E001C0B v_mac_f32_e32 v2, s4, v14 ; 3E041C04 v_mac_f32_e32 v3, s21, v14 ; 3E061C15 v_mul_f32_e32 v4, v9, v9 ; 10081309 v_mac_f32_e32 v4, v16, v16 ; 3E082110 v_mul_f32_e32 v8, v0, v0 ; 10100100 v_mac_f32_e32 v8, v2, v2 ; 3E100502 v_mac_f32_e32 v4, v5, v5 ; 3E080B05 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x7 ; C2028107 v_mac_f32_e32 v8, v3, v3 ; 3E100703 v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_mul_f32_e32 v9, v4, v9 ; 10121304 v_mul_f32_e32 v10, v4, v16 ; 10142104 v_mul_f32_e32 v4, v4, v5 ; 10080B04 v_mul_f32_e32 v0, v8, v0 ; 10000108 v_mul_f32_e32 v2, v8, v2 ; 10040508 v_mul_f32_e32 v3, v8, v3 ; 10060708 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v5, -v20, s5, v5 ; D2820005 24140B14 exp 15, 33, 0, 0, 0, v0, v2, v3, v5 ; F800021F 05030200 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v5, v2, v4 ; 100A0902 v_mad_f32 v5, v10, v3, -v5 ; D2820005 8416070A v_mul_f32_e32 v3, v3, v9 ; 10061303 v_mad_f32 v3, v4, v0, -v3 ; D2820003 840E0104 v_mul_f32_e32 v0, v0, v10 ; 10001500 v_mad_f32 v0, v9, v2, -v0 ; D2820000 84020509 v_mul_f32_e32 v2, v15, v5 ; 10040B0F v_mul_f32_e32 v3, v15, v3 ; 1006070F v_mul_f32_e32 v0, v15, v0 ; 1000010F v_mov_b32_e32 v5, s6 ; 7E0A0206 v_mad_f32 v5, -v21, s5, v5 ; D2820005 24140B15 exp 15, 34, 0, 0, 0, v2, v3, v0, v5 ; F800022F 05000302 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_buffer_load_dword s11, s[0:3], 0x1 ; C2058101 s_buffer_load_dword s12, s[0:3], 0x2 ; C2060102 s_buffer_load_dword s13, s[0:3], 0x40 ; C2068140 s_buffer_load_dword s14, s[0:3], 0x41 ; C2070141 s_buffer_load_dword s15, s[0:3], 0x42 ; C2078142 s_buffer_load_dword s16, s[0:3], 0x39 ; C2080139 s_buffer_load_dword s17, s[0:3], 0x3a ; C208813A s_buffer_load_dword s18, s[0:3], 0x3c ; C209013C s_buffer_load_dword s19, s[0:3], 0x3d ; C209813D s_buffer_load_dword s0, s[0:3], 0x3e ; C200013E s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v0, s4 ; 7E000204 v_mad_f32 v0, -v1, s5, v0 ; D2820000 24000B01 v_subrev_f32_e32 v2, s6, v20 ; 0A042806 v_mul_f32_e32 v3, s7, v20 ; 10062807 v_mul_f32_e32 v5, s8, v20 ; 100A2808 v_mul_f32_e32 v8, s9, v20 ; 10102809 v_mac_f32_e32 v3, s10, v21 ; 3E062A0A v_mac_f32_e32 v5, s16, v21 ; 3E0A2A10 v_mac_f32_e32 v8, s17, v21 ; 3E102A11 v_subrev_f32_e32 v11, s11, v21 ; 0A162A0B v_mac_f32_e32 v3, s18, v1 ; 3E060212 v_mac_f32_e32 v5, s19, v1 ; 3E0A0213 v_mac_f32_e32 v8, s0, v1 ; 3E100200 v_subrev_f32_e32 v1, s12, v1 ; 0A02020C v_mac_f32_e32 v3, s13, v19 ; 3E06260D v_mac_f32_e32 v5, s14, v19 ; 3E0A260E v_mac_f32_e32 v8, s15, v19 ; 3E10260F exp 15, 35, 0, 0, 0, v9, v10, v4, v0 ; F800023F 00040A09 exp 15, 36, 0, 0, 0, v17, v2, v11, v1 ; F800024F 010B0211 exp 15, 37, 0, 0, 0, v3, v5, v8, v19 ; F800025F 13080503 exp 15, 12, 0, 1, 0, v6, v7, v17, v18 ; F80008CF 12110706 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 24 Code Size: 920 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[0..4] DCL CONST[7..9] DCL TEMP[0..13], LOCAL IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[1] FLT32 { 10.0000, 0.9680, 0.0300, 0.5000} IMM[2] FLT32 { 0.0001, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].x, IN[1].xxxx 1: MOV TEMP[0].y, IN[2].xxxx 2: MOV TEMP[0].z, IN[3].xxxx 3: MOV TEMP[1].x, IN[1].yyyy 4: MOV TEMP[1].y, IN[2].yyyy 5: MOV TEMP[1].z, IN[3].yyyy 6: MOV TEMP[2].x, IN[1].zzzz 7: MOV TEMP[2].y, IN[2].zzzz 8: MOV TEMP[2].z, IN[3].zzzz 9: MOV TEMP[3].xy, IN[0].xyyy 10: TEX TEMP[3].yw, TEMP[3], SAMP[1], 2D 11: MAD TEMP[3].xy, TEMP[3].wyyy, IMM[0].xxxx, IMM[0].yyyy 12: MUL TEMP[3].xy, TEMP[3].xyyy, CONST[7].xxxx 13: DP2 TEMP[4].x, TEMP[3].xyyy, TEMP[3].xyyy 14: MOV_SAT TEMP[4].x, TEMP[4].xxxx 15: ADD TEMP[4].x, IMM[0].zzzz, -TEMP[4].xxxx 16: SQRT TEMP[4].x, TEMP[4].xxxx 17: MOV TEMP[3].z, TEMP[4].xxxx 18: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[0].xyzz 19: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[1].xyzz 20: MOV TEMP[0].y, TEMP[1].xxxx 21: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[2].xyzz 22: MOV TEMP[0].z, TEMP[1].xxxx 23: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 24: RSQ TEMP[1].x, TEMP[1].xxxx 25: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 26: MOV TEMP[1].xy, IN[0].xyyy 27: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D 28: MUL TEMP[1].xyz, CONST[4].xyzz, TEMP[1].xyzz 29: LRP TEMP[2].xyz, CONST[8].xxxx, TEMP[1].xyzz, CONST[1].xyzz 30: MOV TEMP[3].x, IN[1].wwww 31: MOV TEMP[3].y, IN[2].wwww 32: MOV TEMP[3].z, IN[3].wwww 33: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 34: RSQ TEMP[4].x, TEMP[4].xxxx 35: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 36: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[3].xyzz 37: MAX TEMP[4].x, IMM[0].wwww, TEMP[4].xxxx 38: DP3 TEMP[5].x, IN[5].xyzz, IN[5].xyzz 39: MOV TEMP[5].xy, TEMP[5].xxxx 40: TEX TEMP[5].w, TEMP[5], SAMP[2], 2D 41: MUL TEMP[5].xyz, CONST[2].xyzz, TEMP[5].wwww 42: DP3 TEMP[6].x, IN[4].yzww, IN[4].yzww 43: RSQ TEMP[6].x, TEMP[6].xxxx 44: MUL TEMP[6].xyz, IN[4].yzww, TEMP[6].xxxx 45: MOV TEMP[6].xyz, -TEMP[6].xyzx 46: ADD TEMP[7].x, IMM[0].zzzz, -CONST[9].xxxx 47: ADD TEMP[8].xyz, TEMP[3].xyzz, TEMP[6].xyzz 48: DP3 TEMP[9].x, TEMP[8].xyzz, TEMP[8].xyzz 49: RSQ TEMP[9].x, TEMP[9].xxxx 50: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[9].xxxx 51: DP3 TEMP[6].x, TEMP[0].xyzz, TEMP[6].xyzz 52: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx 53: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[8].xyzz 54: MAX TEMP[3].x, IMM[0].wwww, TEMP[3].xxxx 55: MUL TEMP[9].x, TEMP[7].xxxx, TEMP[7].xxxx 56: MUL TEMP[9].x, TEMP[9].xxxx, CONST[3].wwww 57: ADD TEMP[10].x, IMM[0].zzzz, -TEMP[7].xxxx 58: MAD TEMP[10].x, TEMP[10].xxxx, IMM[1].yyyy, IMM[1].zzzz 59: LG2 TEMP[10].x, TEMP[10].xxxx 60: RCP TEMP[10].x, TEMP[10].xxxx 61: MUL TEMP[10].x, IMM[1].xxxx, TEMP[10].xxxx 62: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[10].xxxx 63: ADD TEMP[11].x, IMM[0].zzzz, -TEMP[4].xxxx 64: ADD TEMP[12].x, IMM[0].zzzz, -TEMP[6].xxxx 65: MUL TEMP[13].x, IMM[0].xxxx, TEMP[3].xxxx 66: MUL TEMP[7].x, TEMP[3].xxxx, TEMP[7].xxxx 67: MAD TEMP[7].x, TEMP[13].xxxx, TEMP[7].xxxx, IMM[1].wwww 68: ADD TEMP[3].x, IMM[0].zzzz, -TEMP[3].xxxx 69: LRP TEMP[13].x, TEMP[4].xxxx, IMM[0].zzzz, TEMP[9].xxxx 70: LRP TEMP[6].x, TEMP[6].xxxx, IMM[0].zzzz, TEMP[9].xxxx 71: MAD TEMP[6].x, TEMP[13].xxxx, TEMP[6].xxxx, IMM[2].xxxx 72: RCP TEMP[6].x, TEMP[6].xxxx 73: DP3 TEMP[8].x, TEMP[0].xyzz, TEMP[8].xyzz 74: MAX TEMP[8].x, IMM[0].wwww, TEMP[8].xxxx 75: POW TEMP[8].x, TEMP[8].xxxx, TEMP[10].xxxx 76: ADD TEMP[9].x, TEMP[10].xxxx, IMM[0].zzzz 77: MUL TEMP[9].x, TEMP[9].xxxx, CONST[3].yyyy 78: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx 79: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[8].xxxx 80: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[4].xxxx 81: MUL TEMP[6].x, TEMP[6].xxxx, CONST[3].xxxx 82: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx 83: MUL TEMP[6].xyz, TEMP[6].xxxx, TEMP[5].xyzz 84: ADD TEMP[8].xyz, IMM[0].zzzz, -TEMP[2].xyzz 85: MUL TEMP[9].x, TEMP[3].xxxx, TEMP[3].xxxx 86: MUL TEMP[10].x, TEMP[3].xxxx, TEMP[3].xxxx 87: MUL TEMP[3].x, TEMP[10].xxxx, TEMP[3].xxxx 88: MUL TEMP[3].x, TEMP[9].xxxx, TEMP[3].xxxx 89: MAD TEMP[2].xyz, TEMP[8].xyzz, TEMP[3].xxxx, TEMP[2].xyzz 90: MUL TEMP[3].x, CONST[8].xxxx, CONST[1].wwww 91: ADD TEMP[3].x, CONST[1].wwww, -TEMP[3].xxxx 92: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx 93: ADD TEMP[3].x, TEMP[7].xxxx, IMM[0].yyyy 94: MUL TEMP[8].x, TEMP[11].xxxx, TEMP[11].xxxx 95: MUL TEMP[9].x, TEMP[11].xxxx, TEMP[11].xxxx 96: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx 97: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx 98: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[8].xxxx, IMM[0].zzzz 99: ADD TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy 100: MUL TEMP[8].x, TEMP[12].xxxx, TEMP[12].xxxx 101: MUL TEMP[9].x, TEMP[12].xxxx, TEMP[12].xxxx 102: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx 103: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx 104: MAD TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx, IMM[0].zzzz 105: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[7].xxxx 106: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx 107: MUL TEMP[3].xyz, TEMP[5].xyzz, TEMP[3].xxxx 108: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz 109: MAD TEMP[0].xyz, TEMP[6].xyzz, TEMP[2].xyzz, TEMP[1].xyzz 110: MAD TEMP[1].x, IN[4].xxxx, CONST[0].zzzz, CONST[0].wwww 111: MOV_SAT TEMP[1].x, TEMP[1].xxxx 112: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 113: MOV TEMP[0].xyz, TEMP[0].xyzx 114: MOV TEMP[0].w, IMM[0].zzzz 115: MOV OUT[0], TEMP[0] 116: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %42 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %43 = load <32 x i8>, <32 x i8> addrspace(2)* %42, align 32, !tbaa !0 %44 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0 %46 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %47 = bitcast <8 x i32> addrspace(2)* %46 to <32 x i8> addrspace(2)* %48 = load <32 x i8>, <32 x i8> addrspace(2)* %47, align 32, !tbaa !0 %49 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %50 = bitcast <4 x i32> addrspace(2)* %49 to <16 x i8> addrspace(2)* %51 = load <16 x i8>, <16 x i8> addrspace(2)* %50, align 16, !tbaa !0 %52 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %53 = bitcast <8 x i32> addrspace(2)* %52 to <32 x i8> addrspace(2)* %54 = load <32 x i8>, <32 x i8> addrspace(2)* %53, align 32, !tbaa !0 %55 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %56 = bitcast <4 x i32> addrspace(2)* %55 to <16 x i8> addrspace(2)* %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 %58 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %59 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %60 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %61 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %62 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %63 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %64 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %65 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %66 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %67 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %68 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %69 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %70 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %71 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %79 = bitcast float %58 to i32 %80 = bitcast float %59 to i32 %81 = insertelement <2 x i32> undef, i32 %79, i32 0 %82 = insertelement <2 x i32> %81, i32 %80, i32 1 %83 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %82, <32 x i8> %48, <16 x i8> %51, i32 2) %84 = extractelement <4 x float> %83, i32 1 %85 = extractelement <4 x float> %83, i32 3 %86 = fmul float %85, 2.000000e+00 %87 = fadd float %86, -1.000000e+00 %88 = fmul float %84, 2.000000e+00 %89 = fadd float %88, -1.000000e+00 %90 = fmul float %87, %39 %91 = fmul float %89, %39 %92 = fmul float %90, %90 %93 = fmul float %91, %91 %94 = fadd float %92, %93 %95 = call float @llvm.AMDIL.clamp.(float %94, float 0.000000e+00, float 1.000000e+00) %96 = fsub float 1.000000e+00, %95 %97 = call float @llvm.sqrt.f32(float %96) %98 = fmul float %90, %60 %99 = fmul float %91, %64 %100 = fadd float %99, %98 %101 = fmul float %97, %68 %102 = fadd float %100, %101 %103 = fmul float %90, %61 %104 = fmul float %91, %65 %105 = fadd float %104, %103 %106 = fmul float %97, %69 %107 = fadd float %105, %106 %108 = fmul float %90, %62 %109 = fmul float %91, %66 %110 = fadd float %109, %108 %111 = fmul float %97, %70 %112 = fadd float %110, %111 %113 = fmul float %102, %102 %114 = fmul float %107, %107 %115 = fadd float %114, %113 %116 = fmul float %112, %112 %117 = fadd float %115, %116 %118 = call float @llvm.AMDGPU.rsq.clamped.f32(float %117) %119 = fmul float %102, %118 %120 = fmul float %107, %118 %121 = fmul float %112, %118 %122 = bitcast float %58 to i32 %123 = bitcast float %59 to i32 %124 = insertelement <2 x i32> undef, i32 %122, i32 0 %125 = insertelement <2 x i32> %124, i32 %123, i32 1 %126 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %125, <32 x i8> %43, <16 x i8> %45, i32 2) %127 = extractelement <4 x float> %126, i32 0 %128 = extractelement <4 x float> %126, i32 1 %129 = extractelement <4 x float> %126, i32 2 %130 = fmul float %36, %127 %131 = fmul float %37, %128 %132 = fmul float %38, %129 %133 = call float @llvm.AMDGPU.lrp(float %40, float %130, float %26) %134 = call float @llvm.AMDGPU.lrp(float %40, float %131, float %27) %135 = call float @llvm.AMDGPU.lrp(float %40, float %132, float %28) %136 = fmul float %63, %63 %137 = fmul float %67, %67 %138 = fadd float %137, %136 %139 = fmul float %71, %71 %140 = fadd float %138, %139 %141 = call float @llvm.AMDGPU.rsq.clamped.f32(float %140) %142 = fmul float %63, %141 %143 = fmul float %67, %141 %144 = fmul float %71, %141 %145 = fmul float %119, %142 %146 = fmul float %120, %143 %147 = fadd float %146, %145 %148 = fmul float %121, %144 %149 = fadd float %147, %148 %150 = call float @llvm.maxnum.f32(float %149, float 0.000000e+00) %151 = fmul float %76, %76 %152 = fmul float %77, %77 %153 = fadd float %152, %151 %154 = fmul float %78, %78 %155 = fadd float %153, %154 %156 = bitcast float %155 to i32 %157 = bitcast float %155 to i32 %158 = insertelement <2 x i32> undef, i32 %156, i32 0 %159 = insertelement <2 x i32> %158, i32 %157, i32 1 %160 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %159, <32 x i8> %54, <16 x i8> %57, i32 2) %161 = extractelement <4 x float> %160, i32 3 %162 = fmul float %30, %161 %163 = fmul float %31, %161 %164 = fmul float %32, %161 %165 = fmul float %73, %73 %166 = fmul float %74, %74 %167 = fadd float %166, %165 %168 = fmul float %75, %75 %169 = fadd float %167, %168 %170 = call float @llvm.AMDGPU.rsq.clamped.f32(float %169) %171 = fmul float %73, %170 %172 = fmul float %74, %170 %173 = fmul float %75, %170 %174 = fsub float 1.000000e+00, %41 %175 = fsub float %142, %171 %176 = fsub float %143, %172 %177 = fsub float %144, %173 %178 = fmul float %175, %175 %179 = fmul float %176, %176 %180 = fadd float %179, %178 %181 = fmul float %177, %177 %182 = fadd float %180, %181 %183 = call float @llvm.AMDGPU.rsq.clamped.f32(float %182) %184 = fmul float %175, %183 %185 = fmul float %176, %183 %186 = fmul float %177, %183 %187 = fmul float %171, %119 %188 = fsub float -0.000000e+00, %187 %189 = fmul float %172, %120 %190 = fsub float %188, %189 %191 = fmul float %173, %121 %192 = fsub float %190, %191 %193 = call float @llvm.maxnum.f32(float %192, float 0.000000e+00) %194 = fmul float %142, %184 %195 = fmul float %143, %185 %196 = fadd float %195, %194 %197 = fmul float %144, %186 %198 = fadd float %196, %197 %199 = call float @llvm.maxnum.f32(float %198, float 0.000000e+00) %200 = fmul float %174, %174 %201 = fmul float %200, %35 %202 = fsub float 1.000000e+00, %174 %203 = fmul float %202, 0x3FEEF9DB20000000 %204 = fadd float %203, 0x3F9EB851E0000000 %205 = call float @llvm.log2.f32(float %204) %206 = fdiv float 1.000000e+00, %205 %207 = fmul float %206, 1.000000e+01 %208 = fmul float %207, %207 %209 = fsub float 1.000000e+00, %150 %210 = fsub float 1.000000e+00, %193 %211 = fmul float %199, 2.000000e+00 %212 = fmul float %199, %174 %213 = fmul float %211, %212 %214 = fadd float %213, 5.000000e-01 %215 = fsub float 1.000000e+00, %199 %216 = call float @llvm.AMDGPU.lrp(float %150, float 1.000000e+00, float %201) %217 = call float @llvm.AMDGPU.lrp(float %193, float 1.000000e+00, float %201) %218 = fmul float %216, %217 %219 = fadd float %218, 0x3F1A36E2E0000000 %220 = fdiv float 1.000000e+00, %219 %221 = fmul float %119, %184 %222 = fmul float %120, %185 %223 = fadd float %222, %221 %224 = fmul float %121, %186 %225 = fadd float %223, %224 %226 = call float @llvm.maxnum.f32(float %225, float 0.000000e+00) %227 = call float @llvm.pow.f32(float %226, float %208) %228 = fadd float %208, 1.000000e+00 %229 = fmul float %228, %34 %230 = fmul float %227, %229 %231 = fmul float %220, %230 %232 = fmul float %231, %150 %233 = fmul float %232, %33 %234 = call float @llvm.maxnum.f32(float %233, float 0.000000e+00) %235 = fmul float %234, %162 %236 = fmul float %234, %163 %237 = fmul float %234, %164 %238 = fsub float 1.000000e+00, %133 %239 = fsub float 1.000000e+00, %134 %240 = fsub float 1.000000e+00, %135 %241 = fmul float %215, %215 %242 = fmul float %215, %215 %243 = fmul float %242, %215 %244 = fmul float %241, %243 %245 = fmul float %238, %244 %246 = fadd float %245, %133 %247 = fmul float %239, %244 %248 = fadd float %247, %134 %249 = fmul float %240, %244 %250 = fadd float %249, %135 %251 = fmul float %40, %29 %252 = fsub float %29, %251 %253 = fmul float %130, %252 %254 = fmul float %131, %252 %255 = fmul float %132, %252 %256 = fadd float %214, -1.000000e+00 %257 = fmul float %209, %209 %258 = fmul float %209, %209 %259 = fmul float %258, %209 %260 = fmul float %257, %259 %261 = fmul float %256, %260 %262 = fadd float %261, 1.000000e+00 %263 = fadd float %214, -1.000000e+00 %264 = fmul float %210, %210 %265 = fmul float %210, %210 %266 = fmul float %265, %210 %267 = fmul float %264, %266 %268 = fmul float %263, %267 %269 = fadd float %268, 1.000000e+00 %270 = fmul float %262, %269 %271 = fmul float %270, %150 %272 = fmul float %162, %271 %273 = fmul float %163, %271 %274 = fmul float %164, %271 %275 = fmul float %253, %272 %276 = fmul float %254, %273 %277 = fmul float %255, %274 %278 = fmul float %235, %246 %279 = fadd float %278, %275 %280 = fmul float %236, %248 %281 = fadd float %280, %276 %282 = fmul float %237, %250 %283 = fadd float %282, %277 %284 = fmul float %72, %24 %285 = fadd float %284, %25 %286 = call float @llvm.AMDIL.clamp.(float %285, float 0.000000e+00, float 1.000000e+00) %287 = fmul float %279, %286 %288 = fmul float %281, %286 %289 = fmul float %283, %286 %290 = call i32 @llvm.SI.packf16(float %287, float %288) %291 = bitcast i32 %290 to float %292 = call i32 @llvm.SI.packf16(float %289, float 1.000000e+00) %293 = bitcast i32 %292 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %291, float %293, float %291, float %293) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v7, v0, 3, 1, [m0] ; C81C0700 v_interp_p2_f32 v7, [v7], v1, 3, 1, [m0] ; C81D0701 v_interp_p1_f32 v8, v0, 0, 2, [m0] ; C8200800 v_interp_p2_f32 v8, [v8], v1, 0, 2, [m0] ; C8210801 v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900 v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901 v_interp_p1_f32 v10, v0, 2, 2, [m0] ; C8280A00 v_interp_p2_f32 v10, [v10], v1, 2, 2, [m0] ; C8290A01 v_interp_p1_f32 v11, v0, 3, 2, [m0] ; C82C0B00 v_interp_p2_f32 v11, [v11], v1, 3, 2, [m0] ; C82D0B01 v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00 v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01 v_interp_p1_f32 v13, v0, 1, 3, [m0] ; C8340D00 v_interp_p2_f32 v13, [v13], v1, 1, 3, [m0] ; C8350D01 v_interp_p1_f32 v14, v0, 2, 3, [m0] ; C8380E00 v_interp_p2_f32 v14, [v14], v1, 2, 3, [m0] ; C8390E01 v_interp_p1_f32 v15, v0, 3, 3, [m0] ; C83C0F00 v_interp_p2_f32 v15, [v15], v1, 3, 3, [m0] ; C83D0F01 v_interp_p1_f32 v16, v0, 0, 4, [m0] ; C8401000 v_interp_p2_f32 v16, [v16], v1, 0, 4, [m0] ; C8411001 v_interp_p1_f32 v17, v0, 1, 4, [m0] ; C8441100 v_interp_p2_f32 v17, [v17], v1, 1, 4, [m0] ; C8451101 v_interp_p1_f32 v18, v0, 2, 4, [m0] ; C8481200 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p2_f32 v18, [v18], v1, 2, 4, [m0] ; C8491201 v_interp_p1_f32 v19, v0, 3, 4, [m0] ; C84C1300 v_interp_p2_f32 v19, [v19], v1, 3, 4, [m0] ; C84D1301 v_interp_p1_f32 v20, v0, 0, 5, [m0] ; C8501400 v_interp_p2_f32 v20, [v20], v1, 0, 5, [m0] ; C8511401 v_interp_p1_f32 v21, v0, 1, 5, [m0] ; C8541500 v_interp_p2_f32 v21, [v21], v1, 1, 5, [m0] ; C8551501 v_interp_p1_f32 v0, v0, 2, 5, [m0] ; C8001600 v_interp_p2_f32 v0, [v0], v1, 2, 5, [m0] ; C8011601 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s32, s[0:3], 0x10 ; C2100110 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_buffer_load_dword s33, s[0:3], 0x11 ; C2108111 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_buffer_load_dword s34, s[0:3], 0x12 ; C2110112 v_mul_f32_e32 v22, v20, v20 ; 102C2914 s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508 s_load_dwordx8 s[40:47], s[6:7], 0x10 ; C0D40710 v_mac_f32_e32 v22, v21, v21 ; 3E2C2B15 v_mac_f32_e32 v22, v0, v0 ; 3E2C0100 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[12:15] ; F0800A00 00660002 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800700 00441702 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v2, s32, v23 ; 10042E20 v_mul_f32_e32 v3, s33, v24 ; 10063021 v_mul_f32_e32 v20, s34, v25 ; 10283222 v_mov_b32_e32 v23, v22 ; 7E2E0316 image_sample v21, 8, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[40:47], s[36:39] ; F0800800 012A1516 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 v_mov_b32_e32 v22, s5 ; 7E2C0205 v_mac_f32_e32 v22, s4, v16 ; 3E2C2004 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_buffer_load_dword s5, s[0:3], 0xd ; C202810D s_buffer_load_dword s9, s[0:3], 0xf ; C204810F s_buffer_load_dword s10, s[0:3], 0x7 ; C2050107 s_buffer_load_dword s11, s[0:3], 0x8 ; C2058108 s_buffer_load_dword s12, s[0:3], 0x9 ; C2060109 s_buffer_load_dword s13, s[0:3], 0xa ; C206810A s_buffer_load_dword s14, s[0:3], 0xc ; C207010C s_buffer_load_dword s15, s[0:3], 0x20 ; C2078120 s_buffer_load_dword s0, s[0:3], 0x24 ; C2000124 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mul_f32_e32 v0, s4, v0 ; 10000004 v_mul_f32_e32 v4, v4, v1 ; 10080304 v_mac_f32_e32 v4, v8, v0 ; 3E080108 v_mul_f32_e32 v5, v5, v1 ; 100A0305 v_mac_f32_e32 v5, v9, v0 ; 3E0A0109 v_mul_f32_e32 v6, v6, v1 ; 100C0306 v_mac_f32_e32 v6, v10, v0 ; 3E0C010A v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mac_f32_e32 v0, v1, v1 ; 3E000301 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v0, 1.0, v0 ; 080000F2 v_sqrt_f32_e32 v0, v0 ; 7E006700 v_mac_f32_e32 v4, v12, v0 ; 3E08010C v_mac_f32_e32 v5, v13, v0 ; 3E0A010D v_mac_f32_e32 v6, v14, v0 ; 3E0C010E v_mul_f32_e32 v0, v4, v4 ; 10000904 v_mac_f32_e32 v0, v5, v5 ; 3E000B05 v_mac_f32_e32 v0, v6, v6 ; 3E000D06 v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_mul_f32_e32 v1, v17, v17 ; 10022311 v_mac_f32_e32 v1, v18, v18 ; 3E022512 v_mac_f32_e32 v1, v19, v19 ; 3E022713 v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 v_mul_f32_e32 v8, v7, v7 ; 10100F07 v_mac_f32_e32 v8, v11, v11 ; 3E10170B v_mac_f32_e32 v8, v15, v15 ; 3E101F0F v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_mul_f32_e32 v9, v1, v17 ; 10122301 v_mul_f32_e32 v10, v1, v18 ; 10142501 v_mul_f32_e32 v1, v1, v19 ; 10022701 v_mul_f32_e32 v4, v0, v4 ; 10080900 v_mul_f32_e32 v5, v0, v5 ; 100A0B00 v_mul_f32_e32 v12, v8, v7 ; 10180F08 v_mad_f32 v7, v7, v8, -v9 ; D2820007 84261107 v_mul_f32_e32 v9, v4, v9 ; 10121304 v_mad_f32 v9, -v10, v5, -v9 ; D2820009 A4260B0A v_mad_f32 v10, v11, v8, -v10 ; D282000A 842A110B v_mul_f32_e32 v13, v7, v7 ; 101A0F07 v_mac_f32_e32 v13, v10, v10 ; 3E1A150A v_mad_f32 v14, v15, v8, -v1 ; D282000E 8406110F v_mac_f32_e32 v13, v14, v14 ; 3E1A1D0E v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D v_mul_f32_e32 v11, v8, v11 ; 10161708 v_mul_f32_e32 v16, v12, v4 ; 1020090C v_mac_f32_e32 v16, v11, v5 ; 3E200B0B v_mul_f32_e32 v7, v13, v7 ; 100E0F0D v_mul_f32_e32 v10, v13, v10 ; 1014150D v_mul_f32_e32 v12, v7, v12 ; 10181907 v_mac_f32_e32 v12, v10, v11 ; 3E18170A v_mul_f32_e32 v4, v7, v4 ; 10080907 v_mac_f32_e32 v4, v10, v5 ; 3E080B0A v_mul_f32_e32 v0, v0, v6 ; 10000D00 v_mul_f32_e32 v5, v8, v15 ; 100A1F08 v_mul_f32_e32 v6, v13, v14 ; 100C1D0D v_mad_f32 v1, -v1, v0, v9 ; D2820001 24260101 v_mac_f32_e32 v16, v5, v0 ; 3E200105 v_mac_f32_e32 v12, v6, v5 ; 3E180B06 v_mac_f32_e32 v4, v6, v0 ; 3E080106 v_sub_f32_e64 v0, 1.0, s15 ; D2080000 00001EF2 v_mul_f32_e32 v5, s6, v0 ; 100A0006 v_mad_f32 v6, -v0, s6, 1.0 ; D2820006 23C80D00 v_mul_f32_e32 v7, s7, v0 ; 100E0007 v_mad_f32 v8, -v0, s7, 1.0 ; D2820008 23C80F00 v_mul_f32_e32 v9, s8, v0 ; 10120008 v_mad_f32 v0, -v0, s8, 1.0 ; D2820000 23C81100 v_sub_f32_e64 v10, 1.0, s0 ; D208000A 000000F2 v_sub_f32_e32 v11, 1.0, v10 ; 081614F2 v_mov_b32_e32 v13, 0x3cf5c28f ; 7E1A02FF 3CF5C28F v_madmk_f32_e32 v11, v11, v13, 0x3f77ced9 ; 40161B0B 3F77CED9 v_max_f32_e32 v12, 0, v12 ; 20181880 v_sub_f32_e32 v13, 1.0, v12 ; 081A18F2 v_mul_f32_e32 v14, v13, v13 ; 101C1B0D v_mul_f32_e32 v13, v13, v14 ; 101A1D0D v_mul_f32_e32 v13, v13, v14 ; 101A1D0D v_mac_f32_e32 v5, s15, v2 ; 3E0A040F v_mad_f32 v6, -s15, v2, v6 ; D2820006 241A040F v_mac_f32_e32 v5, v13, v6 ; 3E0A0D0D v_mac_f32_e32 v7, s15, v3 ; 3E0E060F v_mad_f32 v6, -s15, v3, v8 ; D2820006 2422060F v_mac_f32_e32 v7, v13, v6 ; 3E0E0D0D v_mac_f32_e32 v9, s15, v20 ; 3E12280F v_log_f32_e32 v6, v11 ; 7E0C4F0B v_mad_f32 v0, -s15, v20, v0 ; D2820000 2402280F v_mac_f32_e32 v9, v13, v0 ; 3E12010D v_mul_f32_e32 v0, s11, v21 ; 10002A0B v_mul_f32_e32 v8, s12, v21 ; 10102A0C v_rcp_f32_e32 v6, v6 ; 7E0C5506 v_mul_f32_e32 v11, s13, v21 ; 10162A0D v_mul_f32_e32 v13, v10, v10 ; 101A150A v_mul_f32_e32 v13, s9, v13 ; 101A1A09 v_mul_f32_e32 v6, 0x41200000, v6 ; 100C0CFF 41200000 v_mad_f32 v14, v6, v6, 1.0 ; D282000E 03CA0D06 v_mul_f32_e32 v14, s5, v14 ; 101C1C05 v_max_f32_e32 v1, 0, v1 ; 20020280 v_sub_f32_e32 v15, 1.0, v1 ; 081E02F2 v_mul_f32_e32 v17, v13, v15 ; 10221F0D v_mac_f32_e32 v17, 1.0, v1 ; 3E2202F2 v_max_f32_e32 v1, 0, v16 ; 20022080 v_sub_f32_e32 v16, 1.0, v1 ; 082002F2 v_mul_f32_e32 v13, v13, v16 ; 101A210D v_mac_f32_e32 v13, 1.0, v1 ; 3E1A02F2 v_max_f32_e32 v4, 0, v4 ; 20080880 v_log_f32_e32 v4, v4 ; 7E084F04 v_madak_f32_e32 v13, v13, v17, 0x38d1b717 ; 421A230D 38D1B717 v_mul_f32_e32 v6, v6, v6 ; 100C0D06 v_rcp_f32_e32 v13, v13 ; 7E1A550D v_mul_legacy_f32_e32 v4, v6, v4 ; 0E080906 v_exp_f32_e32 v4, v4 ; 7E084B04 v_mul_f32_e32 v4, v14, v4 ; 1008090E v_mul_f32_e32 v4, v4, v13 ; 10081B04 v_mul_f32_e32 v4, v1, v4 ; 10080901 v_mul_f32_e32 v4, s14, v4 ; 1008080E v_mov_b32_e32 v6, s15 ; 7E0C020F v_mad_f32 v6, -v6, s10, s10 ; D2820006 20281506 v_mul_f32_e32 v10, v10, v12 ; 1014190A v_add_f32_e32 v12, v12, v12 ; 0618190C v_mad_f32 v10, v12, v10, 0.5 ; D282000A 03C2150C v_mul_f32_e32 v12, v16, v16 ; 10182110 v_mul_f32_e32 v13, v16, v12 ; 101A1910 v_mul_f32_e32 v12, v13, v12 ; 1018190D v_mul_f32_e32 v13, v15, v15 ; 101A1F0F v_mul_f32_e32 v14, v15, v13 ; 101C1B0F v_mul_f32_e32 v13, v14, v13 ; 101A1B0E v_add_f32_e32 v10, -1.0, v10 ; 061414F3 v_mad_f32 v12, v10, v12, 1.0 ; D282000C 03CA190A v_mad_f32 v10, v10, v13, 1.0 ; D282000A 03CA1B0A v_mul_f32_e32 v10, v10, v12 ; 1014190A v_mul_f32_e32 v2, v6, v2 ; 10040506 v_mul_f32_e32 v1, v1, v10 ; 10021501 v_mul_f32_e32 v10, v1, v0 ; 10140101 v_mul_f32_e32 v2, v10, v2 ; 1004050A v_max_f32_e32 v4, 0, v4 ; 20080880 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_mac_f32_e32 v2, v5, v0 ; 3E040105 v_mul_f32_e32 v0, v6, v3 ; 10000706 v_mul_f32_e32 v3, v1, v8 ; 10061101 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mul_f32_e32 v3, v8, v4 ; 10060908 v_mac_f32_e32 v0, v7, v3 ; 3E000707 v_mul_f32_e32 v3, v6, v20 ; 10062906 v_mul_f32_e32 v4, v11, v4 ; 1008090B v_mul_f32_e32 v1, v1, v11 ; 10021701 v_mul_f32_e32 v1, v1, v3 ; 10020701 v_mac_f32_e32 v1, v9, v4 ; 3E020909 v_add_f32_e64 v3, 0, v22 clamp ; D2060803 00022C80 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102 v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 28 Code Size: 1084 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL OUT[6], GENERIC[5] DCL CONST[0..20] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[2], IN[0].xxxx 1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0].xyz, CONST[5], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[17], IN[0].xxxx 5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1] 8: MAD TEMP[2].xy, IN[2].xyyy, CONST[10].xyyy, CONST[10].zwww 9: FSEQ TEMP[3].x, CONST[12].xxxx, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].xy, IN[2].xyxx 12: ELSE :0 13: MOV TEMP[3].xy, IN[3].xyxx 14: ENDIF 15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[11].xyyy, CONST[11].zwww 16: MOV TEMP[2].zw, TEMP[3].yyxy 17: MOV TEMP[3].x, CONST[6].xxxx 18: MOV TEMP[3].y, CONST[7].xxxx 19: MOV TEMP[3].z, CONST[8].xxxx 20: MOV TEMP[4].x, CONST[6].yyyy 21: MOV TEMP[4].y, CONST[7].yyyy 22: MOV TEMP[4].z, CONST[8].yyyy 23: MOV TEMP[5].x, CONST[6].zzzz 24: MOV TEMP[5].y, CONST[7].zzzz 25: MOV TEMP[5].z, CONST[8].zzzz 26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz 29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 30: RSQ TEMP[4].x, TEMP[4].xxxx 31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 32: MUL TEMP[4].xyz, CONST[2].xyzz, IN[4].xxxx 33: MAD TEMP[4].xyz, CONST[3].xyzz, IN[4].yyyy, TEMP[4].xyzz 34: MAD TEMP[4].xyz, CONST[4].xyzz, IN[4].zzzz, TEMP[4].xyzz 35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 36: RSQ TEMP[5].x, TEMP[5].xxxx 37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx 39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz 40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww 41: MOV TEMP[4].xyz, TEMP[4].xyzx 42: MOV TEMP[5].xyz, TEMP[5].xyzx 43: MOV TEMP[3].xyz, TEMP[3].xyzx 44: MUL TEMP[6].xyz, TEMP[0].xyzz, CONST[1].wwww 45: ADD TEMP[6].xyz, CONST[1].xyzz, -TEMP[6].xyzz 46: MOV TEMP[4].w, TEMP[6].xxxx 47: MOV TEMP[5].w, TEMP[6].yyyy 48: MOV TEMP[3].w, TEMP[6].zzzz 49: MUL TEMP[6], CONST[2], IN[0].xxxx 50: MAD TEMP[6], CONST[3], IN[0].yyyy, TEMP[6] 51: MAD TEMP[6], CONST[4], IN[0].zzzz, TEMP[6] 52: MAD TEMP[6], CONST[5], IN[0].wwww, TEMP[6] 53: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[0].xyzz 54: MOV TEMP[0].yzw, TEMP[0].yxyz 55: MUL TEMP[7], CONST[13], TEMP[6].xxxx 56: MAD TEMP[7], CONST[14], TEMP[6].yyyy, TEMP[7] 57: MAD TEMP[7], CONST[15], TEMP[6].zzzz, TEMP[7] 58: MAD TEMP[6].xyz, CONST[16], TEMP[6].wwww, TEMP[7] 59: MOV TEMP[6].xyz, TEMP[6].xyzx 60: MOV TEMP[0].x, TEMP[1].zzzz 61: MOV OUT[1], TEMP[2] 62: MOV OUT[3], TEMP[5] 63: MOV OUT[2], TEMP[4] 64: MOV OUT[4], TEMP[3] 65: MOV OUT[0], TEMP[1] 66: MOV OUT[6], TEMP[6] 67: MOV OUT[5], TEMP[0] 68: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332) %82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0 %84 = add i32 %5, %7 %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %84) %86 = extractelement <4 x float> %85, i32 0 %87 = extractelement <4 x float> %85, i32 1 %88 = extractelement <4 x float> %85, i32 2 %89 = extractelement <4 x float> %85, i32 3 %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 %92 = add i32 %5, %7 %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %92) %94 = extractelement <4 x float> %93, i32 0 %95 = extractelement <4 x float> %93, i32 1 %96 = extractelement <4 x float> %93, i32 2 %97 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0 %99 = add i32 %5, %7 %100 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %98, i32 0, i32 %99) %101 = extractelement <4 x float> %100, i32 0 %102 = extractelement <4 x float> %100, i32 1 %103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0 %105 = add i32 %5, %7 %106 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %104, i32 0, i32 %105) %107 = extractelement <4 x float> %106, i32 0 %108 = extractelement <4 x float> %106, i32 1 %109 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0 %111 = add i32 %5, %7 %112 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %110, i32 0, i32 %111) %113 = extractelement <4 x float> %112, i32 0 %114 = extractelement <4 x float> %112, i32 1 %115 = extractelement <4 x float> %112, i32 2 %116 = extractelement <4 x float> %112, i32 3 %117 = fmul float %20, %86 %118 = fmul float %21, %86 %119 = fmul float %22, %86 %120 = fmul float %24, %87 %121 = fadd float %120, %117 %122 = fmul float %25, %87 %123 = fadd float %122, %118 %124 = fmul float %26, %87 %125 = fadd float %124, %119 %126 = fmul float %28, %88 %127 = fadd float %126, %121 %128 = fmul float %29, %88 %129 = fadd float %128, %123 %130 = fmul float %30, %88 %131 = fadd float %130, %125 %132 = fmul float %32, %89 %133 = fadd float %132, %127 %134 = fmul float %33, %89 %135 = fadd float %134, %129 %136 = fmul float %34, %89 %137 = fadd float %136, %131 %138 = fmul float %66, %86 %139 = fmul float %67, %86 %140 = fmul float %68, %86 %141 = fmul float %69, %86 %142 = fmul float %70, %87 %143 = fadd float %142, %138 %144 = fmul float %71, %87 %145 = fadd float %144, %139 %146 = fmul float %72, %87 %147 = fadd float %146, %140 %148 = fmul float %73, %87 %149 = fadd float %148, %141 %150 = fmul float %74, %88 %151 = fadd float %150, %143 %152 = fmul float %75, %88 %153 = fadd float %152, %145 %154 = fmul float %76, %88 %155 = fadd float %154, %147 %156 = fmul float %77, %88 %157 = fadd float %156, %149 %158 = fmul float %78, %89 %159 = fadd float %158, %151 %160 = fmul float %79, %89 %161 = fadd float %160, %153 %162 = fmul float %80, %89 %163 = fadd float %162, %155 %164 = fmul float %81, %89 %165 = fadd float %164, %157 %166 = fmul float %101, %45 %167 = fadd float %166, %47 %168 = fmul float %102, %46 %169 = fadd float %168, %48 %170 = fcmp oeq float %53, 0.000000e+00 %. = select i1 %170, float %101, float %107 %.32 = select i1 %170, float %102, float %108 %171 = fmul float %., %49 %172 = fadd float %171, %51 %173 = fmul float %.32, %50 %174 = fadd float %173, %52 %175 = fmul float %36, %94 %176 = fmul float %39, %94 %177 = fmul float %42, %94 %178 = fmul float %37, %95 %179 = fadd float %178, %175 %180 = fmul float %40, %95 %181 = fadd float %180, %176 %182 = fmul float %43, %95 %183 = fadd float %182, %177 %184 = fmul float %38, %96 %185 = fadd float %184, %179 %186 = fmul float %41, %96 %187 = fadd float %186, %181 %188 = fmul float %44, %96 %189 = fadd float %188, %183 %190 = fmul float %185, %185 %191 = fmul float %187, %187 %192 = fadd float %191, %190 %193 = fmul float %189, %189 %194 = fadd float %192, %193 %195 = call float @llvm.AMDGPU.rsq.clamped.f32(float %194) %196 = fmul float %185, %195 %197 = fmul float %187, %195 %198 = fmul float %189, %195 %199 = fmul float %20, %113 %200 = fmul float %21, %113 %201 = fmul float %22, %113 %202 = fmul float %24, %114 %203 = fadd float %202, %199 %204 = fmul float %25, %114 %205 = fadd float %204, %200 %206 = fmul float %26, %114 %207 = fadd float %206, %201 %208 = fmul float %28, %115 %209 = fadd float %208, %203 %210 = fmul float %29, %115 %211 = fadd float %210, %205 %212 = fmul float %30, %115 %213 = fadd float %212, %207 %214 = fmul float %209, %209 %215 = fmul float %211, %211 %216 = fadd float %215, %214 %217 = fmul float %213, %213 %218 = fadd float %216, %217 %219 = call float @llvm.AMDGPU.rsq.clamped.f32(float %218) %220 = fmul float %209, %219 %221 = fmul float %211, %219 %222 = fmul float %213, %219 %223 = fmul float %198, %221 %224 = fmul float %196, %222 %225 = fmul float %197, %220 %226 = fmul float %197, %222 %227 = fsub float %226, %223 %228 = fmul float %198, %220 %229 = fsub float %228, %224 %230 = fmul float %196, %221 %231 = fsub float %230, %225 %232 = fmul float %227, %116 %233 = fmul float %229, %116 %234 = fmul float %231, %116 %235 = fmul float %133, %19 %236 = fmul float %135, %19 %237 = fmul float %137, %19 %238 = fsub float %16, %235 %239 = fsub float %17, %236 %240 = fsub float %18, %237 %241 = fmul float %20, %86 %242 = fmul float %21, %86 %243 = fmul float %22, %86 %244 = fmul float %23, %86 %245 = fmul float %24, %87 %246 = fadd float %245, %241 %247 = fmul float %25, %87 %248 = fadd float %247, %242 %249 = fmul float %26, %87 %250 = fadd float %249, %243 %251 = fmul float %27, %87 %252 = fadd float %251, %244 %253 = fmul float %28, %88 %254 = fadd float %253, %246 %255 = fmul float %29, %88 %256 = fadd float %255, %248 %257 = fmul float %30, %88 %258 = fadd float %257, %250 %259 = fmul float %31, %88 %260 = fadd float %259, %252 %261 = fmul float %32, %89 %262 = fadd float %261, %254 %263 = fmul float %33, %89 %264 = fadd float %263, %256 %265 = fmul float %34, %89 %266 = fadd float %265, %258 %267 = fmul float %35, %89 %268 = fadd float %267, %260 %269 = fsub float %133, %13 %270 = fsub float %135, %14 %271 = fsub float %137, %15 %272 = fmul float %54, %262 %273 = fmul float %55, %262 %274 = fmul float %56, %262 %275 = fmul float %57, %264 %276 = fadd float %275, %272 %277 = fmul float %58, %264 %278 = fadd float %277, %273 %279 = fmul float %59, %264 %280 = fadd float %279, %274 %281 = fmul float %60, %266 %282 = fadd float %281, %276 %283 = fmul float %61, %266 %284 = fadd float %283, %278 %285 = fmul float %62, %266 %286 = fadd float %285, %280 %287 = fmul float %63, %268 %288 = fadd float %287, %282 %289 = fmul float %64, %268 %290 = fadd float %289, %284 %291 = fmul float %65, %268 %292 = fadd float %291, %286 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %167, float %169, float %172, float %174) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %220, float %221, float %222, float %238) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %232, float %233, float %234, float %239) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %196, float %197, float %198, float %240) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %163, float %269, float %270, float %271) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %288, float %290, float %292, float %268) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %159, float %161, float %163, float %165) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[10:13], v0, s[20:23], 0 idxen ; E00C2000 80050A00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[12:15], v0, s[8:11], 0 idxen ; E00C2000 80020C00 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x30 ; C2020130 s_buffer_load_dword s5, s[0:3], 0x2a ; C202812A s_buffer_load_dword s6, s[0:3], 0x28 ; C2030128 s_buffer_load_dword s7, s[0:3], 0x34 ; C2038134 s_buffer_load_dword s8, s[0:3], 0x35 ; C2040135 s_buffer_load_dword s9, s[0:3], 0x36 ; C2048136 s_buffer_load_dword s10, s[0:3], 0x38 ; C2050138 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880 v_cndmask_b32_e32 v0, v10, v8 ; 0000110A v_cndmask_b32_e32 v10, v11, v9 ; 0014130B v_mov_b32_e32 v11, s5 ; 7E160205 s_buffer_load_dword s4, s[0:3], 0x29 ; C2020129 s_buffer_load_dword s5, s[0:3], 0x2b ; C202812B v_mac_f32_e32 v11, s6, v8 ; 3E161006 s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118 s_buffer_load_dword s11, s[0:3], 0x19 ; C2058119 s_buffer_load_dword s12, s[0:3], 0x1c ; C206011C s_buffer_load_dword s13, s[0:3], 0x1d ; C206811D s_buffer_load_dword s14, s[0:3], 0x20 ; C2070120 s_buffer_load_dword s15, s[0:3], 0x2c ; C207812C s_buffer_load_dword s16, s[0:3], 0x2d ; C208012D s_buffer_load_dword s17, s[0:3], 0x2e ; C208812E s_buffer_load_dword s18, s[0:3], 0x2f ; C209012F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s5 ; 7E100205 s_buffer_load_dword s5, s[0:3], 0x21 ; C2028121 v_mac_f32_e32 v8, s4, v9 ; 3E101204 v_mul_f32_e32 v9, s6, v5 ; 10120A06 v_mac_f32_e32 v9, s11, v6 ; 3E120C0B s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_buffer_load_dword s6, s[0:3], 0x1e ; C203011E v_mul_f32_e32 v16, s12, v5 ; 10200A0C v_mac_f32_e32 v16, s13, v6 ; 3E200C0D v_mul_f32_e32 v5, s14, v5 ; 100A0A0E s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v5, s5, v6 ; 3E0A0C05 s_buffer_load_dword s5, s[0:3], 0x44 ; C2028144 s_buffer_load_dword s12, s[0:3], 0x48 ; C2060148 s_buffer_load_dword s13, s[0:3], 0x45 ; C2068145 s_buffer_load_dword s14, s[0:3], 0x49 ; C2070149 v_mac_f32_e32 v9, s4, v7 ; 3E120E04 s_buffer_load_dword s4, s[0:3], 0x46 ; C2020146 s_buffer_load_dword s19, s[0:3], 0x4a ; C209814A s_buffer_load_dword s20, s[0:3], 0x47 ; C20A0147 v_mac_f32_e32 v16, s6, v7 ; 3E200E06 v_mac_f32_e32 v5, s11, v7 ; 3E0A0E0B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s5, v1 ; 100C0205 v_mac_f32_e32 v6, s12, v2 ; 3E0C040C v_mul_f32_e32 v7, s13, v1 ; 100E020D v_mac_f32_e32 v7, s14, v2 ; 3E0E040E s_buffer_load_dword s5, s[0:3], 0x4b ; C202814B v_mul_f32_e32 v17, s4, v1 ; 10220204 v_mac_f32_e32 v17, s19, v2 ; 3E220413 v_mul_f32_e32 v18, s20, v1 ; 10240214 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_buffer_load_dword s6, s[0:3], 0xf ; C203010F s_buffer_load_dword s11, s[0:3], 0x4c ; C205814C s_buffer_load_dword s12, s[0:3], 0x4d ; C206014D s_buffer_load_dword s13, s[0:3], 0x4e ; C206814E s_buffer_load_dword s14, s[0:3], 0x4f ; C207014F s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v18, s5, v2 ; 3E240405 s_buffer_load_dword s5, s[0:3], 0xc ; C202810C s_buffer_load_dword s19, s[0:3], 0xd ; C209810D s_buffer_load_dword s20, s[0:3], 0xe ; C20A010E v_mul_f32_e32 v19, s4, v1 ; 10260204 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 v_mac_f32_e32 v19, s6, v2 ; 3E260406 v_mac_f32_e32 v6, s11, v3 ; 3E0C060B s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108 v_mac_f32_e32 v7, s12, v3 ; 3E0E060C v_mac_f32_e32 v17, s13, v3 ; 3E22060D s_buffer_load_dword s11, s[0:3], 0x10 ; C2058110 s_buffer_load_dword s12, s[0:3], 0x14 ; C2060114 v_mac_f32_e32 v18, s14, v3 ; 3E24060E s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v19, s4, v3 ; 3E260604 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A v_mul_f32_e32 v20, s6, v1 ; 10280206 v_mac_f32_e32 v20, s5, v2 ; 3E280405 s_buffer_load_dword s21, s[0:3], 0x12 ; C20A8112 v_mac_f32_e32 v20, s11, v3 ; 3E28060B v_mac_f32_e32 v20, s12, v4 ; 3E28080C s_buffer_load_dword s12, s[0:3], 0x15 ; C2060115 v_mul_f32_e32 v21, s13, v1 ; 102A020D v_mac_f32_e32 v21, s19, v2 ; 3E2A0413 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v21, s4, v3 ; 3E2A0604 s_buffer_load_dword s22, s[0:3], 0x16 ; C20B0116 s_buffer_load_dword s23, s[0:3], 0x50 ; C20B8150 s_buffer_load_dword s24, s[0:3], 0x51 ; C20C0151 s_buffer_load_dword s25, s[0:3], 0x52 ; C20C8152 s_buffer_load_dword s26, s[0:3], 0x53 ; C20D0153 s_buffer_load_dword s27, s[0:3], 0x17 ; C20D8117 v_mac_f32_e32 v21, s12, v4 ; 3E2A080C v_mul_f32_e32 v1, s14, v1 ; 1002020E v_mac_f32_e32 v1, s20, v2 ; 3E020414 v_mac_f32_e32 v1, s21, v3 ; 3E020615 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v1, s22, v4 ; 3E020816 v_mac_f32_e32 v6, s23, v4 ; 3E0C0817 v_mac_f32_e32 v7, s24, v4 ; 3E0E0818 v_mac_f32_e32 v17, s25, v4 ; 3E220819 v_mac_f32_e32 v18, s26, v4 ; 3E24081A v_mac_f32_e32 v19, s27, v4 ; 3E26081B v_mov_b32_e32 v2, s17 ; 7E040211 v_mac_f32_e32 v2, s15, v0 ; 3E04000F v_mov_b32_e32 v0, s18 ; 7E000212 v_mac_f32_e32 v0, s16, v10 ; 3E001410 exp 15, 32, 0, 0, 0, v11, v8, v2, v0 ; F800020F 0002080B s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s6, v12 ; 10001806 v_mac_f32_e32 v0, s5, v13 ; 3E001A05 v_mul_f32_e32 v2, s13, v12 ; 1004180D v_mac_f32_e32 v2, s19, v13 ; 3E041A13 v_mul_f32_e32 v3, s14, v12 ; 1006180E v_mac_f32_e32 v3, s20, v13 ; 3E061A14 v_mac_f32_e32 v0, s11, v14 ; 3E001C0B v_mac_f32_e32 v2, s4, v14 ; 3E041C04 v_mac_f32_e32 v3, s21, v14 ; 3E061C15 v_mul_f32_e32 v4, v9, v9 ; 10081309 v_mac_f32_e32 v4, v16, v16 ; 3E082110 v_mul_f32_e32 v8, v0, v0 ; 10100100 v_mac_f32_e32 v8, v2, v2 ; 3E100502 v_mac_f32_e32 v4, v5, v5 ; 3E080B05 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x7 ; C2028107 v_mac_f32_e32 v8, v3, v3 ; 3E100703 v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_mul_f32_e32 v9, v4, v9 ; 10121304 v_mul_f32_e32 v10, v4, v16 ; 10142104 v_mul_f32_e32 v4, v4, v5 ; 10080B04 v_mul_f32_e32 v0, v8, v0 ; 10000108 v_mul_f32_e32 v2, v8, v2 ; 10040508 v_mul_f32_e32 v3, v8, v3 ; 10060708 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v5, -v20, s5, v5 ; D2820005 24140B14 exp 15, 33, 0, 0, 0, v0, v2, v3, v5 ; F800021F 05030200 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v5, v2, v4 ; 100A0902 v_mad_f32 v5, v10, v3, -v5 ; D2820005 8416070A v_mul_f32_e32 v3, v3, v9 ; 10061303 v_mad_f32 v3, v4, v0, -v3 ; D2820003 840E0104 v_mul_f32_e32 v0, v0, v10 ; 10001500 v_mad_f32 v0, v9, v2, -v0 ; D2820000 84020509 v_mul_f32_e32 v2, v15, v5 ; 10040B0F v_mul_f32_e32 v3, v15, v3 ; 1006070F v_mul_f32_e32 v0, v15, v0 ; 1000010F v_mov_b32_e32 v5, s6 ; 7E0A0206 v_mad_f32 v5, -v21, s5, v5 ; D2820005 24140B15 exp 15, 34, 0, 0, 0, v2, v3, v0, v5 ; F800022F 05000302 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_buffer_load_dword s11, s[0:3], 0x1 ; C2058101 s_buffer_load_dword s12, s[0:3], 0x2 ; C2060102 s_buffer_load_dword s13, s[0:3], 0x40 ; C2068140 s_buffer_load_dword s14, s[0:3], 0x41 ; C2070141 s_buffer_load_dword s15, s[0:3], 0x42 ; C2078142 s_buffer_load_dword s16, s[0:3], 0x39 ; C2080139 s_buffer_load_dword s17, s[0:3], 0x3a ; C208813A s_buffer_load_dword s18, s[0:3], 0x3c ; C209013C s_buffer_load_dword s19, s[0:3], 0x3d ; C209813D s_buffer_load_dword s0, s[0:3], 0x3e ; C200013E s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v0, s4 ; 7E000204 v_mad_f32 v0, -v1, s5, v0 ; D2820000 24000B01 v_subrev_f32_e32 v2, s6, v20 ; 0A042806 v_mul_f32_e32 v3, s7, v20 ; 10062807 v_mul_f32_e32 v5, s8, v20 ; 100A2808 v_mul_f32_e32 v8, s9, v20 ; 10102809 v_mac_f32_e32 v3, s10, v21 ; 3E062A0A v_mac_f32_e32 v5, s16, v21 ; 3E0A2A10 v_mac_f32_e32 v8, s17, v21 ; 3E102A11 v_subrev_f32_e32 v11, s11, v21 ; 0A162A0B v_mac_f32_e32 v3, s18, v1 ; 3E060212 v_mac_f32_e32 v5, s19, v1 ; 3E0A0213 v_mac_f32_e32 v8, s0, v1 ; 3E100200 v_subrev_f32_e32 v1, s12, v1 ; 0A02020C v_mac_f32_e32 v3, s13, v19 ; 3E06260D v_mac_f32_e32 v5, s14, v19 ; 3E0A260E v_mac_f32_e32 v8, s15, v19 ; 3E10260F exp 15, 35, 0, 0, 0, v9, v10, v4, v0 ; F800023F 00040A09 exp 15, 36, 0, 0, 0, v17, v2, v11, v1 ; F800024F 010B0211 exp 15, 37, 0, 0, 0, v3, v5, v8, v19 ; F800025F 13080503 exp 15, 12, 0, 1, 0, v6, v7, v17, v18 ; F80008CF 12110706 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 24 Code Size: 920 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[0..5] DCL CONST[8..9] DCL TEMP[0..13], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 10.0000, 0.9680} IMM[1] FLT32 { 0.0300, 2.0000, 0.5000, 0.0001} IMM[2] FLT32 { -1.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[3].xyzz, IN[3].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[3].xyzz, TEMP[0].xxxx 3: DP3 TEMP[1].x, IN[4].yzww, IN[4].yzww 4: RSQ TEMP[1].x, TEMP[1].xxxx 5: MUL TEMP[1].xyz, IN[4].yzww, TEMP[1].xxxx 6: MOV TEMP[2].xy, IN[0].xyyy 7: TEX TEMP[2].x, TEMP[2], SAMP[1], 2D 8: MOV TEMP[3].xyz, IMM[0].xxxx 9: FSLT TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 10: UIF TEMP[2].xxxx :0 11: MUL TEMP[2].xyz, CONST[5].xyzz, CONST[4].xyzz 12: MOV TEMP[4].xy, IN[0].xyyy 13: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D 14: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[4].xyzz 15: ELSE :0 16: MOV TEMP[2].xy, IN[0].xyyy 17: TEX TEMP[2].xyz, TEMP[2], SAMP[0], 2D 18: MUL TEMP[3].xyz, CONST[4].xyzz, TEMP[2].xyzz 19: ENDIF 20: LRP TEMP[2].xyz, CONST[8].xxxx, TEMP[3].xyzz, CONST[1].xyzz 21: MOV TEMP[4].x, IN[1].wwww 22: MOV TEMP[4].y, IN[2].wwww 23: MOV TEMP[4].z, IN[3].wwww 24: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 25: RSQ TEMP[5].x, TEMP[5].xxxx 26: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 27: DP3 TEMP[5].x, TEMP[0].xyzz, TEMP[4].xyzz 28: MAX TEMP[5].x, IMM[0].yyyy, TEMP[5].xxxx 29: DP3 TEMP[6].x, IN[5].xyzz, IN[5].xyzz 30: MOV TEMP[6].xy, TEMP[6].xxxx 31: TEX TEMP[6].w, TEMP[6], SAMP[2], 2D 32: MUL TEMP[6].xyz, CONST[2].xyzz, TEMP[6].wwww 33: MOV TEMP[1].xyz, -TEMP[1].xyzx 34: ADD TEMP[7].x, IMM[0].xxxx, -CONST[9].xxxx 35: ADD TEMP[8].xyz, TEMP[4].xyzz, TEMP[1].xyzz 36: DP3 TEMP[9].x, TEMP[8].xyzz, TEMP[8].xyzz 37: RSQ TEMP[9].x, TEMP[9].xxxx 38: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[9].xxxx 39: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz 40: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx 41: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[8].xyzz 42: MAX TEMP[4].x, IMM[0].yyyy, TEMP[4].xxxx 43: MUL TEMP[9].x, TEMP[7].xxxx, TEMP[7].xxxx 44: MUL TEMP[9].x, TEMP[9].xxxx, CONST[3].wwww 45: ADD TEMP[10].x, IMM[0].xxxx, -TEMP[7].xxxx 46: MAD TEMP[10].x, TEMP[10].xxxx, IMM[0].wwww, IMM[1].xxxx 47: LG2 TEMP[10].x, TEMP[10].xxxx 48: RCP TEMP[10].x, TEMP[10].xxxx 49: MUL TEMP[10].x, IMM[0].zzzz, TEMP[10].xxxx 50: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[10].xxxx 51: ADD TEMP[11].x, IMM[0].xxxx, -TEMP[5].xxxx 52: ADD TEMP[12].x, IMM[0].xxxx, -TEMP[1].xxxx 53: MUL TEMP[13].x, IMM[1].yyyy, TEMP[4].xxxx 54: MUL TEMP[7].x, TEMP[4].xxxx, TEMP[7].xxxx 55: MAD TEMP[7].x, TEMP[13].xxxx, TEMP[7].xxxx, IMM[1].zzzz 56: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx 57: LRP TEMP[13].x, TEMP[5].xxxx, IMM[0].xxxx, TEMP[9].xxxx 58: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx, TEMP[9].xxxx 59: MAD TEMP[1].x, TEMP[13].xxxx, TEMP[1].xxxx, IMM[1].wwww 60: RCP TEMP[1].x, TEMP[1].xxxx 61: DP3 TEMP[8].x, TEMP[0].xyzz, TEMP[8].xyzz 62: MAX TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx 63: POW TEMP[8].x, TEMP[8].xxxx, TEMP[10].xxxx 64: ADD TEMP[9].x, TEMP[10].xxxx, IMM[0].xxxx 65: MUL TEMP[9].x, TEMP[9].xxxx, CONST[3].yyyy 66: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx 67: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[8].xxxx 68: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx 69: MUL TEMP[1].x, TEMP[1].xxxx, CONST[3].xxxx 70: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx 71: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[6].xyzz 72: ADD TEMP[8].xyz, IMM[0].xxxx, -TEMP[2].xyzz 73: MUL TEMP[9].x, TEMP[4].xxxx, TEMP[4].xxxx 74: MUL TEMP[10].x, TEMP[4].xxxx, TEMP[4].xxxx 75: MUL TEMP[4].x, TEMP[10].xxxx, TEMP[4].xxxx 76: MUL TEMP[4].x, TEMP[9].xxxx, TEMP[4].xxxx 77: MAD TEMP[2].xyz, TEMP[8].xyzz, TEMP[4].xxxx, TEMP[2].xyzz 78: MUL TEMP[4].x, CONST[8].xxxx, CONST[1].wwww 79: ADD TEMP[4].x, CONST[1].wwww, -TEMP[4].xxxx 80: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 81: ADD TEMP[4].x, TEMP[7].xxxx, IMM[2].xxxx 82: MUL TEMP[8].x, TEMP[11].xxxx, TEMP[11].xxxx 83: MUL TEMP[9].x, TEMP[11].xxxx, TEMP[11].xxxx 84: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx 85: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx 86: MAD TEMP[4].x, TEMP[4].xxxx, TEMP[8].xxxx, IMM[0].xxxx 87: ADD TEMP[7].x, TEMP[7].xxxx, IMM[2].xxxx 88: MUL TEMP[8].x, TEMP[12].xxxx, TEMP[12].xxxx 89: MUL TEMP[9].x, TEMP[12].xxxx, TEMP[12].xxxx 90: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx 91: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx 92: MAD TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx, IMM[0].xxxx 93: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[7].xxxx 94: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 95: MUL TEMP[4].xyz, TEMP[6].xyzz, TEMP[4].xxxx 96: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xyzz 97: MAD TEMP[0].xyz, TEMP[1].xyzz, TEMP[2].xyzz, TEMP[3].xyzz 98: MAD TEMP[1].x, IN[4].xxxx, CONST[0].zzzz, CONST[0].wwww 99: MOV_SAT TEMP[1].x, TEMP[1].xxxx 100: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 101: MOV TEMP[0].xyz, TEMP[0].xyzx 102: MOV TEMP[0].w, IMM[0].xxxx 103: MOV OUT[0], TEMP[0] 104: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %41 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %42 = load <8 x i32>, <8 x i32> addrspace(2)* %41, align 32, !tbaa !0 %43 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %44 = load <4 x i32>, <4 x i32> addrspace(2)* %43, align 16, !tbaa !0 %45 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %46 = bitcast <8 x i32> addrspace(2)* %45 to <32 x i8> addrspace(2)* %47 = load <32 x i8>, <32 x i8> addrspace(2)* %46, align 32, !tbaa !0 %48 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %49 = bitcast <4 x i32> addrspace(2)* %48 to <16 x i8> addrspace(2)* %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 %51 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %52 = bitcast <8 x i32> addrspace(2)* %51 to <32 x i8> addrspace(2)* %53 = load <32 x i8>, <32 x i8> addrspace(2)* %52, align 32, !tbaa !0 %54 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %55 = bitcast <4 x i32> addrspace(2)* %54 to <16 x i8> addrspace(2)* %56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0 %57 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %58 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %59 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %60 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %61 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %62 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %63 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %64 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %65 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %66 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %67 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %68 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %69 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %70 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %71 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %72 = fmul float %61, %61 %73 = fmul float %62, %62 %74 = fadd float %73, %72 %75 = fmul float %63, %63 %76 = fadd float %74, %75 %77 = call float @llvm.AMDGPU.rsq.clamped.f32(float %76) %78 = fmul float %61, %77 %79 = fmul float %62, %77 %80 = fmul float %63, %77 %81 = fmul float %66, %66 %82 = fmul float %67, %67 %83 = fadd float %82, %81 %84 = fmul float %68, %68 %85 = fadd float %83, %84 %86 = call float @llvm.AMDGPU.rsq.clamped.f32(float %85) %87 = fmul float %66, %86 %88 = fmul float %67, %86 %89 = fmul float %68, %86 %90 = bitcast float %57 to i32 %91 = bitcast float %58 to i32 %92 = insertelement <2 x i32> undef, i32 %90, i32 0 %93 = insertelement <2 x i32> %92, i32 %91, i32 1 %94 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %93, <32 x i8> %47, <16 x i8> %50, i32 2) %95 = extractelement <4 x float> %94, i32 0 %96 = fcmp ogt float %95, 0.000000e+00 br i1 %96, label %IF, label %ELSE IF: ; preds = %main_body %97 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %98 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %99 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %100 = fmul float %99, %36 %101 = fmul float %98, %37 %102 = fmul float %97, %38 %103 = bitcast float %57 to i32 %104 = bitcast float %58 to i32 %105 = insertelement <2 x i32> undef, i32 %103, i32 0 %106 = insertelement <2 x i32> %105, i32 %104, i32 1 %107 = bitcast <8 x i32> %42 to <32 x i8> %108 = bitcast <4 x i32> %44 to <16 x i8> %109 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %106, <32 x i8> %107, <16 x i8> %108, i32 2) %110 = extractelement <4 x float> %109, i32 0 %111 = extractelement <4 x float> %109, i32 1 %112 = extractelement <4 x float> %109, i32 2 %113 = fmul float %100, %110 %114 = fmul float %101, %111 %115 = fmul float %102, %112 br label %ENDIF ELSE: ; preds = %main_body %116 = bitcast float %57 to i32 %117 = bitcast float %58 to i32 %118 = insertelement <2 x i32> undef, i32 %116, i32 0 %119 = insertelement <2 x i32> %118, i32 %117, i32 1 %120 = bitcast <8 x i32> %42 to <32 x i8> %121 = bitcast <4 x i32> %44 to <16 x i8> %122 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %119, <32 x i8> %120, <16 x i8> %121, i32 2) %123 = extractelement <4 x float> %122, i32 0 %124 = extractelement <4 x float> %122, i32 1 %125 = extractelement <4 x float> %122, i32 2 %126 = fmul float %36, %123 %127 = fmul float %37, %124 %128 = fmul float %38, %125 br label %ENDIF ENDIF: ; preds = %ELSE, %IF %temp14.0 = phi float [ %115, %IF ], [ %128, %ELSE ] %temp13.0 = phi float [ %114, %IF ], [ %127, %ELSE ] %temp12.0 = phi float [ %113, %IF ], [ %126, %ELSE ] %129 = call float @llvm.AMDGPU.lrp(float %39, float %temp12.0, float %26) %130 = call float @llvm.AMDGPU.lrp(float %39, float %temp13.0, float %27) %131 = call float @llvm.AMDGPU.lrp(float %39, float %temp14.0, float %28) %132 = fmul float %59, %59 %133 = fmul float %60, %60 %134 = fadd float %133, %132 %135 = fmul float %64, %64 %136 = fadd float %134, %135 %137 = call float @llvm.AMDGPU.rsq.clamped.f32(float %136) %138 = fmul float %59, %137 %139 = fmul float %60, %137 %140 = fmul float %64, %137 %141 = fmul float %78, %138 %142 = fmul float %79, %139 %143 = fadd float %142, %141 %144 = fmul float %80, %140 %145 = fadd float %143, %144 %146 = call float @llvm.maxnum.f32(float %145, float 0.000000e+00) %147 = fmul float %69, %69 %148 = fmul float %70, %70 %149 = fadd float %148, %147 %150 = fmul float %71, %71 %151 = fadd float %149, %150 %152 = bitcast float %151 to i32 %153 = bitcast float %151 to i32 %154 = insertelement <2 x i32> undef, i32 %152, i32 0 %155 = insertelement <2 x i32> %154, i32 %153, i32 1 %156 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %155, <32 x i8> %53, <16 x i8> %56, i32 2) %157 = extractelement <4 x float> %156, i32 3 %158 = fmul float %30, %157 %159 = fmul float %31, %157 %160 = fmul float %32, %157 %161 = fsub float 1.000000e+00, %40 %162 = fsub float %138, %87 %163 = fsub float %139, %88 %164 = fsub float %140, %89 %165 = fmul float %162, %162 %166 = fmul float %163, %163 %167 = fadd float %166, %165 %168 = fmul float %164, %164 %169 = fadd float %167, %168 %170 = call float @llvm.AMDGPU.rsq.clamped.f32(float %169) %171 = fmul float %162, %170 %172 = fmul float %163, %170 %173 = fmul float %164, %170 %174 = fmul float %87, %78 %175 = fsub float -0.000000e+00, %174 %176 = fmul float %88, %79 %177 = fsub float %175, %176 %178 = fmul float %89, %80 %179 = fsub float %177, %178 %180 = call float @llvm.maxnum.f32(float %179, float 0.000000e+00) %181 = fmul float %138, %171 %182 = fmul float %139, %172 %183 = fadd float %182, %181 %184 = fmul float %140, %173 %185 = fadd float %183, %184 %186 = call float @llvm.maxnum.f32(float %185, float 0.000000e+00) %187 = fmul float %161, %161 %188 = fmul float %187, %35 %189 = fsub float 1.000000e+00, %161 %190 = fmul float %189, 0x3FEEF9DB20000000 %191 = fadd float %190, 0x3F9EB851E0000000 %192 = call float @llvm.log2.f32(float %191) %193 = fdiv float 1.000000e+00, %192 %194 = fmul float %193, 1.000000e+01 %195 = fmul float %194, %194 %196 = fsub float 1.000000e+00, %146 %197 = fsub float 1.000000e+00, %180 %198 = fmul float %186, 2.000000e+00 %199 = fmul float %186, %161 %200 = fmul float %198, %199 %201 = fadd float %200, 5.000000e-01 %202 = fsub float 1.000000e+00, %186 %203 = call float @llvm.AMDGPU.lrp(float %146, float 1.000000e+00, float %188) %204 = call float @llvm.AMDGPU.lrp(float %180, float 1.000000e+00, float %188) %205 = fmul float %203, %204 %206 = fadd float %205, 0x3F1A36E2E0000000 %207 = fdiv float 1.000000e+00, %206 %208 = fmul float %78, %171 %209 = fmul float %79, %172 %210 = fadd float %209, %208 %211 = fmul float %80, %173 %212 = fadd float %210, %211 %213 = call float @llvm.maxnum.f32(float %212, float 0.000000e+00) %214 = call float @llvm.pow.f32(float %213, float %195) %215 = fadd float %195, 1.000000e+00 %216 = fmul float %215, %34 %217 = fmul float %214, %216 %218 = fmul float %207, %217 %219 = fmul float %218, %146 %220 = fmul float %219, %33 %221 = call float @llvm.maxnum.f32(float %220, float 0.000000e+00) %222 = fmul float %221, %158 %223 = fmul float %221, %159 %224 = fmul float %221, %160 %225 = fsub float 1.000000e+00, %129 %226 = fsub float 1.000000e+00, %130 %227 = fsub float 1.000000e+00, %131 %228 = fmul float %202, %202 %229 = fmul float %202, %202 %230 = fmul float %229, %202 %231 = fmul float %228, %230 %232 = fmul float %225, %231 %233 = fadd float %232, %129 %234 = fmul float %226, %231 %235 = fadd float %234, %130 %236 = fmul float %227, %231 %237 = fadd float %236, %131 %238 = fmul float %39, %29 %239 = fsub float %29, %238 %240 = fmul float %temp12.0, %239 %241 = fmul float %temp13.0, %239 %242 = fmul float %temp14.0, %239 %243 = fadd float %201, -1.000000e+00 %244 = fmul float %196, %196 %245 = fmul float %196, %196 %246 = fmul float %245, %196 %247 = fmul float %244, %246 %248 = fmul float %243, %247 %249 = fadd float %248, 1.000000e+00 %250 = fadd float %201, -1.000000e+00 %251 = fmul float %197, %197 %252 = fmul float %197, %197 %253 = fmul float %252, %197 %254 = fmul float %251, %253 %255 = fmul float %250, %254 %256 = fadd float %255, 1.000000e+00 %257 = fmul float %249, %256 %258 = fmul float %257, %146 %259 = fmul float %158, %258 %260 = fmul float %159, %258 %261 = fmul float %160, %258 %262 = fmul float %240, %259 %263 = fmul float %241, %260 %264 = fmul float %242, %261 %265 = fmul float %222, %233 %266 = fadd float %265, %262 %267 = fmul float %223, %235 %268 = fadd float %267, %263 %269 = fmul float %224, %237 %270 = fadd float %269, %264 %271 = fmul float %65, %24 %272 = fadd float %271, %25 %273 = call float @llvm.AMDIL.clamp.(float %272, float 0.000000e+00, float 1.000000e+00) %274 = fmul float %266, %273 %275 = fmul float %268, %273 %276 = fmul float %270, %273 %277 = call i32 @llvm.SI.packf16(float %274, float %275) %278 = bitcast i32 %277 to float %279 = call i32 @llvm.SI.packf16(float %276, float 1.000000e+00) %280 = bitcast i32 %279 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %278, float %280, float %278, float %280) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 v_interp_p1_f32 v11, v0, 0, 0, [m0] ; C82C0000 v_interp_p2_f32 v11, [v11], v1, 0, 0, [m0] ; C82D0001 v_interp_p1_f32 v12, v0, 1, 0, [m0] ; C8300100 v_interp_p2_f32 v12, [v12], v1, 1, 0, [m0] ; C8310101 v_interp_p1_f32 v7, v0, 3, 1, [m0] ; C81C0700 v_interp_p2_f32 v7, [v7], v1, 3, 1, [m0] ; C81D0701 v_interp_p1_f32 v5, v0, 3, 2, [m0] ; C8140B00 v_interp_p2_f32 v5, [v5], v1, 3, 2, [m0] ; C8150B01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s1, s[12:15], 0x3 ; C2008D03 s_buffer_load_dword s10, s[12:15], 0x10 ; C2050D10 s_buffer_load_dword s11, s[12:15], 0x11 ; C2058D11 s_buffer_load_dword s16, s[12:15], 0x12 ; C2080D12 s_buffer_load_dword s0, s[12:15], 0x20 ; C2000D20 v_interp_p1_f32 v13, v0, 1, 3, [m0] ; C8340D00 v_interp_p2_f32 v13, [v13], v1, 1, 3, [m0] ; C8350D01 v_interp_p1_f32 v14, v0, 2, 3, [m0] ; C8380E00 v_interp_p2_f32 v14, [v14], v1, 2, 3, [m0] ; C8390E01 v_interp_p1_f32 v6, v0, 3, 3, [m0] ; C8180F00 v_interp_p2_f32 v6, [v6], v1, 3, 3, [m0] ; C8190F01 v_interp_p1_f32 v15, v0, 0, 4, [m0] ; C83C1000 v_interp_p2_f32 v15, [v15], v1, 0, 4, [m0] ; C83D1001 v_interp_p1_f32 v16, v0, 1, 4, [m0] ; C8401100 v_interp_p2_f32 v16, [v16], v1, 1, 4, [m0] ; C8411101 v_interp_p1_f32 v17, v0, 2, 4, [m0] ; C8441200 v_interp_p2_f32 v17, [v17], v1, 2, 4, [m0] ; C8451201 v_interp_p1_f32 v18, v0, 3, 4, [m0] ; C8481300 v_interp_p2_f32 v18, [v18], v1, 3, 4, [m0] ; C8491301 v_interp_p1_f32 v9, v0, 0, 5, [m0] ; C8241400 v_interp_p2_f32 v9, [v9], v1, 0, 5, [m0] ; C8251401 v_interp_p1_f32 v10, v0, 1, 5, [m0] ; C8281500 v_interp_p2_f32 v10, [v10], v1, 1, 5, [m0] ; C8291501 v_interp_p1_f32 v0, v0, 2, 5, [m0] ; C8001600 v_mul_f32_e32 v19, v8, v8 ; 10261108 v_mac_f32_e32 v19, v13, v13 ; 3E261B0D s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 v_mac_f32_e32 v19, v14, v14 ; 3E261D0E v_rsq_clamp_f32_e32 v19, v19 ; 7E265913 v_mul_f32_e32 v20, v16, v16 ; 10282110 v_mac_f32_e32 v20, v17, v17 ; 3E282311 v_mac_f32_e32 v20, v18, v18 ; 3E282512 v_rsq_clamp_f32_e32 v20, v20 ; 7E285914 v_interp_p2_f32 v0, [v0], v1, 2, 5, [m0] ; C8011601 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[36:43], s[20:23] ; F0800100 00A9010B s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_nlt_f32_e32 vcc, 0, v1 ; 7C1C0280 s_and_saveexec_b64 s[22:23], vcc ; BE96246A s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E image_sample v[21:23], 7, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[24:31], s[32:35] ; F0800700 0106150B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, s10, v21 ; 10042A0A v_mul_f32_e32 v3, s11, v22 ; 10062C0B v_mul_f32_e32 v4, s16, v23 ; 10082E10 s_or_saveexec_b64 s[22:23], s[22:23] ; BE962516 v_mov_b32_e32 v1, s1 ; 7E020201 s_buffer_load_dword s21, s[12:15], 0x2 ; C20A8D02 s_buffer_load_dword s1, s[12:15], 0x4 ; C2008D04 s_buffer_load_dword s2, s[12:15], 0x5 ; C2010D05 s_buffer_load_dword s3, s[12:15], 0x6 ; C2018D06 s_buffer_load_dword s36, s[12:15], 0x7 ; C2120D07 s_buffer_load_dword s17, s[12:15], 0x8 ; C2088D08 s_buffer_load_dword s18, s[12:15], 0x9 ; C2090D09 s_buffer_load_dword s19, s[12:15], 0xa ; C2098D0A s_buffer_load_dword s8, s[12:15], 0xc ; C2040D0C s_buffer_load_dword s9, s[12:15], 0xd ; C2048D0D s_buffer_load_dword s20, s[12:15], 0xf ; C20A0D0F s_buffer_load_dword s37, s[12:15], 0x24 ; C2128D24 v_mov_b32_e32 v21, s0 ; 7E2A0200 s_waitcnt lgkmcnt(0) ; BF8C007F s_xor_b64 exec, exec, s[22:23] ; 89FE167E s_cbranch_execz BB0_4 ; BF880000 v_mov_b32_e32 v2, s10 ; 7E04020A v_mov_b32_e32 v3, s11 ; 7E06020B v_mov_b32_e32 v4, s16 ; 7E080210 s_buffer_load_dword s38, s[12:15], 0x14 ; C2130D14 s_buffer_load_dword s39, s[12:15], 0x15 ; C2138D15 s_buffer_load_dword s40, s[12:15], 0x16 ; C2140D16 image_sample v[22:24], 7, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[24:31], s[32:35] ; F0800700 0106160B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s38, v2 ; 10040426 v_mul_f32_e32 v3, s39, v3 ; 10060627 v_mul_f32_e32 v4, s40, v4 ; 10080828 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v22, v2 ; 10040516 v_mul_f32_e32 v3, v23, v3 ; 10060717 v_mul_f32_e32 v4, v24, v4 ; 10080918 s_or_b64 exec, exec, s[22:23] ; 88FE167E v_mac_f32_e32 v1, s21, v15 ; 3E021E15 v_mad_f32 v11, -v21, s36, s36 ; D282000B 20904915 v_mul_f32_e32 v8, v19, v8 ; 10101113 v_mul_f32_e32 v12, v19, v13 ; 10181B13 v_mul_f32_e32 v13, v19, v14 ; 101A1D13 v_mul_f32_e32 v14, v20, v16 ; 101C2114 v_mul_f32_e32 v15, v20, v17 ; 101E2314 v_mul_f32_e32 v16, v20, v18 ; 10202514 v_sub_f32_e64 v17, 1.0, s37 ; D2080011 00004AF2 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710 v_mul_f32_e32 v18, v9, v9 ; 10241309 v_mac_f32_e32 v18, v10, v10 ; 3E24150A v_mul_f32_e32 v9, v7, v7 ; 10120F07 v_mac_f32_e32 v9, v5, v5 ; 3E120B05 v_mac_f32_e32 v9, v6, v6 ; 3E120D06 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mac_f32_e32 v18, v0, v0 ; 3E240100 v_mov_b32_e32 v19, v18 ; 7E260312 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[24:31], s[12:15] ; F0800800 00660012 v_mul_f32_e32 v10, v9, v7 ; 10140F09 v_mad_f32 v7, v7, v9, -v14 ; D2820007 843A1307 v_mul_f32_e32 v14, v8, v14 ; 101C1D08 v_mad_f32 v14, -v15, v12, -v14 ; D282000E A43A190F v_mad_f32 v15, v5, v9, -v15 ; D282000F 843E1305 v_mul_f32_e32 v18, v7, v7 ; 10240F07 v_mac_f32_e32 v18, v15, v15 ; 3E241F0F v_mad_f32 v19, v6, v9, -v16 ; D2820013 84421306 v_mac_f32_e32 v18, v19, v19 ; 3E242713 v_rsq_clamp_f32_e32 v18, v18 ; 7E245912 v_mul_f32_e32 v5, v9, v5 ; 100A0B09 v_mul_f32_e32 v20, v10, v8 ; 1028110A v_mac_f32_e32 v20, v5, v12 ; 3E281905 v_mul_f32_e32 v7, v18, v7 ; 100E0F12 v_mul_f32_e32 v15, v18, v15 ; 101E1F12 v_mul_f32_e32 v10, v7, v10 ; 10141507 v_mac_f32_e32 v10, v15, v5 ; 3E140B0F v_mul_f32_e32 v5, v7, v8 ; 100A1107 v_mac_f32_e32 v5, v15, v12 ; 3E0A190F v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mul_f32_e32 v7, v18, v19 ; 100E2712 v_mad_f32 v8, -v16, v13, v14 ; D2820008 243A1B10 v_mac_f32_e32 v20, v6, v13 ; 3E281B06 v_mac_f32_e32 v10, v7, v6 ; 3E140D07 v_mac_f32_e32 v5, v7, v13 ; 3E0A1B07 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v6, s17, v0 ; 100C0011 v_mul_f32_e32 v7, s18, v0 ; 100E0012 v_mul_f32_e32 v0, s19, v0 ; 10000013 v_mul_f32_e32 v9, v17, v17 ; 10122311 v_mul_f32_e32 v9, s20, v9 ; 10121214 v_sub_f32_e32 v12, 1.0, v17 ; 081822F2 v_mov_b32_e32 v13, 0x3cf5c28f ; 7E1A02FF 3CF5C28F v_madmk_f32_e32 v12, v12, v13, 0x3f77ced9 ; 40181B0C 3F77CED9 v_max_f32_e32 v10, 0, v10 ; 20141480 v_mul_f32_e32 v13, v17, v10 ; 101A1511 v_add_f32_e32 v14, v10, v10 ; 061C150A v_mad_f32 v13, v14, v13, 0.5 ; D282000D 03C21B0E v_max_f32_e32 v8, 0, v8 ; 20101080 v_log_f32_e32 v12, v12 ; 7E184F0C v_sub_f32_e32 v14, 1.0, v8 ; 081C10F2 v_mul_f32_e32 v15, v9, v14 ; 101E1D09 v_mac_f32_e32 v15, 1.0, v8 ; 3E1E10F2 v_max_f32_e32 v8, 0, v20 ; 20102880 v_rcp_f32_e32 v12, v12 ; 7E18550C v_sub_f32_e32 v16, 1.0, v8 ; 082010F2 v_mul_f32_e32 v9, v9, v16 ; 10122109 v_mac_f32_e32 v9, 1.0, v8 ; 3E1210F2 v_max_f32_e32 v5, 0, v5 ; 200A0A80 v_log_f32_e32 v5, v5 ; 7E0A4F05 v_madak_f32_e32 v9, v9, v15, 0x38d1b717 ; 42121F09 38D1B717 v_mul_f32_e32 v12, 0x41200000, v12 ; 101818FF 41200000 v_mul_f32_e32 v15, v12, v12 ; 101E190C v_mul_legacy_f32_e32 v5, v15, v5 ; 0E0A0B0F v_rcp_f32_e32 v9, v9 ; 7E125509 v_mad_f32 v12, v12, v12, 1.0 ; D282000C 03CA190C v_mul_f32_e32 v12, s9, v12 ; 10181809 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_mul_f32_e32 v5, v12, v5 ; 100A0B0C v_mul_f32_e32 v5, v5, v9 ; 100A1305 v_mul_f32_e32 v5, v8, v5 ; 100A0B08 v_mul_f32_e32 v5, s8, v5 ; 100A0A08 v_sub_f32_e32 v9, 1.0, v10 ; 081214F2 v_mul_f32_e32 v10, v9, v9 ; 10141309 v_mul_f32_e32 v9, v9, v10 ; 10121509 v_mul_f32_e32 v9, v9, v10 ; 10121509 v_sub_f32_e64 v10, 1.0, s0 ; D208000A 000000F2 v_mul_f32_e32 v12, s1, v10 ; 10181401 v_mad_f32 v15, -v10, s1, 1.0 ; D282000F 23C8030A v_mul_f32_e32 v17, s2, v10 ; 10221402 v_mad_f32 v18, -v10, s2, 1.0 ; D2820012 23C8050A v_mul_f32_e32 v19, s3, v10 ; 10261403 v_mad_f32 v10, -v10, s3, 1.0 ; D282000A 23C8070A v_mac_f32_e32 v12, s0, v2 ; 3E180400 v_mac_f32_e32 v17, s0, v3 ; 3E220600 v_mac_f32_e32 v19, s0, v4 ; 3E260800 v_mad_f32 v15, -s0, v2, v15 ; D282000F 243E0400 v_mad_f32 v18, -s0, v3, v18 ; D2820012 244A0600 v_mad_f32 v10, -s0, v4, v10 ; D282000A 242A0800 v_mac_f32_e32 v12, v9, v15 ; 3E181F09 v_mac_f32_e32 v17, v9, v18 ; 3E222509 v_max_f32_e32 v5, 0, v5 ; 200A0A80 v_mac_f32_e32 v19, v9, v10 ; 3E261509 v_mul_f32_e32 v9, v6, v5 ; 10120B06 v_mul_f32_e32 v10, v7, v5 ; 10140B07 v_mul_f32_e32 v5, v0, v5 ; 100A0B00 v_mul_f32_e32 v2, v11, v2 ; 1004050B v_mul_f32_e32 v3, v11, v3 ; 1006070B v_mul_f32_e32 v4, v11, v4 ; 1008090B v_add_f32_e32 v11, -1.0, v13 ; 06161AF3 v_mul_f32_e32 v13, v16, v16 ; 101A2110 v_mul_f32_e32 v15, v16, v13 ; 101E1B10 v_mul_f32_e32 v13, v15, v13 ; 101A1B0F v_mad_f32 v13, v11, v13, 1.0 ; D282000D 03CA1B0B v_mul_f32_e32 v15, v14, v14 ; 101E1D0E v_mul_f32_e32 v14, v14, v15 ; 101C1F0E v_mul_f32_e32 v14, v14, v15 ; 101C1F0E v_mad_f32 v11, v11, v14, 1.0 ; D282000B 03CA1D0B v_mul_f32_e32 v11, v11, v13 ; 10161B0B v_mul_f32_e32 v8, v8, v11 ; 10101708 v_mul_f32_e32 v6, v8, v6 ; 100C0D08 v_mul_f32_e32 v7, v8, v7 ; 100E0F08 v_mul_f32_e32 v0, v8, v0 ; 10000108 v_mul_f32_e32 v2, v6, v2 ; 10040506 v_mul_f32_e32 v3, v7, v3 ; 10060707 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_mac_f32_e32 v2, v12, v9 ; 3E04130C v_mac_f32_e32 v3, v17, v10 ; 3E061511 v_mac_f32_e32 v0, v19, v5 ; 3E000B13 v_mul_f32_e32 v2, v1, v2 ; 10040501 v_mul_f32_e32 v3, v1, v3 ; 10060701 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 28 Code Size: 1052 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL CONST[0..19] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[5], IN[0].xxxx 1: MAD TEMP[0], CONST[6], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[7], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0].xyz, CONST[8], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[16], IN[0].xxxx 5: MAD TEMP[1], CONST[17], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[18], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1], CONST[19], IN[0].wwww, TEMP[1] 8: MAD TEMP[2].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww 9: FSEQ TEMP[3].x, CONST[15].xxxx, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].xy, IN[2].xyxx 12: ELSE :0 13: MOV TEMP[3].xy, IN[3].xyxx 14: ENDIF 15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[14].xyyy, CONST[14].zwww 16: MOV TEMP[2].zw, TEMP[3].yyxy 17: MOV TEMP[3].x, CONST[9].xxxx 18: MOV TEMP[3].y, CONST[10].xxxx 19: MOV TEMP[3].z, CONST[11].xxxx 20: MOV TEMP[4].x, CONST[9].yyyy 21: MOV TEMP[4].y, CONST[10].yyyy 22: MOV TEMP[4].z, CONST[11].yyyy 23: MOV TEMP[5].x, CONST[9].zzzz 24: MOV TEMP[5].y, CONST[10].zzzz 25: MOV TEMP[5].z, CONST[11].zzzz 26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz 29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 30: RSQ TEMP[4].x, TEMP[4].xxxx 31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 32: MOV TEMP[4].xyz, TEMP[3].xyzx 33: MUL TEMP[5], TEMP[3].xyzz, TEMP[3].yzzx 34: DP4 TEMP[6].x, CONST[1], TEMP[5] 35: DP4 TEMP[7].x, CONST[2], TEMP[5] 36: MOV TEMP[6].y, TEMP[7].xxxx 37: DP4 TEMP[5].x, CONST[3], TEMP[5] 38: MOV TEMP[6].z, TEMP[5].xxxx 39: MUL TEMP[5].x, TEMP[3].yyyy, TEMP[3].yyyy 40: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[5].xxxx 41: MAD TEMP[3].xyz, CONST[4].xyzz, TEMP[3].xxxx, TEMP[6].xyzz 42: ADD TEMP[5].xyz, TEMP[0].xyzz, -CONST[0].xyzz 43: MOV TEMP[5].yzw, TEMP[5].yxyz 44: MOV TEMP[5].x, TEMP[1].zzzz 45: MOV TEMP[0].xyz, TEMP[0].xyzx 46: MOV OUT[5], TEMP[0] 47: MOV OUT[1], TEMP[2] 48: MOV OUT[2], TEMP[4] 49: MOV OUT[3], TEMP[3] 50: MOV OUT[0], TEMP[1] 51: MOV OUT[4], TEMP[5] 52: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %5, %7 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = extractelement <4 x float> %83, i32 3 %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 %90 = add i32 %5, %7 %91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %90) %92 = extractelement <4 x float> %91, i32 0 %93 = extractelement <4 x float> %91, i32 1 %94 = extractelement <4 x float> %91, i32 2 %95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0 %97 = add i32 %5, %7 %98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97) %99 = extractelement <4 x float> %98, i32 0 %100 = extractelement <4 x float> %98, i32 1 %101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0 %103 = add i32 %5, %7 %104 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %103) %105 = extractelement <4 x float> %104, i32 0 %106 = extractelement <4 x float> %104, i32 1 %107 = fmul float %31, %84 %108 = fmul float %32, %84 %109 = fmul float %33, %84 %110 = fmul float %34, %84 %111 = fmul float %35, %85 %112 = fadd float %111, %107 %113 = fmul float %36, %85 %114 = fadd float %113, %108 %115 = fmul float %37, %85 %116 = fadd float %115, %109 %117 = fmul float %38, %85 %118 = fadd float %117, %110 %119 = fmul float %39, %86 %120 = fadd float %119, %112 %121 = fmul float %40, %86 %122 = fadd float %121, %114 %123 = fmul float %41, %86 %124 = fadd float %123, %116 %125 = fmul float %42, %86 %126 = fadd float %125, %118 %127 = fmul float %43, %87 %128 = fadd float %127, %120 %129 = fmul float %44, %87 %130 = fadd float %129, %122 %131 = fmul float %45, %87 %132 = fadd float %131, %124 %133 = fmul float %64, %84 %134 = fmul float %65, %84 %135 = fmul float %66, %84 %136 = fmul float %67, %84 %137 = fmul float %68, %85 %138 = fadd float %137, %133 %139 = fmul float %69, %85 %140 = fadd float %139, %134 %141 = fmul float %70, %85 %142 = fadd float %141, %135 %143 = fmul float %71, %85 %144 = fadd float %143, %136 %145 = fmul float %72, %86 %146 = fadd float %145, %138 %147 = fmul float %73, %86 %148 = fadd float %147, %140 %149 = fmul float %74, %86 %150 = fadd float %149, %142 %151 = fmul float %75, %86 %152 = fadd float %151, %144 %153 = fmul float %76, %87 %154 = fadd float %153, %146 %155 = fmul float %77, %87 %156 = fadd float %155, %148 %157 = fmul float %78, %87 %158 = fadd float %157, %150 %159 = fmul float %79, %87 %160 = fadd float %159, %152 %161 = fmul float %99, %55 %162 = fadd float %161, %57 %163 = fmul float %100, %56 %164 = fadd float %163, %58 %165 = fcmp oeq float %63, 0.000000e+00 %. = select i1 %165, float %99, float %105 %.32 = select i1 %165, float %100, float %106 %166 = fmul float %., %59 %167 = fadd float %166, %61 %168 = fmul float %.32, %60 %169 = fadd float %168, %62 %170 = fmul float %46, %92 %171 = fmul float %49, %92 %172 = fmul float %52, %92 %173 = fmul float %47, %93 %174 = fadd float %173, %170 %175 = fmul float %50, %93 %176 = fadd float %175, %171 %177 = fmul float %53, %93 %178 = fadd float %177, %172 %179 = fmul float %48, %94 %180 = fadd float %179, %174 %181 = fmul float %51, %94 %182 = fadd float %181, %176 %183 = fmul float %54, %94 %184 = fadd float %183, %178 %185 = fmul float %180, %180 %186 = fmul float %182, %182 %187 = fadd float %186, %185 %188 = fmul float %184, %184 %189 = fadd float %187, %188 %190 = call float @llvm.AMDGPU.rsq.clamped.f32(float %189) %191 = fmul float %180, %190 %192 = fmul float %182, %190 %193 = fmul float %184, %190 %194 = fmul float %191, %192 %195 = fmul float %192, %193 %196 = fmul float %193, %193 %197 = fmul float %193, %191 %198 = fmul float %16, %194 %199 = fmul float %17, %195 %200 = fadd float %198, %199 %201 = fmul float %18, %196 %202 = fadd float %200, %201 %203 = fmul float %19, %197 %204 = fadd float %202, %203 %205 = fmul float %20, %194 %206 = fmul float %21, %195 %207 = fadd float %205, %206 %208 = fmul float %22, %196 %209 = fadd float %207, %208 %210 = fmul float %23, %197 %211 = fadd float %209, %210 %212 = fmul float %24, %194 %213 = fmul float %25, %195 %214 = fadd float %212, %213 %215 = fmul float %26, %196 %216 = fadd float %214, %215 %217 = fmul float %27, %197 %218 = fadd float %216, %217 %219 = fmul float %192, %192 %220 = fmul float %191, %191 %221 = fsub float %220, %219 %222 = fmul float %28, %221 %223 = fadd float %222, %204 %224 = fmul float %29, %221 %225 = fadd float %224, %211 %226 = fmul float %30, %221 %227 = fadd float %226, %218 %228 = fsub float %128, %13 %229 = fsub float %130, %14 %230 = fsub float %132, %15 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %162, float %164, float %167, float %169) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %191, float %192, float %193, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %223, float %225, float %227, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %158, float %228, float %229, float %230) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %128, float %130, float %132, float %126) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %154, float %156, float %158, float %160) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s18, s[20:23], 0x20 ; C2091520 buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00 s_buffer_load_dword s19, s[20:23], 0x21 ; C2099521 s_buffer_load_dword s24, s[20:23], 0x22 ; C20C1522 s_buffer_load_dword s25, s[20:23], 0x24 ; C20C9524 s_buffer_load_dword s26, s[20:23], 0x25 ; C20D1525 s_buffer_load_dword s27, s[20:23], 0x26 ; C20D9526 s_buffer_load_dword s28, s[20:23], 0x28 ; C20E1528 s_buffer_load_dword s29, s[20:23], 0x29 ; C20E9529 s_buffer_load_dword s30, s[20:23], 0x2a ; C20F152A s_buffer_load_dword s31, s[20:23], 0x2c ; C20F952C s_buffer_load_dword s32, s[20:23], 0x2d ; C210152D s_buffer_load_dword s33, s[20:23], 0x2e ; C210952E s_buffer_load_dword s34, s[20:23], 0x34 ; C2111534 s_buffer_load_dword s35, s[20:23], 0x35 ; C2119535 s_buffer_load_dword s5, s[20:23], 0x36 ; C2029536 s_buffer_load_dword s0, s[20:23], 0x0 ; C2001500 s_buffer_load_dword s1, s[20:23], 0x1 ; C2009501 s_buffer_load_dword s2, s[20:23], 0x2 ; C2011502 s_buffer_load_dword s6, s[20:23], 0x4 ; C2031504 s_buffer_load_dword s14, s[20:23], 0x5 ; C2071505 s_buffer_load_dword s4, s[20:23], 0x6 ; C2021506 s_buffer_load_dword s3, s[20:23], 0x7 ; C2019507 s_buffer_load_dword s12, s[20:23], 0x8 ; C2061508 s_buffer_load_dword s16, s[20:23], 0x9 ; C2081509 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s5 ; 7E000205 s_buffer_load_dword s7, s[20:23], 0xa ; C203950A s_buffer_load_dword s5, s[20:23], 0xb ; C202950B s_buffer_load_dword s15, s[20:23], 0xc ; C207950C s_buffer_load_dword s17, s[20:23], 0xd ; C208950D s_buffer_load_dword s13, s[20:23], 0xe ; C206950E s_buffer_load_dword s8, s[20:23], 0xf ; C204150F s_buffer_load_dword s36, s[20:23], 0x3c ; C212153C s_buffer_load_dword s37, s[20:23], 0x40 ; C2129540 s_buffer_load_dword s38, s[20:23], 0x41 ; C2131541 s_buffer_load_dword s39, s[20:23], 0x42 ; C2139542 s_buffer_load_dword s40, s[20:23], 0x43 ; C2141543 s_buffer_load_dword s9, s[20:23], 0x10 ; C2049510 s_buffer_load_dword s10, s[20:23], 0x11 ; C2051511 s_buffer_load_dword s11, s[20:23], 0x12 ; C2059512 s_buffer_load_dword s41, s[20:23], 0x14 ; C2149514 s_buffer_load_dword s42, s[20:23], 0x15 ; C2151515 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_eq_f32_e64 vcc, 0, s36 ; D004006A 00004880 s_buffer_load_dword s36, s[20:23], 0x16 ; C2121516 s_buffer_load_dword s43, s[20:23], 0x17 ; C2159517 s_buffer_load_dword s44, s[20:23], 0x18 ; C2161518 s_buffer_load_dword s45, s[20:23], 0x19 ; C2169519 s_buffer_load_dword s46, s[20:23], 0x1a ; C217151A s_buffer_load_dword s47, s[20:23], 0x37 ; C2179537 s_buffer_load_dword s48, s[20:23], 0x38 ; C2181538 s_buffer_load_dword s49, s[20:23], 0x39 ; C2189539 s_buffer_load_dword s50, s[20:23], 0x3a ; C219153A s_buffer_load_dword s51, s[20:23], 0x3b ; C219953B s_buffer_load_dword s52, s[20:23], 0x1b ; C21A151B s_buffer_load_dword s53, s[20:23], 0x1c ; C21A951C s_buffer_load_dword s54, s[20:23], 0x1d ; C21B151D s_buffer_load_dword s55, s[20:23], 0x1e ; C21B951E s_buffer_load_dword s56, s[20:23], 0x1f ; C21C151F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v13, s47 ; 7E1A022F s_buffer_load_dword s47, s[20:23], 0x44 ; C2179544 s_buffer_load_dword s57, s[20:23], 0x45 ; C21C9545 s_buffer_load_dword s58, s[20:23], 0x46 ; C21D1546 v_mov_b32_e32 v14, s50 ; 7E1C0232 s_buffer_load_dword s50, s[20:23], 0x47 ; C2191547 v_mov_b32_e32 v15, s51 ; 7E1E0233 s_buffer_load_dword s51, s[20:23], 0x48 ; C2199548 s_buffer_load_dword s59, s[20:23], 0x49 ; C21D9549 s_buffer_load_dword s60, s[20:23], 0x4a ; C21E154A s_buffer_load_dword s61, s[20:23], 0x4b ; C21E954B s_buffer_load_dword s62, s[20:23], 0x4c ; C21F154C s_buffer_load_dword s63, s[20:23], 0x4d ; C21F954D s_buffer_load_dword s64, s[20:23], 0x4e ; C220154E s_buffer_load_dword s20, s[20:23], 0x4f ; C20A154F v_mul_f32_e32 v16, s41, v2 ; 10200429 v_mac_f32_e32 v0, s34, v9 ; 3E001222 v_mul_f32_e32 v17, s42, v2 ; 1022042A v_mul_f32_e32 v18, s36, v2 ; 10240424 v_mul_f32_e32 v19, s43, v2 ; 1026042B v_mac_f32_e32 v13, s35, v10 ; 3E1A1423 v_mul_f32_e32 v20, s25, v6 ; 10280C19 v_mul_f32_e32 v21, s28, v6 ; 102A0C1C v_mul_f32_e32 v6, s31, v6 ; 100C0C1F v_mac_f32_e32 v16, s44, v3 ; 3E20062C v_mac_f32_e32 v17, s45, v3 ; 3E22062D v_mac_f32_e32 v18, s46, v3 ; 3E24062E v_mac_f32_e32 v20, s26, v7 ; 3E280E1A v_mac_f32_e32 v21, s29, v7 ; 3E2A0E1D v_cndmask_b32_e32 v9, v11, v9 ; 0012130B v_cndmask_b32_e32 v10, v12, v10 ; 0014150C v_mac_f32_e32 v6, s32, v7 ; 3E0C0E20 v_mac_f32_e32 v20, s27, v8 ; 3E28101B v_mac_f32_e32 v21, s30, v8 ; 3E2A101E v_mac_f32_e32 v6, s33, v8 ; 3E0C1021 v_mac_f32_e32 v19, s52, v3 ; 3E260634 v_mul_f32_e32 v7, s37, v2 ; 100E0425 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v7, s47, v3 ; 3E0E062F v_mul_f32_e32 v8, s38, v2 ; 10100426 v_mac_f32_e32 v8, s57, v3 ; 3E100639 v_mul_f32_e32 v11, s39, v2 ; 10160427 v_mac_f32_e32 v11, s58, v3 ; 3E16063A v_mul_f32_e32 v2, s40, v2 ; 10040428 v_mac_f32_e32 v2, s50, v3 ; 3E040632 v_mac_f32_e32 v16, s53, v4 ; 3E200835 v_mac_f32_e32 v17, s54, v4 ; 3E220836 v_mac_f32_e32 v18, s55, v4 ; 3E240837 v_mac_f32_e32 v19, s56, v4 ; 3E260838 v_mac_f32_e32 v7, s51, v4 ; 3E0E0833 v_mac_f32_e32 v8, s59, v4 ; 3E10083B v_mac_f32_e32 v11, s60, v4 ; 3E16083C v_mac_f32_e32 v2, s61, v4 ; 3E04083D v_mac_f32_e32 v16, s18, v5 ; 3E200A12 v_mac_f32_e32 v17, s19, v5 ; 3E220A13 v_mac_f32_e32 v18, s24, v5 ; 3E240A18 v_mac_f32_e32 v7, s62, v5 ; 3E0E0A3E v_mac_f32_e32 v8, s63, v5 ; 3E100A3F v_mac_f32_e32 v11, s64, v5 ; 3E160A40 v_mac_f32_e32 v2, s20, v5 ; 3E040A14 v_mul_f32_e32 v3, v20, v20 ; 10062914 v_mac_f32_e32 v3, v21, v21 ; 3E062B15 v_mac_f32_e32 v3, v6, v6 ; 3E060D06 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_mac_f32_e32 v14, s48, v9 ; 3E1C1230 v_mac_f32_e32 v15, s49, v10 ; 3E1E1431 exp 15, 32, 0, 0, 0, v0, v13, v14, v15 ; F800020F 0F0E0D00 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v3, v20 ; 10002903 v_mul_f32_e32 v4, v3, v21 ; 10082B03 v_mul_f32_e32 v3, v3, v6 ; 10060D03 v_mul_f32_e32 v5, v3, v4 ; 100A0903 v_mul_f32_e32 v6, s14, v5 ; 100C0A0E v_mul_f32_e32 v9, s16, v5 ; 10120A10 v_mul_f32_e32 v5, s17, v5 ; 100A0A11 v_mul_f32_e32 v10, v4, v0 ; 10140104 v_mac_f32_e32 v6, s6, v10 ; 3E0C1406 v_mac_f32_e32 v9, s12, v10 ; 3E12140C v_mac_f32_e32 v5, s15, v10 ; 3E0A140F v_mul_f32_e32 v10, v3, v3 ; 10140703 v_mac_f32_e32 v6, s4, v10 ; 3E0C1404 v_mac_f32_e32 v9, s7, v10 ; 3E121407 v_mac_f32_e32 v5, s13, v10 ; 3E0A140D v_mul_f32_e32 v10, v0, v3 ; 10140700 v_mac_f32_e32 v6, s3, v10 ; 3E0C1403 v_mac_f32_e32 v9, s5, v10 ; 3E121405 v_mac_f32_e32 v5, s8, v10 ; 3E0A1408 v_mul_f32_e32 v10, v4, v4 ; 10140904 v_mad_f32 v10, v0, v0, -v10 ; D282000A 842A0100 v_mac_f32_e32 v6, s9, v10 ; 3E0C1409 v_mac_f32_e32 v9, s10, v10 ; 3E12140A v_mac_f32_e32 v5, s11, v10 ; 3E0A140B v_subrev_f32_e32 v10, s0, v16 ; 0A142000 v_subrev_f32_e32 v12, s1, v17 ; 0A182201 v_subrev_f32_e32 v13, s2, v18 ; 0A1A2402 exp 15, 33, 0, 0, 0, v0, v4, v3, v1 ; F800021F 01030400 exp 15, 34, 0, 0, 0, v6, v9, v5, v1 ; F800022F 01050906 exp 15, 35, 0, 0, 0, v11, v10, v12, v13 ; F800023F 0D0C0A0B exp 15, 36, 0, 0, 0, v16, v17, v18, v19 ; F800024F 13121110 exp 15, 12, 0, 1, 0, v7, v8, v11, v2 ; F80008CF 020B0807 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 24 Code Size: 748 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SVIEW[0], CUBE, FLOAT DCL SVIEW[1], CUBE, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL CONST[0..5] DCL CONST[8..19] DCL CONST[21..22] DCL CONST[24] DCL TEMP[0..17], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, 0.5000} IMM[1] FLT32 { 0.7500, 7.0000, 1.0000, 10.0000} IMM[2] FLT32 { 0.9680, 0.0300, 0.0001, -1.0000} 0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx 3: DP3 TEMP[1].x, IN[3].yzww, IN[3].yzww 4: RSQ TEMP[1].x, TEMP[1].xxxx 5: MUL TEMP[1].xyz, IN[3].yzww, TEMP[1].xxxx 6: MOV TEMP[2].xy, IN[0].xyyy 7: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D 8: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[2].xyzz 9: LRP TEMP[3].xyz, CONST[21].xxxx, TEMP[2].xyzz, CONST[16].xyzz 10: MUL TEMP[4].x, CONST[21].xxxx, CONST[16].wwww 11: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx 12: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx 13: MOV TEMP[5].xy, IN[0].xyyy 14: TEX TEMP[5].y, TEMP[5], SAMP[3], 2D 15: ADD TEMP[6].x, IMM[0].xxxx, -CONST[24].xxxx 16: MAD TEMP[5].x, TEMP[5].yyyy, CONST[24].xxxx, TEMP[6].xxxx 17: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz 18: MAX TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx 19: MOV TEMP[7].xyz, IMM[0].yyyy 20: MOV TEMP[8].w, IMM[0].xxxx 21: MOV TEMP[8].xyz, TEMP[0].xyzx 22: DP4 TEMP[9].x, CONST[1], TEMP[8] 23: DP4 TEMP[10].x, CONST[2], TEMP[8] 24: MOV TEMP[9].y, TEMP[10].xxxx 25: DP4 TEMP[8].x, CONST[3], TEMP[8] 26: MOV TEMP[9].z, TEMP[8].xxxx 27: ADD TEMP[8].xyz, IN[2].xyzz, TEMP[9].xyzz 28: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx 29: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[1].xyzz 30: MUL TEMP[9].xyz, TEMP[9].xxxx, TEMP[0].xyzz 31: MUL TEMP[9].xyz, IMM[0].zzzz, TEMP[9].xyzz 32: ADD TEMP[9].xyz, TEMP[1].xyzz, -TEMP[9].xyzz 33: MOV TEMP[10].xyz, TEMP[9].xyzx 34: FSLT TEMP[11].x, IMM[0].yyyy, CONST[10].wwww 35: UIF TEMP[11].xxxx :0 36: DP3 TEMP[11].x, TEMP[9].xyzz, TEMP[9].xyzz 37: RSQ TEMP[11].x, TEMP[11].xxxx 38: MUL TEMP[11].xyz, TEMP[9].xyzz, TEMP[11].xxxx 39: MOV TEMP[12].xyz, -IN[4].xyzx 40: ADD TEMP[13].xyz, CONST[8].xyzz, TEMP[12].xyzz 41: RCP TEMP[14].x, TEMP[11].xxxx 42: RCP TEMP[14].y, TEMP[11].yyyy 43: RCP TEMP[14].z, TEMP[11].zzzz 44: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz 45: ADD TEMP[12].xyz, CONST[9].xyzz, TEMP[12].xyzz 46: RCP TEMP[14].x, TEMP[11].xxxx 47: RCP TEMP[14].y, TEMP[11].yyyy 48: RCP TEMP[14].z, TEMP[11].zzzz 49: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz 50: FSLT TEMP[14].xyz, IMM[0].yyyy, TEMP[11].xyzz 51: UIF TEMP[14].xxxx :0 52: MOV TEMP[15].x, TEMP[13].xxxx 53: ELSE :0 54: MOV TEMP[15].x, TEMP[12].xxxx 55: ENDIF 56: UIF TEMP[14].yyyy :0 57: MOV TEMP[16].x, TEMP[13].yyyy 58: ELSE :0 59: MOV TEMP[16].x, TEMP[12].yyyy 60: ENDIF 61: UIF TEMP[14].zzzz :0 62: MOV TEMP[13].x, TEMP[13].zzzz 63: ELSE :0 64: MOV TEMP[13].x, TEMP[12].zzzz 65: ENDIF 66: ADD TEMP[12].xyz, CONST[8].xyzz, CONST[9].xyzz 67: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[0].wwww 68: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx 69: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx 70: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[10].xyzz 71: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[4].xyzz 72: MAD TEMP[11].xyz, TEMP[11].xyzz, TEMP[13].xxxx, TEMP[14].xyzz 73: ADD TEMP[10].xyz, TEMP[11].xyzz, -TEMP[12].xyzz 74: ENDIF 75: ADD TEMP[11].x, IMM[0].xxxx, -CONST[22].xxxx 76: POW TEMP[11].x, TEMP[11].xxxx, IMM[1].xxxx 77: MUL TEMP[11].x, TEMP[11].xxxx, IMM[1].yyyy 78: MOV TEMP[10].xyz, TEMP[10].xyzz 79: MOV TEMP[10].w, TEMP[11].xxxx 80: TXL TEMP[10], TEMP[10], SAMP[0], CUBE 81: POW TEMP[11].x, TEMP[10].wwww, CONST[11].yyyy 82: MUL TEMP[11].x, CONST[11].xxxx, TEMP[11].xxxx 83: MUL TEMP[10].xyz, TEMP[11].xxxx, TEMP[10].xyzz 84: FSLT TEMP[11].x, CONST[9].wwww, IMM[1].zzzz 85: UIF TEMP[11].xxxx :0 86: MOV TEMP[11].xyz, TEMP[9].xyzx 87: FSLT TEMP[12].x, IMM[0].yyyy, CONST[14].wwww 88: UIF TEMP[12].xxxx :0 89: DP3 TEMP[12].x, TEMP[9].xyzz, TEMP[9].xyzz 90: RSQ TEMP[12].x, TEMP[12].xxxx 91: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[12].xxxx 92: MOV TEMP[12].xyz, -IN[4].xyzx 93: ADD TEMP[13].xyz, CONST[12].xyzz, TEMP[12].xyzz 94: RCP TEMP[14].x, TEMP[9].xxxx 95: RCP TEMP[14].y, TEMP[9].yyyy 96: RCP TEMP[14].z, TEMP[9].zzzz 97: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz 98: ADD TEMP[12].xyz, CONST[13].xyzz, TEMP[12].xyzz 99: RCP TEMP[14].x, TEMP[9].xxxx 100: RCP TEMP[14].y, TEMP[9].yyyy 101: RCP TEMP[14].z, TEMP[9].zzzz 102: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz 103: FSLT TEMP[14].xyz, IMM[0].yyyy, TEMP[9].xyzz 104: UIF TEMP[14].xxxx :0 105: MOV TEMP[15].x, TEMP[13].xxxx 106: ELSE :0 107: MOV TEMP[15].x, TEMP[12].xxxx 108: ENDIF 109: UIF TEMP[14].yyyy :0 110: MOV TEMP[16].x, TEMP[13].yyyy 111: ELSE :0 112: MOV TEMP[16].x, TEMP[12].yyyy 113: ENDIF 114: UIF TEMP[14].zzzz :0 115: MOV TEMP[13].x, TEMP[13].zzzz 116: ELSE :0 117: MOV TEMP[13].x, TEMP[12].zzzz 118: ENDIF 119: ADD TEMP[12].xyz, CONST[12].xyzz, CONST[13].xyzz 120: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[0].wwww 121: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx 122: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx 123: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[14].xyzz 124: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[4].xyzz 125: MAD TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xxxx, TEMP[14].xyzz 126: ADD TEMP[11].xyz, TEMP[9].xyzz, -TEMP[12].xyzz 127: ENDIF 128: ADD TEMP[9].x, IMM[0].xxxx, -CONST[22].xxxx 129: POW TEMP[9].x, TEMP[9].xxxx, IMM[1].xxxx 130: MUL TEMP[9].x, TEMP[9].xxxx, IMM[1].yyyy 131: MOV TEMP[11].xyz, TEMP[11].xyzz 132: MOV TEMP[11].w, TEMP[9].xxxx 133: TXL TEMP[9], TEMP[11], SAMP[1], CUBE 134: POW TEMP[11].x, TEMP[9].wwww, CONST[15].yyyy 135: MUL TEMP[11].x, CONST[15].xxxx, TEMP[11].xxxx 136: MUL TEMP[9].xyz, TEMP[11].xxxx, TEMP[9].xyzz 137: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[10].xyzz, TEMP[9].xyzz 138: ELSE :0 139: MOV TEMP[7].xyz, TEMP[10].xyzx 140: ENDIF 141: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx 142: MOV TEMP[1].xyz, -TEMP[1].xyzx 143: ADD TEMP[5].x, IMM[0].xxxx, -CONST[22].xxxx 144: ADD TEMP[9].xyz, CONST[0].xyzz, TEMP[1].xyzz 145: DP3 TEMP[10].x, TEMP[9].xyzz, TEMP[9].xyzz 146: RSQ TEMP[10].x, TEMP[10].xxxx 147: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xxxx 148: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz 149: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx 150: DP3 TEMP[10].x, CONST[0].xyzz, TEMP[9].xyzz 151: MAX TEMP[10].x, IMM[0].yyyy, TEMP[10].xxxx 152: MUL TEMP[11].x, TEMP[5].xxxx, TEMP[5].xxxx 153: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].wwww 154: ADD TEMP[12].x, IMM[0].xxxx, -TEMP[5].xxxx 155: MAD TEMP[12].x, TEMP[12].xxxx, IMM[2].xxxx, IMM[2].yyyy 156: LG2 TEMP[12].x, TEMP[12].xxxx 157: RCP TEMP[12].x, TEMP[12].xxxx 158: MUL TEMP[12].x, IMM[1].wwww, TEMP[12].xxxx 159: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[12].xxxx 160: ADD TEMP[13].x, IMM[0].xxxx, -TEMP[6].xxxx 161: ADD TEMP[14].x, IMM[0].xxxx, -TEMP[1].xxxx 162: MUL TEMP[15].x, IMM[0].zzzz, TEMP[10].xxxx 163: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx 164: MAD TEMP[5].x, TEMP[15].xxxx, TEMP[5].xxxx, IMM[0].wwww 165: ADD TEMP[10].x, IMM[0].xxxx, -TEMP[10].xxxx 166: ADD TEMP[15].x, IMM[0].xxxx, -TEMP[1].xxxx 167: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx 168: ADD TEMP[4].x, CONST[22].xxxx, TEMP[4].xxxx 169: MOV_SAT TEMP[4].x, TEMP[4].xxxx 170: MUL TEMP[16].x, TEMP[15].xxxx, TEMP[15].xxxx 171: MUL TEMP[17].x, TEMP[15].xxxx, TEMP[15].xxxx 172: MUL TEMP[15].x, TEMP[17].xxxx, TEMP[15].xxxx 173: MUL TEMP[15].x, TEMP[16].xxxx, TEMP[15].xxxx 174: LRP TEMP[4].xyz, TEMP[15].xxxx, TEMP[4].xxxx, TEMP[3].xyzz 175: LRP TEMP[15].x, TEMP[6].xxxx, IMM[0].xxxx, TEMP[11].xxxx 176: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx, TEMP[11].xxxx 177: MAD TEMP[1].x, TEMP[15].xxxx, TEMP[1].xxxx, IMM[2].zzzz 178: RCP TEMP[1].x, TEMP[1].xxxx 179: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[9].xyzz 180: MAX TEMP[9].x, IMM[0].yyyy, TEMP[9].xxxx 181: POW TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx 182: ADD TEMP[11].x, TEMP[12].xxxx, IMM[0].xxxx 183: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].yyyy 184: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx 185: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[9].xxxx 186: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx 187: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx 188: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx 189: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[17].xyzz 190: ADD TEMP[9].xyz, IMM[0].xxxx, -TEMP[3].xyzz 191: MUL TEMP[11].x, TEMP[10].xxxx, TEMP[10].xxxx 192: MUL TEMP[12].x, TEMP[10].xxxx, TEMP[10].xxxx 193: MUL TEMP[10].x, TEMP[12].xxxx, TEMP[10].xxxx 194: MUL TEMP[10].x, TEMP[11].xxxx, TEMP[10].xxxx 195: MAD TEMP[3].xyz, TEMP[9].xyzz, TEMP[10].xxxx, TEMP[3].xyzz 196: ADD TEMP[9].x, TEMP[5].xxxx, IMM[2].wwww 197: MUL TEMP[10].x, TEMP[13].xxxx, TEMP[13].xxxx 198: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[13].xxxx 199: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[13].xxxx 200: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 201: MAD TEMP[9].x, TEMP[9].xxxx, TEMP[10].xxxx, IMM[0].xxxx 202: ADD TEMP[5].x, TEMP[5].xxxx, IMM[2].wwww 203: MUL TEMP[10].x, TEMP[14].xxxx, TEMP[14].xxxx 204: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx 205: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx 206: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 207: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[10].xxxx, IMM[0].xxxx 208: MUL TEMP[5].x, TEMP[9].xxxx, TEMP[5].xxxx 209: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 210: MAD TEMP[5].xyz, CONST[17].xyzz, TEMP[5].xxxx, TEMP[8].xyzz 211: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz 212: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz 213: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz 214: MOV TEMP[0].xyz, TEMP[0].xyzx 215: MAD TEMP[1].x, IN[3].xxxx, CONST[5].zzzz, CONST[5].wwww 216: MOV_SAT TEMP[1].x, TEMP[1].xxxx 217: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz 218: MOV TEMP[0].xyz, TEMP[0].xyzx 219: MOV TEMP[0].w, IMM[0].xxxx 220: MOV OUT[0], TEMP[0] 221: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300) %76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312) %79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336) %80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384) %82 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %83 = load <32 x i8>, <32 x i8> addrspace(2)* %82, align 32, !tbaa !0 %84 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %85 = load <16 x i8>, <16 x i8> addrspace(2)* %84, align 16, !tbaa !0 %86 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %87 = bitcast <8 x i32> addrspace(2)* %86 to <32 x i8> addrspace(2)* %88 = load <32 x i8>, <32 x i8> addrspace(2)* %87, align 32, !tbaa !0 %89 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %90 = bitcast <4 x i32> addrspace(2)* %89 to <16 x i8> addrspace(2)* %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 %92 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %93 = bitcast <8 x i32> addrspace(2)* %92 to <32 x i8> addrspace(2)* %94 = load <32 x i8>, <32 x i8> addrspace(2)* %93, align 32, !tbaa !0 %95 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %96 = bitcast <4 x i32> addrspace(2)* %95 to <16 x i8> addrspace(2)* %97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0 %98 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %99 = bitcast <8 x i32> addrspace(2)* %98 to <32 x i8> addrspace(2)* %100 = load <32 x i8>, <32 x i8> addrspace(2)* %99, align 32, !tbaa !0 %101 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %102 = bitcast <4 x i32> addrspace(2)* %101 to <16 x i8> addrspace(2)* %103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !tbaa !0 %104 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %110 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %111 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %113 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %114 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %115 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %116 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %117 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %119 = fmul float %106, %106 %120 = fmul float %107, %107 %121 = fadd float %120, %119 %122 = fmul float %108, %108 %123 = fadd float %121, %122 %124 = call float @llvm.AMDGPU.rsq.clamped.f32(float %123) %125 = fmul float %106, %124 %126 = fmul float %107, %124 %127 = fmul float %108, %124 %128 = fmul float %113, %113 %129 = fmul float %114, %114 %130 = fadd float %129, %128 %131 = fmul float %115, %115 %132 = fadd float %130, %131 %133 = call float @llvm.AMDGPU.rsq.clamped.f32(float %132) %134 = fmul float %113, %133 %135 = fmul float %114, %133 %136 = fmul float %115, %133 %137 = bitcast float %104 to i32 %138 = bitcast float %105 to i32 %139 = insertelement <2 x i32> undef, i32 %137, i32 0 %140 = insertelement <2 x i32> %139, i32 %138, i32 1 %141 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %140, <32 x i8> %94, <16 x i8> %97, i32 2) %142 = extractelement <4 x float> %141, i32 0 %143 = extractelement <4 x float> %141, i32 1 %144 = extractelement <4 x float> %141, i32 2 %145 = fmul float %76, %142 %146 = fmul float %77, %143 %147 = fmul float %78, %144 %148 = call float @llvm.AMDGPU.lrp(float %79, float %145, float %66) %149 = call float @llvm.AMDGPU.lrp(float %79, float %146, float %67) %150 = call float @llvm.AMDGPU.lrp(float %79, float %147, float %68) %151 = fmul float %79, %69 %152 = fsub float %69, %151 %153 = fmul float %145, %152 %154 = fmul float %146, %152 %155 = fmul float %147, %152 %156 = bitcast float %104 to i32 %157 = bitcast float %105 to i32 %158 = insertelement <2 x i32> undef, i32 %156, i32 0 %159 = insertelement <2 x i32> %158, i32 %157, i32 1 %160 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %159, <32 x i8> %100, <16 x i8> %103, i32 2) %161 = extractelement <4 x float> %160, i32 1 %162 = fsub float 1.000000e+00, %81 %163 = fmul float %161, %81 %164 = fadd float %163, %162 %165 = fmul float %125, %24 %166 = fmul float %126, %25 %167 = fadd float %166, %165 %168 = fmul float %127, %26 %169 = fadd float %167, %168 %170 = call float @llvm.maxnum.f32(float %169, float 0.000000e+00) %171 = fmul float %27, %125 %172 = fmul float %28, %126 %173 = fadd float %171, %172 %174 = fmul float %29, %127 %175 = fadd float %173, %174 %176 = fadd float %175, %30 %177 = fmul float %31, %125 %178 = fmul float %32, %126 %179 = fadd float %177, %178 %180 = fmul float %33, %127 %181 = fadd float %179, %180 %182 = fadd float %181, %34 %183 = fmul float %35, %125 %184 = fmul float %36, %126 %185 = fadd float %183, %184 %186 = fmul float %37, %127 %187 = fadd float %185, %186 %188 = fadd float %187, %38 %189 = fadd float %109, %176 %190 = fadd float %110, %182 %191 = fadd float %111, %188 %192 = fmul float %189, %164 %193 = fmul float %190, %164 %194 = fmul float %191, %164 %195 = fmul float %125, %134 %196 = fmul float %126, %135 %197 = fadd float %196, %195 %198 = fmul float %127, %136 %199 = fadd float %197, %198 %200 = fmul float %199, %125 %201 = fmul float %199, %126 %202 = fmul float %199, %127 %203 = fmul float %200, 2.000000e+00 %204 = fmul float %201, 2.000000e+00 %205 = fmul float %202, 2.000000e+00 %206 = fsub float %134, %203 %207 = fsub float %135, %204 %208 = fsub float %136, %205 %209 = fcmp ogt float %51, 0.000000e+00 br i1 %209, label %IF, label %ENDIF IF: ; preds = %main_body %210 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %211 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %212 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %213 = fmul float %206, %206 %214 = fmul float %207, %207 %215 = fadd float %214, %213 %216 = fmul float %208, %208 %217 = fadd float %215, %216 %218 = call float @llvm.AMDGPU.rsq.clamped.f32(float %217) %219 = fmul float %206, %218 %220 = fmul float %207, %218 %221 = fmul float %208, %218 %222 = fsub float %44, %116 %223 = fsub float %45, %117 %224 = fsub float %46, %118 %225 = fdiv float 1.000000e+00, %219 %226 = fdiv float 1.000000e+00, %220 %227 = fdiv float 1.000000e+00, %221 %228 = fmul float %222, %225 %229 = fmul float %223, %226 %230 = fmul float %224, %227 %231 = fsub float %47, %116 %232 = fsub float %48, %117 %233 = fsub float %49, %118 %234 = fdiv float 1.000000e+00, %219 %235 = fdiv float 1.000000e+00, %220 %236 = fdiv float 1.000000e+00, %221 %237 = fmul float %231, %234 %238 = fmul float %232, %235 %239 = fmul float %233, %236 %240 = fcmp ogt float %219, 0.000000e+00 %241 = fcmp ogt float %220, 0.000000e+00 %242 = fcmp ogt float %221, 0.000000e+00 %. = select i1 %240, float %228, float %237 %temp64.0 = select i1 %241, float %229, float %238 %.96 = select i1 %242, float %230, float %239 %243 = fadd float %44, %47 %244 = fadd float %45, %48 %245 = fadd float %46, %49 %246 = fmul float %243, 5.000000e-01 %247 = fmul float %244, 5.000000e-01 %248 = fmul float %245, 5.000000e-01 %249 = call float @llvm.minnum.f32(float %., float %temp64.0) %250 = call float @llvm.minnum.f32(float %249, float %.96) %251 = fsub float %246, %212 %252 = fsub float %247, %211 %253 = fsub float %248, %210 %254 = fadd float %251, %116 %255 = fadd float %252, %117 %256 = fadd float %253, %118 %257 = fmul float %219, %250 %258 = fadd float %257, %254 %259 = fmul float %220, %250 %260 = fadd float %259, %255 %261 = fmul float %221, %250 %262 = fadd float %261, %256 %263 = fsub float %258, %246 %264 = fsub float %260, %247 %265 = fsub float %262, %248 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp40.0 = phi float [ %263, %IF ], [ %206, %main_body ] %temp41.0 = phi float [ %264, %IF ], [ %207, %main_body ] %temp42.0 = phi float [ %265, %IF ], [ %208, %main_body ] %266 = fsub float 1.000000e+00, %80 %267 = call float @llvm.pow.f32(float %266, float 7.500000e-01) %268 = fmul float %267, 7.000000e+00 %269 = insertelement <4 x float> undef, float %temp40.0, i32 0 %270 = insertelement <4 x float> %269, float %temp41.0, i32 1 %271 = insertelement <4 x float> %270, float %temp42.0, i32 2 %272 = insertelement <4 x float> %271, float %268, i32 3 %273 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %272) %274 = extractelement <4 x float> %273, i32 0 %275 = extractelement <4 x float> %273, i32 1 %276 = extractelement <4 x float> %273, i32 2 %277 = extractelement <4 x float> %273, i32 3 %278 = call float @llvm.fabs.f32(float %276) %279 = fdiv float 1.000000e+00, %278 %280 = fmul float %274, %279 %281 = fadd float %280, 1.500000e+00 %282 = fmul float %275, %279 %283 = fadd float %282, 1.500000e+00 %284 = bitcast float %283 to i32 %285 = bitcast float %281 to i32 %286 = bitcast float %277 to i32 %287 = bitcast float %268 to i32 %288 = insertelement <4 x i32> undef, i32 %284, i32 0 %289 = insertelement <4 x i32> %288, i32 %285, i32 1 %290 = insertelement <4 x i32> %289, i32 %286, i32 2 %291 = insertelement <4 x i32> %290, i32 %287, i32 3 %292 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %291, <32 x i8> %83, <16 x i8> %85, i32 4) %293 = extractelement <4 x float> %292, i32 0 %294 = extractelement <4 x float> %292, i32 1 %295 = extractelement <4 x float> %292, i32 2 %296 = extractelement <4 x float> %292, i32 3 %297 = call float @llvm.pow.f32(float %296, float %53) %298 = fmul float %52, %297 %299 = fmul float %298, %293 %300 = fmul float %298, %294 %301 = fmul float %298, %295 %302 = fcmp olt float %50, 0x3FEFFFEB00000000 br i1 %302, label %IF82, label %ENDIF81 IF82: ; preds = %ENDIF %303 = fcmp ogt float %63, 0.000000e+00 br i1 %303, label %IF85, label %ENDIF84 ENDIF81: ; preds = %ENDIF, %ENDIF84 %temp28.0 = phi float [ %531, %ENDIF84 ], [ %299, %ENDIF ] %temp29.0 = phi float [ %532, %ENDIF84 ], [ %300, %ENDIF ] %temp30.0 = phi float [ %533, %ENDIF84 ], [ %301, %ENDIF ] %304 = fmul float %temp28.0, %164 %305 = fmul float %temp29.0, %164 %306 = fmul float %temp30.0, %164 %307 = fsub float 1.000000e+00, %80 %308 = fsub float %24, %134 %309 = fsub float %25, %135 %310 = fsub float %26, %136 %311 = fmul float %308, %308 %312 = fmul float %309, %309 %313 = fadd float %312, %311 %314 = fmul float %310, %310 %315 = fadd float %313, %314 %316 = call float @llvm.AMDGPU.rsq.clamped.f32(float %315) %317 = fmul float %308, %316 %318 = fmul float %309, %316 %319 = fmul float %310, %316 %320 = fmul float %134, %125 %321 = fsub float -0.000000e+00, %320 %322 = fmul float %135, %126 %323 = fsub float %321, %322 %324 = fmul float %136, %127 %325 = fsub float %323, %324 %326 = call float @llvm.maxnum.f32(float %325, float 0.000000e+00) %327 = fmul float %24, %317 %328 = fmul float %25, %318 %329 = fadd float %328, %327 %330 = fmul float %26, %319 %331 = fadd float %329, %330 %332 = call float @llvm.maxnum.f32(float %331, float 0.000000e+00) %333 = fmul float %307, %307 %334 = fmul float %333, %75 %335 = fsub float 1.000000e+00, %307 %336 = fmul float %335, 0x3FEEF9DB20000000 %337 = fadd float %336, 0x3F9EB851E0000000 %338 = call float @llvm.log2.f32(float %337) %339 = fdiv float 1.000000e+00, %338 %340 = fmul float %339, 1.000000e+01 %341 = fmul float %340, %340 %342 = fsub float 1.000000e+00, %170 %343 = fsub float 1.000000e+00, %326 %344 = fmul float %332, 2.000000e+00 %345 = fmul float %332, %307 %346 = fmul float %344, %345 %347 = fadd float %346, 5.000000e-01 %348 = fsub float 1.000000e+00, %332 %349 = fsub float 1.000000e+00, %326 %350 = fsub float 1.000000e+00, %152 %351 = fadd float %80, %350 %352 = call float @llvm.AMDIL.clamp.(float %351, float 0.000000e+00, float 1.000000e+00) %353 = fmul float %349, %349 %354 = fmul float %349, %349 %355 = fmul float %354, %349 %356 = fmul float %353, %355 %357 = call float @llvm.AMDGPU.lrp(float %356, float %352, float %148) %358 = call float @llvm.AMDGPU.lrp(float %356, float %352, float %149) %359 = call float @llvm.AMDGPU.lrp(float %356, float %352, float %150) %360 = call float @llvm.AMDGPU.lrp(float %170, float 1.000000e+00, float %334) %361 = call float @llvm.AMDGPU.lrp(float %326, float 1.000000e+00, float %334) %362 = fmul float %360, %361 %363 = fadd float %362, 0x3F1A36E2E0000000 %364 = fdiv float 1.000000e+00, %363 %365 = fmul float %125, %317 %366 = fmul float %126, %318 %367 = fadd float %366, %365 %368 = fmul float %127, %319 %369 = fadd float %367, %368 %370 = call float @llvm.maxnum.f32(float %369, float 0.000000e+00) %371 = call float @llvm.pow.f32(float %370, float %341) %372 = fadd float %341, 1.000000e+00 %373 = fmul float %372, %74 %374 = fmul float %371, %373 %375 = fmul float %364, %374 %376 = fmul float %375, %170 %377 = fmul float %376, %73 %378 = call float @llvm.maxnum.f32(float %377, float 0.000000e+00) %379 = fmul float %378, %70 %380 = fmul float %378, %71 %381 = fmul float %378, %72 %382 = fsub float 1.000000e+00, %148 %383 = fsub float 1.000000e+00, %149 %384 = fsub float 1.000000e+00, %150 %385 = fmul float %348, %348 %386 = fmul float %348, %348 %387 = fmul float %386, %348 %388 = fmul float %385, %387 %389 = fmul float %382, %388 %390 = fadd float %389, %148 %391 = fmul float %383, %388 %392 = fadd float %391, %149 %393 = fmul float %384, %388 %394 = fadd float %393, %150 %395 = fadd float %347, -1.000000e+00 %396 = fmul float %342, %342 %397 = fmul float %342, %342 %398 = fmul float %397, %342 %399 = fmul float %396, %398 %400 = fmul float %395, %399 %401 = fadd float %400, 1.000000e+00 %402 = fadd float %347, -1.000000e+00 %403 = fmul float %343, %343 %404 = fmul float %343, %343 %405 = fmul float %404, %343 %406 = fmul float %403, %405 %407 = fmul float %402, %406 %408 = fadd float %407, 1.000000e+00 %409 = fmul float %401, %408 %410 = fmul float %409, %170 %411 = fmul float %70, %410 %412 = fadd float %411, %192 %413 = fmul float %71, %410 %414 = fadd float %413, %193 %415 = fmul float %72, %410 %416 = fadd float %415, %194 %417 = fmul float %153, %412 %418 = fmul float %154, %414 %419 = fmul float %155, %416 %420 = fmul float %379, %390 %421 = fadd float %420, %417 %422 = fmul float %380, %392 %423 = fadd float %422, %418 %424 = fmul float %381, %394 %425 = fadd float %424, %419 %426 = fmul float %304, %357 %427 = fadd float %426, %421 %428 = fmul float %305, %358 %429 = fadd float %428, %423 %430 = fmul float %306, %359 %431 = fadd float %430, %425 %432 = fmul float %112, %42 %433 = fadd float %432, %43 %434 = call float @llvm.AMDIL.clamp.(float %433, float 0.000000e+00, float 1.000000e+00) %435 = call float @llvm.AMDGPU.lrp(float %434, float %427, float %39) %436 = call float @llvm.AMDGPU.lrp(float %434, float %429, float %40) %437 = call float @llvm.AMDGPU.lrp(float %434, float %431, float %41) %438 = call i32 @llvm.SI.packf16(float %435, float %436) %439 = bitcast i32 %438 to float %440 = call i32 @llvm.SI.packf16(float %437, float 1.000000e+00) %441 = bitcast i32 %440 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %439, float %441, float %439, float %441) ret void IF85: ; preds = %IF82 %442 = fmul float %206, %206 %443 = fmul float %207, %207 %444 = fadd float %443, %442 %445 = fmul float %208, %208 %446 = fadd float %444, %445 %447 = call float @llvm.AMDGPU.rsq.clamped.f32(float %446) %448 = fmul float %206, %447 %449 = fmul float %207, %447 %450 = fmul float %208, %447 %451 = fsub float %54, %116 %452 = fsub float %55, %117 %453 = fsub float %56, %118 %454 = fdiv float 1.000000e+00, %448 %455 = fdiv float 1.000000e+00, %449 %456 = fdiv float 1.000000e+00, %450 %457 = fmul float %451, %454 %458 = fmul float %452, %455 %459 = fmul float %453, %456 %460 = fsub float %57, %116 %461 = fsub float %58, %117 %462 = fsub float %59, %118 %463 = fdiv float 1.000000e+00, %448 %464 = fdiv float 1.000000e+00, %449 %465 = fdiv float 1.000000e+00, %450 %466 = fmul float %460, %463 %467 = fmul float %461, %464 %468 = fmul float %462, %465 %469 = fcmp ogt float %448, 0.000000e+00 %470 = fcmp ogt float %449, 0.000000e+00 %471 = fcmp ogt float %450, 0.000000e+00 %.97 = select i1 %469, float %457, float %466 %temp64.1 = select i1 %470, float %458, float %467 %.98 = select i1 %471, float %459, float %468 %472 = fadd float %54, %57 %473 = fadd float %55, %58 %474 = fadd float %56, %59 %475 = fmul float %472, 5.000000e-01 %476 = fmul float %473, 5.000000e-01 %477 = fmul float %474, 5.000000e-01 %478 = call float @llvm.minnum.f32(float %.97, float %temp64.1) %479 = call float @llvm.minnum.f32(float %478, float %.98) %480 = fsub float %475, %60 %481 = fsub float %476, %61 %482 = fsub float %477, %62 %483 = fadd float %480, %116 %484 = fadd float %481, %117 %485 = fadd float %482, %118 %486 = fmul float %448, %479 %487 = fadd float %486, %483 %488 = fmul float %449, %479 %489 = fadd float %488, %484 %490 = fmul float %450, %479 %491 = fadd float %490, %485 %492 = fsub float %487, %475 %493 = fsub float %489, %476 %494 = fsub float %491, %477 br label %ENDIF84 ENDIF84: ; preds = %IF82, %IF85 %temp44.0 = phi float [ %492, %IF85 ], [ %206, %IF82 ] %temp45.0 = phi float [ %493, %IF85 ], [ %207, %IF82 ] %temp46.0 = phi float [ %494, %IF85 ], [ %208, %IF82 ] %495 = fsub float 1.000000e+00, %80 %496 = call float @llvm.pow.f32(float %495, float 7.500000e-01) %497 = fmul float %496, 7.000000e+00 %498 = insertelement <4 x float> undef, float %temp44.0, i32 0 %499 = insertelement <4 x float> %498, float %temp45.0, i32 1 %500 = insertelement <4 x float> %499, float %temp46.0, i32 2 %501 = insertelement <4 x float> %500, float %497, i32 3 %502 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %501) %503 = extractelement <4 x float> %502, i32 0 %504 = extractelement <4 x float> %502, i32 1 %505 = extractelement <4 x float> %502, i32 2 %506 = extractelement <4 x float> %502, i32 3 %507 = call float @llvm.fabs.f32(float %505) %508 = fdiv float 1.000000e+00, %507 %509 = fmul float %503, %508 %510 = fadd float %509, 1.500000e+00 %511 = fmul float %504, %508 %512 = fadd float %511, 1.500000e+00 %513 = bitcast float %512 to i32 %514 = bitcast float %510 to i32 %515 = bitcast float %506 to i32 %516 = bitcast float %497 to i32 %517 = insertelement <4 x i32> undef, i32 %513, i32 0 %518 = insertelement <4 x i32> %517, i32 %514, i32 1 %519 = insertelement <4 x i32> %518, i32 %515, i32 2 %520 = insertelement <4 x i32> %519, i32 %516, i32 3 %521 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %520, <32 x i8> %88, <16 x i8> %91, i32 4) %522 = extractelement <4 x float> %521, i32 0 %523 = extractelement <4 x float> %521, i32 1 %524 = extractelement <4 x float> %521, i32 2 %525 = extractelement <4 x float> %521, i32 3 %526 = call float @llvm.pow.f32(float %525, float %65) %527 = fmul float %64, %526 %528 = fmul float %527, %522 %529 = fmul float %527, %523 %530 = fmul float %527, %524 %531 = call float @llvm.AMDGPU.lrp(float %50, float %299, float %528) %532 = call float @llvm.AMDGPU.lrp(float %50, float %300, float %529) %533 = call float @llvm.AMDGPU.lrp(float %50, float %301, float %530) br label %ENDIF81 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v13, v0, 0, 0, [m0] ; C8340000 v_interp_p2_f32 v13, [v13], v1, 0, 0, [m0] ; C8350001 v_interp_p1_f32 v14, v0, 1, 0, [m0] ; C8380100 v_interp_p2_f32 v14, [v14], v1, 1, 0, [m0] ; C8390101 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 v_interp_p1_f32 v12, v0, 2, 2, [m0] ; C8300A00 v_interp_p2_f32 v12, [v12], v1, 2, 2, [m0] ; C8310A01 v_interp_p1_f32 v7, v0, 0, 3, [m0] ; C81C0C00 v_interp_p2_f32 v7, [v7], v1, 0, 3, [m0] ; C81D0C01 v_interp_p1_f32 v15, v0, 1, 3, [m0] ; C83C0D00 v_interp_p2_f32 v15, [v15], v1, 1, 3, [m0] ; C83D0D01 v_interp_p1_f32 v16, v0, 2, 3, [m0] ; C8400E00 v_interp_p2_f32 v16, [v16], v1, 2, 3, [m0] ; C8410E01 v_interp_p1_f32 v17, v0, 3, 3, [m0] ; C8440F00 v_interp_p2_f32 v17, [v17], v1, 3, 3, [m0] ; C8450F01 v_mul_f32_e32 v4, v2, v2 ; 10080502 v_mac_f32_e32 v4, v3, v3 ; 3E080703 v_mac_f32_e32 v4, v6, v6 ; 3E080D06 v_rsq_clamp_f32_e32 v8, v4 ; 7E105904 v_mul_f32_e32 v4, v15, v15 ; 10081F0F v_mac_f32_e32 v4, v16, v16 ; 3E082110 v_mac_f32_e32 v4, v17, v17 ; 3E082311 v_rsq_clamp_f32_e32 v18, v4 ; 7E245904 v_mul_f32_e32 v5, v8, v2 ; 100A0508 v_mul_f32_e32 v4, v8, v3 ; 10080708 v_mul_f32_e32 v3, v8, v6 ; 10060D08 v_mul_f32_e32 v9, v18, v15 ; 10121F12 v_mul_f32_e32 v8, v18, v16 ; 10102112 v_mul_f32_e32 v2, v9, v5 ; 10040B09 v_mac_f32_e32 v2, v8, v4 ; 3E040908 v_mul_f32_e32 v6, v18, v17 ; 100C2312 v_mac_f32_e32 v2, v6, v3 ; 3E040706 v_mul_f32_e32 v19, v5, v2 ; 10260505 v_mac_f32_e32 v19, v5, v2 ; 3E260505 v_mul_f32_e32 v21, v4, v2 ; 102A0504 v_mac_f32_e32 v21, v4, v2 ; 3E2A0504 v_mad_f32 v20, v15, v18, -v19 ; D2820014 844E250F v_mad_f32 v21, v16, v18, -v21 ; D2820015 84562510 v_mul_f32_e32 v15, v3, v2 ; 101E0503 v_mac_f32_e32 v15, v3, v2 ; 3E1E0503 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_mad_f32 v22, v17, v18, -v15 ; D2820016 843E2511 v_interp_p1_f32 v24, v0, 0, 4, [m0] ; C8601000 v_interp_p2_f32 v24, [v24], v1, 0, 4, [m0] ; C8611001 s_load_dwordx4 s[0:3], s[4:5], 0x8 ; C0800508 s_load_dwordx8 s[12:19], s[6:7], 0x10 ; C0C60710 v_interp_p1_f32 v23, v0, 1, 4, [m0] ; C85C1100 v_interp_p2_f32 v23, [v23], v1, 1, 4, [m0] ; C85D1101 v_interp_p1_f32 v25, v0, 2, 4, [m0] ; C8641200 v_interp_p2_f32 v25, [v25], v1, 2, 4, [m0] ; C8651201 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s20, s[8:11], 0x4c ; C20A094C s_buffer_load_dword s21, s[8:11], 0x4d ; C20A894D s_buffer_load_dword s22, s[8:11], 0x4e ; C20B094E s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C s_load_dwordx8 s[32:39], s[6:7], 0x18 ; C0D00718 image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[12:19], s[0:3] ; F0800700 0003000D s_buffer_load_dword s0, s[8:11], 0x41 ; C2000941 s_buffer_load_dword s1, s[8:11], 0x42 ; C2008942 s_buffer_load_dword s28, s[8:11], 0x54 ; C20E0954 s_buffer_load_dword s2, s[8:11], 0x40 ; C2010940 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v17, s20, v0 ; 10220014 v_mul_f32_e32 v18, s21, v1 ; 10240215 v_mul_f32_e32 v19, s22, v2 ; 10260416 s_buffer_load_dword s12, s[8:11], 0x27 ; C2060927 s_buffer_load_dword s13, s[8:11], 0x2b ; C206892B s_buffer_load_dword s29, s[8:11], 0x2c ; C20E892C s_buffer_load_dword s30, s[8:11], 0x2d ; C20F092D s_buffer_load_dword s3, s[8:11], 0x58 ; C2018958 v_sub_f32_e64 v0, 1.0, s28 ; D2080000 000038F2 v_mul_f32_e32 v2, s2, v0 ; 10040002 v_mul_f32_e32 v1, s0, v0 ; 10020000 v_mul_f32_e32 v0, s1, v0 ; 10000001 v_mac_f32_e32 v2, s28, v17 ; 3E04221C v_mov_b32_e32 v26, v20 ; 7E340314 v_mac_f32_e32 v1, s28, v18 ; 3E02241C v_mov_b32_e32 v27, v21 ; 7E360315 v_mac_f32_e32 v0, s28, v19 ; 3E00261C v_mov_b32_e32 v28, v22 ; 7E380316 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[0:1], 0, s13 ; D0020000 00001A80 image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[32:39], s[24:27] ; F0800F00 00C80D0D s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[20:21], s[0:1] ; BE942400 s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E s_cbranch_execz BB0_2 ; BF880000 s_buffer_load_dword s0, s[8:11], 0x20 ; C2000920 s_buffer_load_dword s1, s[8:11], 0x21 ; C2008921 s_buffer_load_dword s2, s[8:11], 0x22 ; C2010922 s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924 s_buffer_load_dword s14, s[8:11], 0x25 ; C2070925 v_mul_f32_e32 v13, v20, v20 ; 101A2914 v_mac_f32_e32 v13, v21, v21 ; 3E1A2B15 v_mac_f32_e32 v13, v22, v22 ; 3E1A2D16 v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D s_buffer_load_dword s15, s[8:11], 0x26 ; C2078926 s_buffer_load_dword s16, s[8:11], 0x28 ; C2080928 s_buffer_load_dword s17, s[8:11], 0x29 ; C2088929 s_buffer_load_dword s18, s[8:11], 0x2a ; C209092A v_mul_f32_e32 v15, v13, v20 ; 101E290D v_mul_f32_e32 v16, v13, v21 ; 10202B0D v_mul_f32_e32 v13, v13, v22 ; 101A2D0D v_rcp_f32_e32 v26, v15 ; 7E34550F s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v27, s0, v24 ; 08363000 v_sub_f32_e32 v28, s1, v23 ; 08382E01 v_rcp_f32_e32 v29, v16 ; 7E3A5510 v_mul_f32_e32 v27, v26, v27 ; 1036371A v_sub_f32_e32 v30, s13, v24 ; 083C300D v_mul_f32_e32 v26, v26, v30 ; 10343D1A v_cmp_lt_f32_e32 vcc, 0, v15 ; 7C021E80 v_cndmask_b32_e32 v26, v26, v27 ; 0034371A v_rcp_f32_e32 v27, v13 ; 7E36550D v_mul_f32_e32 v28, v29, v28 ; 1038391D v_sub_f32_e32 v30, s14, v23 ; 083C2E0E v_mul_f32_e32 v29, v29, v30 ; 103A3D1D v_cmp_lt_f32_e32 vcc, 0, v16 ; 7C022080 v_cndmask_b32_e32 v28, v29, v28 ; 0038391D v_sub_f32_e32 v29, s2, v25 ; 083A3202 v_mul_f32_e32 v29, v27, v29 ; 103A3B1B v_sub_f32_e32 v30, s15, v25 ; 083C320F v_mul_f32_e32 v27, v27, v30 ; 10363D1B v_cmp_lt_f32_e32 vcc, 0, v13 ; 7C021A80 v_cndmask_b32_e32 v27, v27, v29 ; 00363B1B v_min3_f32 v26, v26, v28, v27 ; D2A2001A 046E391A v_mov_b32_e32 v27, s13 ; 7E36020D v_add_f32_e32 v27, s0, v27 ; 06363600 v_mov_b32_e32 v28, s14 ; 7E38020E v_add_f32_e32 v28, s1, v28 ; 06383801 v_mov_b32_e32 v29, s15 ; 7E3A020F v_add_f32_e32 v29, s2, v29 ; 063A3A02 v_mad_f32 v30, 0.5, v27, -s16 ; D282001E 804236F0 v_add_f32_e32 v30, v24, v30 ; 063C3D18 v_mac_f32_e32 v30, v26, v15 ; 3E3C1F1A v_mad_f32 v15, 0.5, v28, -s17 ; D282000F 804638F0 v_add_f32_e32 v15, v23, v15 ; 061E1F17 v_mac_f32_e32 v15, v26, v16 ; 3E1E211A v_mad_f32 v16, 0.5, v29, -s18 ; D2820010 804A3AF0 v_add_f32_e32 v16, v25, v16 ; 06202119 v_mac_f32_e32 v16, v26, v13 ; 3E201B1A v_mad_f32 v26, 0.5, -v27, v30 ; D282001A 447A36F0 v_mad_f32 v27, 0.5, -v28, v15 ; D282001B 443E38F0 v_mad_f32 v28, 0.5, -v29, v16 ; D282001C 44423AF0 s_or_b64 exec, exec, s[20:21] ; 88FE147E s_buffer_load_dword s14, s[8:11], 0x17 ; C2070917 s_buffer_load_dword s15, s[8:11], 0x43 ; C2078943 s_buffer_load_dword s13, s[8:11], 0x60 ; C2068960 s_buffer_load_dword s0, s[8:11], 0x0 ; C2000900 s_buffer_load_dword s1, s[8:11], 0x1 ; C2008901 s_buffer_load_dword s2, s[8:11], 0x2 ; C2010902 s_buffer_load_dword s16, s[8:11], 0x4 ; C2080904 s_buffer_load_dword s17, s[8:11], 0x5 ; C2088905 s_buffer_load_dword s18, s[8:11], 0x6 ; C2090906 s_buffer_load_dword s20, s[8:11], 0x7 ; C20A0907 s_buffer_load_dword s19, s[8:11], 0x8 ; C2098908 s_buffer_load_dword s21, s[8:11], 0x9 ; C20A8909 s_buffer_load_dword s22, s[8:11], 0xa ; C20B090A s_buffer_load_dword s23, s[8:11], 0xb ; C20B890B s_buffer_load_dword s24, s[8:11], 0xc ; C20C090C s_buffer_load_dword s25, s[8:11], 0xd ; C20C890D s_buffer_load_dword s26, s[8:11], 0xe ; C20D090E s_buffer_load_dword s27, s[8:11], 0xf ; C20D890F v_sub_f32_e64 v13, 1.0, s3 ; D208000D 000006F2 v_log_f32_e32 v13, v13 ; 7E1A4F0D s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 v_mul_legacy_f32_e32 v13, 0x3f400000, v13 ; 0E1A1AFF 3F400000 v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_mul_f32_e32 v29, 0x40e00000, v13 ; 103A1AFF 40E00000 v_cubeid_f32 v33, v26, v27, v28 ; D2880021 0472371A v_cubema_f32 v32, v26, v27, v28 ; D28E0020 0472371A v_cubesc_f32 v31, v26, v27, v28 ; D28A001F 0472371A v_cubetc_f32 v30, v26, v27, v28 ; D28C001E 0472371A v_mov_b32_e32 v26, 0x3fc00000 ; 7E3402FF 3FC00000 v_rcp_f32_e64 v13, |v32| ; D354010D 00000120 v_mad_f32 v27, v13, v30, v26 ; D282001B 046A3D0D v_mac_f32_e32 v26, v13, v31 ; 3E343F0D v_mov_b32_e32 v28, v33 ; 7E380321 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[36:43], s[32:35] ; F0900F00 01091A1A s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v13, v29 ; 7E1A4F1D v_mul_legacy_f32_e32 v13, s30, v13 ; 0E1A1A1E v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_mul_f32_e32 v13, s29, v13 ; 101A1A1D v_mul_f32_e32 v16, v26, v13 ; 10201B1A v_mul_f32_e32 v15, v27, v13 ; 101E1B1B v_mul_f32_e32 v13, v28, v13 ; 101A1B1C v_mov_b32_e32 v26, s28 ; 7E34021C v_mov_b32_e32 v27, 0x3f7fff58 ; 7E3602FF 3F7FFF58 v_cmp_lt_f32_e32 vcc, s12, v27 ; 7C02360C s_and_saveexec_b64 s[28:29], vcc ; BE9C246A s_xor_b64 s[28:29], exec, s[28:29] ; 899C1C7E s_cbranch_execz BB0_6 ; BF880000 s_buffer_load_dword s32, s[8:11], 0x3b ; C210093B s_buffer_load_dword s30, s[8:11], 0x3c ; C20F093C s_buffer_load_dword s31, s[8:11], 0x3d ; C20F893D s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[32:33], 0, s32 ; D0020020 00004080 s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420 s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E s_cbranch_execz BB0_7 ; BF880000 s_buffer_load_dword s34, s[8:11], 0x36 ; C2110936 s_buffer_load_dword s35, s[8:11], 0x38 ; C2118938 s_buffer_load_dword s36, s[8:11], 0x39 ; C2120939 s_buffer_load_dword s37, s[8:11], 0x3a ; C212893A s_buffer_load_dword s38, s[8:11], 0x30 ; C2130930 s_buffer_load_dword s39, s[8:11], 0x31 ; C2138931 s_buffer_load_dword s40, s[8:11], 0x32 ; C2140932 s_buffer_load_dword s41, s[8:11], 0x34 ; C2148934 s_buffer_load_dword s42, s[8:11], 0x35 ; C2150935 v_mul_f32_e32 v27, v20, v20 ; 10362914 v_mac_f32_e32 v27, v21, v21 ; 3E362B15 v_mac_f32_e32 v27, v22, v22 ; 3E362D16 v_rsq_clamp_f32_e32 v27, v27 ; 7E36591B s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v28, s34, v25 ; 08383222 v_mov_b32_e32 v29, s34 ; 7E3A0222 v_sub_f32_e32 v30, s38, v24 ; 083C3026 v_sub_f32_e32 v31, s39, v23 ; 083E2E27 v_add_f32_e32 v29, s40, v29 ; 063A3A28 v_sub_f32_e32 v32, s40, v25 ; 08403228 v_mad_f32 v33, 0.5, v29, -s37 ; D2820021 80963AF0 v_add_f32_e32 v25, v25, v33 ; 06324319 v_mul_f32_e32 v20, v27, v20 ; 1028291B v_mul_f32_e32 v21, v27, v21 ; 102A2B1B v_mul_f32_e32 v22, v27, v22 ; 102C2D1B v_rcp_f32_e32 v27, v20 ; 7E365514 v_rcp_f32_e32 v33, v21 ; 7E425515 v_rcp_f32_e32 v34, v22 ; 7E445516 v_sub_f32_e32 v35, s41, v24 ; 08463029 v_mov_b32_e32 v36, s41 ; 7E480229 v_add_f32_e32 v36, s38, v36 ; 06484826 v_mul_f32_e32 v30, v27, v30 ; 103C3D1B v_mul_f32_e32 v27, v27, v35 ; 1036471B v_mul_f32_e32 v31, v33, v31 ; 103E3F21 v_mul_f32_e32 v32, v34, v32 ; 10404122 v_mul_f32_e32 v28, v34, v28 ; 10383922 v_mad_f32 v34, 0.5, v36, -s35 ; D2820022 808E48F0 v_add_f32_e32 v24, v24, v34 ; 06304518 v_sub_f32_e32 v34, s42, v23 ; 08442E2A v_mov_b32_e32 v35, s42 ; 7E46022A v_mul_f32_e32 v33, v33, v34 ; 10424521 v_add_f32_e32 v34, s39, v35 ; 06444627 v_cmp_lt_f32_e32 vcc, 0, v20 ; 7C022880 v_cndmask_b32_e32 v27, v27, v30 ; 00363D1B v_cmp_lt_f32_e32 vcc, 0, v21 ; 7C022A80 v_cndmask_b32_e32 v30, v33, v31 ; 003C3F21 v_cmp_lt_f32_e32 vcc, 0, v22 ; 7C022C80 v_cndmask_b32_e32 v28, v28, v32 ; 0038411C v_min3_f32 v27, v27, v30, v28 ; D2A2001B 04723D1B v_mad_f32 v28, 0.5, v34, -s36 ; D282001C 809244F0 v_add_f32_e32 v23, v23, v28 ; 062E3917 v_mac_f32_e32 v24, v27, v20 ; 3E30291B v_mac_f32_e32 v23, v27, v21 ; 3E2E2B1B v_mac_f32_e32 v25, v27, v22 ; 3E322D1B v_mad_f32 v20, 0.5, -v36, v24 ; D2820014 446248F0 v_mad_f32 v21, 0.5, -v34, v23 ; D2820015 445E44F0 v_mad_f32 v22, 0.5, -v29, v25 ; D2820016 44663AF0 s_or_b64 exec, exec, s[32:33] ; 88FE207E v_sub_f32_e64 v23, 1.0, s3 ; D2080017 000006F2 v_log_f32_e32 v23, v23 ; 7E2E4F17 s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504 v_mul_legacy_f32_e32 v23, 0x3f400000, v23 ; 0E2E2EFF 3F400000 v_exp_f32_e32 v23, v23 ; 7E2E4B17 v_mul_f32_e32 v23, 0x40e00000, v23 ; 102E2EFF 40E00000 v_cubeid_f32 v30, v20, v21, v22 ; D288001E 045A2B14 v_cubema_f32 v29, v20, v21, v22 ; D28E001D 045A2B14 s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708 v_cubesc_f32 v28, v20, v21, v22 ; D28A001C 045A2B14 v_cubetc_f32 v27, v20, v21, v22 ; D28C001B 045A2B14 v_rcp_f32_e64 v22, |v29| ; D3540116 0000011D v_mov_b32_e32 v20, 0x3fc00000 ; 7E2802FF 3FC00000 v_mad_f32 v21, v22, v27, v20 ; D2820015 04523716 v_mac_f32_e32 v20, v22, v28 ; 3E283916 v_mov_b32_e32 v22, v30 ; 7E2C031E s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[20:23], s[36:43], s[32:35] ; F0900F00 01091414 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v23, v23 ; 7E2E4F17 v_sub_f32_e64 v24, 1.0, s12 ; D2080018 000018F2 v_mul_legacy_f32_e32 v23, s31, v23 ; 0E2E2E1F v_exp_f32_e32 v23, v23 ; 7E2E4B17 v_mul_f32_e32 v23, s30, v23 ; 102E2E1E v_mul_f32_e32 v20, v20, v23 ; 10282F14 v_mul_f32_e32 v21, v21, v23 ; 102A2F15 v_mul_f32_e32 v22, v22, v23 ; 102C2F16 v_mul_f32_e32 v20, v20, v24 ; 10283114 v_mul_f32_e32 v21, v21, v24 ; 102A3115 v_mul_f32_e32 v22, v22, v24 ; 102C3116 v_mac_f32_e32 v20, s12, v16 ; 3E28200C v_mac_f32_e32 v21, s12, v15 ; 3E2A1E0C v_mac_f32_e32 v22, s12, v13 ; 3E2C1A0C v_mov_b32_e32 v13, v22 ; 7E1A0316 v_mov_b32_e32 v15, v21 ; 7E1E0315 v_mov_b32_e32 v16, v20 ; 7E200314 s_or_b64 exec, exec, s[28:29] ; 88FE1C7E v_mad_f32 v22, -v26, s15, s15 ; D2820016 203C1F1A v_mov_b32_e32 v20, s14 ; 7E28020E v_mul_f32_e32 v21, v22, v17 ; 102A2316 v_mul_f32_e32 v18, v22, v18 ; 10242516 v_mul_f32_e32 v17, v22, v19 ; 10222716 v_mul_f32_e32 v19, s17, v4 ; 10260811 v_mac_f32_e32 v19, s16, v5 ; 3E260A10 v_mac_f32_e32 v19, s18, v3 ; 3E260612 v_add_f32_e32 v19, s20, v19 ; 06262614 v_add_f32_e32 v23, v19, v10 ; 062E1513 v_mul_f32_e32 v10, s21, v4 ; 10140815 v_mac_f32_e32 v10, s19, v5 ; 3E140A13 v_mac_f32_e32 v10, s22, v3 ; 3E140616 v_add_f32_e32 v10, s23, v10 ; 06141417 v_add_f32_e32 v11, v10, v11 ; 0616170A v_mul_f32_e32 v10, s25, v4 ; 10140819 v_mac_f32_e32 v10, s24, v5 ; 3E140A18 v_mac_f32_e32 v10, s26, v3 ; 3E14061A v_add_f32_e32 v10, s27, v10 ; 0614141B v_add_f32_e32 v12, v10, v12 ; 0618190A s_buffer_load_dword s6, s[8:11], 0x10 ; C2030910 s_buffer_load_dword s5, s[8:11], 0x11 ; C2028911 s_buffer_load_dword s4, s[8:11], 0x12 ; C2020912 s_buffer_load_dword s17, s[8:11], 0x16 ; C2088916 s_buffer_load_dword s14, s[8:11], 0x44 ; C2070944 s_buffer_load_dword s7, s[8:11], 0x45 ; C2038945 s_buffer_load_dword s12, s[8:11], 0x46 ; C2060946 s_buffer_load_dword s15, s[8:11], 0x48 ; C2078948 s_buffer_load_dword s16, s[8:11], 0x49 ; C2080949 s_buffer_load_dword s8, s[8:11], 0x4b ; C204094B v_sub_f32_e64 v19, 1.0, s13 ; D2080013 00001AF2 v_mac_f32_e32 v19, s13, v14 ; 3E261C0D v_mul_f32_e32 v10, s0, v5 ; 10140A00 v_mac_f32_e32 v10, s1, v4 ; 3E140801 v_mac_f32_e32 v10, s2, v3 ; 3E140602 v_max_f32_e32 v10, 0, v10 ; 20141480 v_mul_f32_e32 v14, v19, v23 ; 101C2F13 v_mul_f32_e32 v11, v19, v11 ; 10161713 v_mul_f32_e32 v12, v19, v12 ; 10181913 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v20, s17, v7 ; 3E280E11 v_mul_f32_e32 v7, v19, v16 ; 100E2113 v_mul_f32_e32 v15, v19, v15 ; 101E1F13 v_mul_f32_e32 v13, v19, v13 ; 101A1B13 v_sub_f32_e32 v16, 1.0, v22 ; 08202CF2 v_add_f32_e32 v16, s3, v16 ; 06202003 v_sub_f32_e64 v19, 1.0, s3 ; D2080013 000006F2 v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080 v_sub_f32_e32 v22, s0, v9 ; 082C1200 v_sub_f32_e32 v23, s1, v8 ; 082E1001 v_mul_f32_e32 v24, v22, v22 ; 10302D16 v_mac_f32_e32 v24, v23, v23 ; 3E302F17 v_sub_f32_e32 v25, s2, v6 ; 08320C02 v_mac_f32_e32 v24, v25, v25 ; 3E303319 v_rsq_clamp_f32_e32 v24, v24 ; 7E305918 v_mul_f32_e32 v22, v24, v22 ; 102C2D18 v_mul_f32_e32 v23, v24, v23 ; 102E2F18 v_mul_f32_e32 v24, v24, v25 ; 10303318 v_mul_f32_e32 v9, v9, v5 ; 10120B09 v_mad_f32 v8, -v8, v4, -v9 ; D2820008 A4260908 v_mul_f32_e32 v5, v22, v5 ; 100A0B16 v_mac_f32_e32 v5, v23, v4 ; 3E0A0917 v_mul_f32_e32 v4, s0, v22 ; 10082C00 v_mac_f32_e32 v4, s1, v23 ; 3E082E01 v_mad_f32 v6, -v6, v3, v8 ; D2820006 24220706 v_mac_f32_e32 v4, s2, v24 ; 3E083002 v_mac_f32_e32 v5, v24, v3 ; 3E0A0718 v_max_f32_e32 v3, 0, v4 ; 20060880 v_sub_f32_e32 v4, 1.0, v3 ; 080806F2 v_mul_f32_e32 v8, v4, v4 ; 10100904 v_mul_f32_e32 v4, v4, v8 ; 10081104 v_mul_f32_e32 v4, v4, v8 ; 10081104 v_max_f32_e32 v6, 0, v6 ; 200C0C80 v_sub_f32_e32 v8, 1.0, v6 ; 08100CF2 v_mul_f32_e32 v9, v8, v8 ; 10121108 v_mul_f32_e32 v22, v8, v9 ; 102C1308 v_mad_f32 v23, -v9, v22, 1.0 ; D2820017 23CA2D09 v_mul_f32_e32 v24, v2, v23 ; 10302F02 v_sub_f32_e32 v25, 1.0, v2 ; 083204F2 v_mac_f32_e32 v2, v4, v25 ; 3E043304 v_mul_f32_e32 v25, v1, v23 ; 10322F01 v_sub_f32_e32 v26, 1.0, v1 ; 083402F2 v_mac_f32_e32 v1, v4, v26 ; 3E023504 v_mul_f32_e32 v23, v0, v23 ; 102E2F00 v_sub_f32_e32 v26, 1.0, v0 ; 083400F2 v_mac_f32_e32 v0, v4, v26 ; 3E003504 v_sub_f32_e32 v4, 1.0, v19 ; 080826F2 v_mov_b32_e32 v26, 0x3cf5c28f ; 7E3402FF 3CF5C28F v_madmk_f32_e32 v4, v4, v26, 0x3f77ced9 ; 40083504 3F77CED9 v_add_f32_e32 v26, v3, v3 ; 06340703 v_mul_f32_e32 v3, v19, v3 ; 10060713 v_mad_f32 v3, v26, v3, 0.5 ; D2820003 03C2071A v_mul_f32_e32 v9, v22, v9 ; 10121316 v_mac_f32_e32 v24, v16, v9 ; 3E301310 v_mac_f32_e32 v25, v16, v9 ; 3E321310 v_mac_f32_e32 v23, v16, v9 ; 3E2E1310 v_mul_f32_e32 v16, v19, v19 ; 10202713 v_log_f32_e32 v4, v4 ; 7E084F04 v_mul_f32_e32 v16, s8, v16 ; 10202008 v_mul_f32_e32 v8, v16, v8 ; 10101110 v_mac_f32_e32 v8, 1.0, v6 ; 3E100CF2 v_rcp_f32_e32 v4, v4 ; 7E085504 v_sub_f32_e32 v6, 1.0, v10 ; 080C14F2 v_mul_f32_e32 v16, v16, v6 ; 10200D10 v_mac_f32_e32 v16, 1.0, v10 ; 3E2014F2 v_max_f32_e32 v5, 0, v5 ; 200A0A80 v_log_f32_e32 v5, v5 ; 7E0A4F05 v_madak_f32_e32 v8, v16, v8, 0x38d1b717 ; 42101110 38D1B717 v_mul_f32_e32 v4, 0x41200000, v4 ; 100808FF 41200000 v_mul_f32_e32 v16, v4, v4 ; 10200904 v_mul_legacy_f32_e32 v5, v16, v5 ; 0E0A0B10 v_rcp_f32_e32 v8, v8 ; 7E105508 v_mad_f32 v4, v4, v4, 1.0 ; D2820004 03CA0904 v_mul_f32_e32 v4, s16, v4 ; 10080810 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_mul_f32_e32 v4, v4, v5 ; 10080B04 v_mul_f32_e32 v4, v4, v8 ; 10081104 v_mul_f32_e32 v4, v10, v4 ; 1008090A v_mul_f32_e32 v4, s15, v4 ; 1008080F v_mul_f32_e32 v5, v6, v6 ; 100A0D06 v_mul_f32_e32 v6, v6, v5 ; 100C0B06 v_mul_f32_e32 v5, v6, v5 ; 100A0B06 v_add_f32_e32 v3, -1.0, v3 ; 060606F3 v_mad_f32 v5, v3, v5, 1.0 ; D2820005 03CA0B03 v_mad_f32 v3, v3, v9, 1.0 ; D2820003 03CA1303 v_mul_f32_e32 v3, v3, v5 ; 10060B03 v_mul_f32_e32 v3, v10, v3 ; 1006070A v_mac_f32_e32 v14, s14, v3 ; 3E1C060E v_mul_f32_e32 v5, v14, v21 ; 100A2B0E v_max_f32_e32 v4, 0, v4 ; 20080880 v_mul_f32_e32 v6, s14, v4 ; 100C080E v_mac_f32_e32 v5, v2, v6 ; 3E0A0D02 v_mac_f32_e32 v11, s7, v3 ; 3E160607 v_mac_f32_e32 v12, s12, v3 ; 3E18060C v_mul_f32_e32 v2, s7, v4 ; 10040807 v_mul_f32_e32 v3, s12, v4 ; 1006080C v_mul_f32_e32 v4, v11, v18 ; 1008250B v_mul_f32_e32 v6, v12, v17 ; 100C230C v_mac_f32_e32 v4, v1, v2 ; 3E080501 v_mac_f32_e32 v6, v0, v3 ; 3E0C0700 v_mac_f32_e32 v5, v24, v7 ; 3E0A0F18 v_mac_f32_e32 v4, v25, v15 ; 3E081F19 v_mac_f32_e32 v6, v23, v13 ; 3E0C1B17 v_add_f32_e64 v0, 0, v20 clamp ; D2060800 00022880 v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 v_mul_f32_e32 v2, s6, v1 ; 10040206 v_mac_f32_e32 v2, v5, v0 ; 3E040105 v_mul_f32_e32 v3, s5, v1 ; 10060205 v_mac_f32_e32 v3, v4, v0 ; 3E060104 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mac_f32_e32 v1, v6, v0 ; 3E020106 v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702 v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 40 Code Size: 2112 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL OUT[6], GENERIC[5] DCL OUT[7], GENERIC[6] DCL CONST[0..19] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[5], IN[0].xxxx 1: MAD TEMP[0], CONST[6], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[7], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0].xyz, CONST[8], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[16], IN[0].xxxx 5: MAD TEMP[1], CONST[17], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[18], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1], CONST[19], IN[0].wwww, TEMP[1] 8: MAD TEMP[2].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww 9: FSEQ TEMP[3].x, CONST[15].xxxx, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].xy, IN[2].xyxx 12: ELSE :0 13: MOV TEMP[3].xy, IN[3].xyxx 14: ENDIF 15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[14].xyyy, CONST[14].zwww 16: MOV TEMP[2].zw, TEMP[3].yyxy 17: MOV TEMP[3].x, CONST[9].xxxx 18: MOV TEMP[3].y, CONST[10].xxxx 19: MOV TEMP[3].z, CONST[11].xxxx 20: MOV TEMP[4].x, CONST[9].yyyy 21: MOV TEMP[4].y, CONST[10].yyyy 22: MOV TEMP[4].z, CONST[11].yyyy 23: MOV TEMP[5].x, CONST[9].zzzz 24: MOV TEMP[5].y, CONST[10].zzzz 25: MOV TEMP[5].z, CONST[11].zzzz 26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz 29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 30: RSQ TEMP[4].x, TEMP[4].xxxx 31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 32: MUL TEMP[4].xyz, CONST[5].xyzz, IN[4].xxxx 33: MAD TEMP[4].xyz, CONST[6].xyzz, IN[4].yyyy, TEMP[4].xyzz 34: MAD TEMP[4].xyz, CONST[7].xyzz, IN[4].zzzz, TEMP[4].xyzz 35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 36: RSQ TEMP[5].x, TEMP[5].xxxx 37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx 39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz 40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww 41: MOV TEMP[4].xyz, TEMP[4].xyzx 42: MOV TEMP[5].xyz, TEMP[5].xyzx 43: MOV TEMP[6].xyz, TEMP[3].xyzx 44: MUL TEMP[7], TEMP[3].xyzz, TEMP[3].yzzx 45: DP4 TEMP[8].x, CONST[1], TEMP[7] 46: DP4 TEMP[9].x, CONST[2], TEMP[7] 47: MOV TEMP[8].y, TEMP[9].xxxx 48: DP4 TEMP[7].x, CONST[3], TEMP[7] 49: MOV TEMP[8].z, TEMP[7].xxxx 50: MUL TEMP[7].x, TEMP[3].yyyy, TEMP[3].yyyy 51: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[7].xxxx 52: MAD TEMP[3].xyz, CONST[4].xyzz, TEMP[3].xxxx, TEMP[8].xyzz 53: ADD TEMP[7].xyz, TEMP[0].xyzz, -CONST[0].xyzz 54: MOV TEMP[7].yzw, TEMP[7].yxyz 55: MOV TEMP[7].x, TEMP[1].zzzz 56: MOV TEMP[0].xyz, TEMP[0].xyzx 57: MOV OUT[7], TEMP[0] 58: MOV OUT[1], TEMP[2] 59: MOV OUT[3], TEMP[5] 60: MOV OUT[2], TEMP[4] 61: MOV OUT[4], TEMP[6] 62: MOV OUT[5], TEMP[3] 63: MOV OUT[0], TEMP[1] 64: MOV OUT[6], TEMP[7] 65: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %5, %7 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = extractelement <4 x float> %83, i32 3 %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 %90 = add i32 %5, %7 %91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %90) %92 = extractelement <4 x float> %91, i32 0 %93 = extractelement <4 x float> %91, i32 1 %94 = extractelement <4 x float> %91, i32 2 %95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0 %97 = add i32 %5, %7 %98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97) %99 = extractelement <4 x float> %98, i32 0 %100 = extractelement <4 x float> %98, i32 1 %101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0 %103 = add i32 %5, %7 %104 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %103) %105 = extractelement <4 x float> %104, i32 0 %106 = extractelement <4 x float> %104, i32 1 %107 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !tbaa !0 %109 = add i32 %5, %7 %110 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %108, i32 0, i32 %109) %111 = extractelement <4 x float> %110, i32 0 %112 = extractelement <4 x float> %110, i32 1 %113 = extractelement <4 x float> %110, i32 2 %114 = extractelement <4 x float> %110, i32 3 %115 = fmul float %31, %84 %116 = fmul float %32, %84 %117 = fmul float %33, %84 %118 = fmul float %34, %84 %119 = fmul float %35, %85 %120 = fadd float %119, %115 %121 = fmul float %36, %85 %122 = fadd float %121, %116 %123 = fmul float %37, %85 %124 = fadd float %123, %117 %125 = fmul float %38, %85 %126 = fadd float %125, %118 %127 = fmul float %39, %86 %128 = fadd float %127, %120 %129 = fmul float %40, %86 %130 = fadd float %129, %122 %131 = fmul float %41, %86 %132 = fadd float %131, %124 %133 = fmul float %42, %86 %134 = fadd float %133, %126 %135 = fmul float %43, %87 %136 = fadd float %135, %128 %137 = fmul float %44, %87 %138 = fadd float %137, %130 %139 = fmul float %45, %87 %140 = fadd float %139, %132 %141 = fmul float %64, %84 %142 = fmul float %65, %84 %143 = fmul float %66, %84 %144 = fmul float %67, %84 %145 = fmul float %68, %85 %146 = fadd float %145, %141 %147 = fmul float %69, %85 %148 = fadd float %147, %142 %149 = fmul float %70, %85 %150 = fadd float %149, %143 %151 = fmul float %71, %85 %152 = fadd float %151, %144 %153 = fmul float %72, %86 %154 = fadd float %153, %146 %155 = fmul float %73, %86 %156 = fadd float %155, %148 %157 = fmul float %74, %86 %158 = fadd float %157, %150 %159 = fmul float %75, %86 %160 = fadd float %159, %152 %161 = fmul float %76, %87 %162 = fadd float %161, %154 %163 = fmul float %77, %87 %164 = fadd float %163, %156 %165 = fmul float %78, %87 %166 = fadd float %165, %158 %167 = fmul float %79, %87 %168 = fadd float %167, %160 %169 = fmul float %99, %55 %170 = fadd float %169, %57 %171 = fmul float %100, %56 %172 = fadd float %171, %58 %173 = fcmp oeq float %63, 0.000000e+00 %. = select i1 %173, float %99, float %105 %.40 = select i1 %173, float %100, float %106 %174 = fmul float %., %59 %175 = fadd float %174, %61 %176 = fmul float %.40, %60 %177 = fadd float %176, %62 %178 = fmul float %46, %92 %179 = fmul float %49, %92 %180 = fmul float %52, %92 %181 = fmul float %47, %93 %182 = fadd float %181, %178 %183 = fmul float %50, %93 %184 = fadd float %183, %179 %185 = fmul float %53, %93 %186 = fadd float %185, %180 %187 = fmul float %48, %94 %188 = fadd float %187, %182 %189 = fmul float %51, %94 %190 = fadd float %189, %184 %191 = fmul float %54, %94 %192 = fadd float %191, %186 %193 = fmul float %188, %188 %194 = fmul float %190, %190 %195 = fadd float %194, %193 %196 = fmul float %192, %192 %197 = fadd float %195, %196 %198 = call float @llvm.AMDGPU.rsq.clamped.f32(float %197) %199 = fmul float %188, %198 %200 = fmul float %190, %198 %201 = fmul float %192, %198 %202 = fmul float %31, %111 %203 = fmul float %32, %111 %204 = fmul float %33, %111 %205 = fmul float %35, %112 %206 = fadd float %205, %202 %207 = fmul float %36, %112 %208 = fadd float %207, %203 %209 = fmul float %37, %112 %210 = fadd float %209, %204 %211 = fmul float %39, %113 %212 = fadd float %211, %206 %213 = fmul float %40, %113 %214 = fadd float %213, %208 %215 = fmul float %41, %113 %216 = fadd float %215, %210 %217 = fmul float %212, %212 %218 = fmul float %214, %214 %219 = fadd float %218, %217 %220 = fmul float %216, %216 %221 = fadd float %219, %220 %222 = call float @llvm.AMDGPU.rsq.clamped.f32(float %221) %223 = fmul float %212, %222 %224 = fmul float %214, %222 %225 = fmul float %216, %222 %226 = fmul float %201, %224 %227 = fmul float %199, %225 %228 = fmul float %200, %223 %229 = fmul float %200, %225 %230 = fsub float %229, %226 %231 = fmul float %201, %223 %232 = fsub float %231, %227 %233 = fmul float %199, %224 %234 = fsub float %233, %228 %235 = fmul float %230, %114 %236 = fmul float %232, %114 %237 = fmul float %234, %114 %238 = fmul float %199, %200 %239 = fmul float %200, %201 %240 = fmul float %201, %201 %241 = fmul float %201, %199 %242 = fmul float %16, %238 %243 = fmul float %17, %239 %244 = fadd float %242, %243 %245 = fmul float %18, %240 %246 = fadd float %244, %245 %247 = fmul float %19, %241 %248 = fadd float %246, %247 %249 = fmul float %20, %238 %250 = fmul float %21, %239 %251 = fadd float %249, %250 %252 = fmul float %22, %240 %253 = fadd float %251, %252 %254 = fmul float %23, %241 %255 = fadd float %253, %254 %256 = fmul float %24, %238 %257 = fmul float %25, %239 %258 = fadd float %256, %257 %259 = fmul float %26, %240 %260 = fadd float %258, %259 %261 = fmul float %27, %241 %262 = fadd float %260, %261 %263 = fmul float %200, %200 %264 = fmul float %199, %199 %265 = fsub float %264, %263 %266 = fmul float %28, %265 %267 = fadd float %266, %248 %268 = fmul float %29, %265 %269 = fadd float %268, %255 %270 = fmul float %30, %265 %271 = fadd float %270, %262 %272 = fsub float %136, %13 %273 = fsub float %138, %14 %274 = fsub float %140, %15 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %170, float %172, float %175, float %177) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %223, float %224, float %225, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %235, float %236, float %237, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %199, float %200, float %201, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %267, float %269, float %271, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %166, float %272, float %273, float %274) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %136, float %138, float %140, float %134) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %162, float %164, float %166, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908 s_load_dwordx4 s[16:19], s[8:9], 0xc ; C088090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s27, s[28:31], 0x20 ; C20D9D20 buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[11:14], v0, s[16:19], 0 idxen ; E00C2000 80040B00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[13:16], v0, s[8:11], 0 idxen ; E00C2000 80020D00 s_buffer_load_dword s32, s[28:31], 0x21 ; C2101D21 s_buffer_load_dword s33, s[28:31], 0x22 ; C2109D22 s_buffer_load_dword s34, s[28:31], 0x24 ; C2111D24 s_buffer_load_dword s35, s[28:31], 0x25 ; C2119D25 s_buffer_load_dword s1, s[28:31], 0x10 ; C2009D10 s_buffer_load_dword s2, s[28:31], 0x11 ; C2011D11 s_buffer_load_dword s0, s[28:31], 0x12 ; C2001D12 s_buffer_load_dword s18, s[28:31], 0x14 ; C2091D14 s_buffer_load_dword s17, s[28:31], 0x15 ; C2089D15 s_buffer_load_dword s36, s[28:31], 0x26 ; C2121D26 s_buffer_load_dword s37, s[28:31], 0x28 ; C2129D28 s_buffer_load_dword s38, s[28:31], 0x29 ; C2131D29 s_buffer_load_dword s39, s[28:31], 0x2a ; C2139D2A s_buffer_load_dword s40, s[28:31], 0x2c ; C2141D2C s_buffer_load_dword s20, s[28:31], 0x16 ; C20A1D16 s_buffer_load_dword s41, s[28:31], 0x17 ; C2149D17 s_buffer_load_dword s26, s[28:31], 0x18 ; C20D1D18 s_buffer_load_dword s25, s[28:31], 0x19 ; C20C9D19 s_buffer_load_dword s21, s[28:31], 0x1a ; C20A9D1A s_buffer_load_dword s42, s[28:31], 0x1b ; C2151D1B s_buffer_load_dword s23, s[28:31], 0x1c ; C20B9D1C s_buffer_load_dword s24, s[28:31], 0x1d ; C20C1D1D s_buffer_load_dword s22, s[28:31], 0x1e ; C20B1D1E s_buffer_load_dword s43, s[28:31], 0x1f ; C2159D1F s_buffer_load_dword s44, s[28:31], 0x2d ; C2161D2D s_buffer_load_dword s45, s[28:31], 0x2e ; C2169D2E s_buffer_load_dword s46, s[28:31], 0x34 ; C2171D34 s_buffer_load_dword s47, s[28:31], 0x35 ; C2179D35 s_buffer_load_dword s3, s[28:31], 0x36 ; C2019D36 s_buffer_load_dword s4, s[28:31], 0x3c ; C2021D3C s_buffer_load_dword s48, s[28:31], 0x40 ; C2181D40 s_buffer_load_dword s49, s[28:31], 0x41 ; C2189D41 s_buffer_load_dword s50, s[28:31], 0x42 ; C2191D42 s_buffer_load_dword s51, s[28:31], 0x43 ; C2199D43 s_buffer_load_dword s6, s[28:31], 0x37 ; C2031D37 s_buffer_load_dword s52, s[28:31], 0x38 ; C21A1D38 s_buffer_load_dword s53, s[28:31], 0x39 ; C21A9D39 s_buffer_load_dword s8, s[28:31], 0x3a ; C2041D3A s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s3 ; 7E000203 s_buffer_load_dword s10, s[28:31], 0x3b ; C2051D3B v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880 s_buffer_load_dword s3, s[28:31], 0x0 ; C2019D00 s_buffer_load_dword s4, s[28:31], 0x1 ; C2021D01 s_buffer_load_dword s5, s[28:31], 0x2 ; C2029D02 s_buffer_load_dword s9, s[28:31], 0x4 ; C2049D04 v_mov_b32_e32 v17, s6 ; 7E220206 s_buffer_load_dword s14, s[28:31], 0x5 ; C2071D05 s_buffer_load_dword s7, s[28:31], 0x6 ; C2039D06 s_buffer_load_dword s6, s[28:31], 0x7 ; C2031D07 v_mov_b32_e32 v18, s8 ; 7E240208 s_buffer_load_dword s12, s[28:31], 0x8 ; C2061D08 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v19, s10 ; 7E26020A s_buffer_load_dword s16, s[28:31], 0x9 ; C2081D09 s_buffer_load_dword s10, s[28:31], 0xa ; C2051D0A s_buffer_load_dword s8, s[28:31], 0xb ; C2041D0B s_buffer_load_dword s15, s[28:31], 0xc ; C2079D0C s_buffer_load_dword s19, s[28:31], 0xd ; C2099D0D s_buffer_load_dword s13, s[28:31], 0xe ; C2069D0E s_buffer_load_dword s11, s[28:31], 0xf ; C2059D0F s_buffer_load_dword s54, s[28:31], 0x44 ; C21B1D44 s_buffer_load_dword s55, s[28:31], 0x45 ; C21B9D45 s_buffer_load_dword s56, s[28:31], 0x46 ; C21C1D46 s_buffer_load_dword s57, s[28:31], 0x47 ; C21C9D47 s_buffer_load_dword s58, s[28:31], 0x48 ; C21D1D48 s_buffer_load_dword s59, s[28:31], 0x49 ; C21D9D49 s_buffer_load_dword s60, s[28:31], 0x4a ; C21E1D4A s_buffer_load_dword s61, s[28:31], 0x4b ; C21E9D4B s_buffer_load_dword s62, s[28:31], 0x4c ; C21F1D4C s_buffer_load_dword s63, s[28:31], 0x4d ; C21F9D4D s_buffer_load_dword s64, s[28:31], 0x4e ; C2201D4E s_buffer_load_dword s28, s[28:31], 0x4f ; C20E1D4F v_mul_f32_e32 v20, s41, v2 ; 10280429 v_mac_f32_e32 v20, s42, v3 ; 3E28062A v_mac_f32_e32 v20, s43, v4 ; 3E28082B v_mac_f32_e32 v0, s46, v9 ; 3E00122E v_mac_f32_e32 v17, s47, v10 ; 3E22142F v_mul_f32_e32 v21, s48, v2 ; 102A0430 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v21, s54, v3 ; 3E2A0636 v_mac_f32_e32 v21, s58, v4 ; 3E2A083A v_mac_f32_e32 v21, s62, v5 ; 3E2A0A3E v_mul_f32_e32 v22, s49, v2 ; 102C0431 v_mac_f32_e32 v22, s55, v3 ; 3E2C0637 v_mac_f32_e32 v22, s59, v4 ; 3E2C083B v_mac_f32_e32 v22, s63, v5 ; 3E2C0A3F v_mul_f32_e32 v23, s50, v2 ; 102E0432 v_mac_f32_e32 v23, s56, v3 ; 3E2E0638 v_mac_f32_e32 v23, s60, v4 ; 3E2E083C v_mac_f32_e32 v23, s64, v5 ; 3E2E0A40 v_mul_f32_e32 v24, s51, v2 ; 10300433 v_mac_f32_e32 v24, s57, v3 ; 3E300639 v_mac_f32_e32 v24, s61, v4 ; 3E30083D v_mac_f32_e32 v24, s28, v5 ; 3E300A1C v_cndmask_b32_e32 v9, v11, v9 ; 0012130B v_cndmask_b32_e32 v10, v12, v10 ; 0014150C v_mul_f32_e32 v11, s34, v6 ; 10160C22 v_mac_f32_e32 v11, s35, v7 ; 3E160E23 v_mul_f32_e32 v12, s37, v6 ; 10180C25 v_mac_f32_e32 v12, s38, v7 ; 3E180E26 v_mul_f32_e32 v6, s40, v6 ; 100C0C28 v_mac_f32_e32 v6, s44, v7 ; 3E0C0E2C v_mac_f32_e32 v11, s36, v8 ; 3E161024 v_mac_f32_e32 v12, s39, v8 ; 3E181027 v_mac_f32_e32 v6, s45, v8 ; 3E0C102D v_mul_f32_e32 v7, s18, v2 ; 100E0412 v_mac_f32_e32 v7, s26, v3 ; 3E0E061A v_mac_f32_e32 v7, s23, v4 ; 3E0E0817 v_mac_f32_e32 v7, s27, v5 ; 3E0E0A1B v_mul_f32_e32 v8, s17, v2 ; 10100411 v_mac_f32_e32 v8, s25, v3 ; 3E100619 v_mac_f32_e32 v8, s24, v4 ; 3E100818 v_mac_f32_e32 v8, s32, v5 ; 3E100A20 v_mul_f32_e32 v2, s20, v2 ; 10040414 v_mac_f32_e32 v2, s21, v3 ; 3E040615 v_mac_f32_e32 v2, s22, v4 ; 3E040816 v_mac_f32_e32 v2, s33, v5 ; 3E040A21 v_mac_f32_e32 v18, s52, v9 ; 3E241234 v_mac_f32_e32 v19, s53, v10 ; 3E261435 exp 15, 32, 0, 0, 0, v0, v17, v18, v19 ; F800020F 13121100 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s18, v13 ; 10001A12 v_mac_f32_e32 v0, s26, v14 ; 3E001C1A v_mul_f32_e32 v3, s17, v13 ; 10061A11 v_mac_f32_e32 v3, s25, v14 ; 3E061C19 v_mul_f32_e32 v4, s20, v13 ; 10081A14 v_mac_f32_e32 v4, s21, v14 ; 3E081C15 v_mac_f32_e32 v0, s23, v15 ; 3E001E17 v_mac_f32_e32 v3, s24, v15 ; 3E061E18 v_mac_f32_e32 v4, s22, v15 ; 3E081E16 v_mul_f32_e32 v5, v11, v11 ; 100A170B v_mac_f32_e32 v5, v12, v12 ; 3E0A190C v_mac_f32_e32 v5, v6, v6 ; 3E0A0D06 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mul_f32_e32 v9, v0, v0 ; 10120100 v_mac_f32_e32 v9, v3, v3 ; 3E120703 v_mac_f32_e32 v9, v4, v4 ; 3E120904 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mul_f32_e32 v10, v5, v11 ; 10141705 v_mul_f32_e32 v11, v5, v12 ; 10161905 v_mul_f32_e32 v5, v5, v6 ; 100A0D05 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v3, v9, v3 ; 10060709 v_mul_f32_e32 v4, v9, v4 ; 10080909 v_mul_f32_e32 v6, v3, v5 ; 100C0B03 v_mad_f32 v6, v11, v4, -v6 ; D2820006 841A090B v_mul_f32_e32 v9, v4, v10 ; 10121504 v_mad_f32 v9, v5, v0, -v9 ; D2820009 84260105 v_mul_f32_e32 v12, v0, v11 ; 10181700 v_mad_f32 v12, v10, v3, -v12 ; D282000C 8432070A v_mul_f32_e32 v6, v16, v6 ; 100C0D10 v_mul_f32_e32 v9, v16, v9 ; 10121310 v_mul_f32_e32 v12, v16, v12 ; 10181910 exp 15, 33, 0, 0, 0, v0, v3, v4, v1 ; F800021F 01040300 exp 15, 34, 0, 0, 0, v6, v9, v12, v1 ; F800022F 010C0906 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v5, v11 ; 10001705 v_mul_f32_e32 v3, s14, v0 ; 1006000E v_mul_f32_e32 v4, s16, v0 ; 10080010 v_mul_f32_e32 v0, s19, v0 ; 10000013 v_mul_f32_e32 v6, v11, v10 ; 100C150B v_mac_f32_e32 v3, s9, v6 ; 3E060C09 v_mac_f32_e32 v4, s12, v6 ; 3E080C0C v_mac_f32_e32 v0, s15, v6 ; 3E000C0F v_mul_f32_e32 v6, v5, v5 ; 100C0B05 v_mac_f32_e32 v3, s7, v6 ; 3E060C07 v_mac_f32_e32 v4, s10, v6 ; 3E080C0A v_mac_f32_e32 v0, s13, v6 ; 3E000C0D v_mul_f32_e32 v6, v10, v5 ; 100C0B0A v_mac_f32_e32 v3, s6, v6 ; 3E060C06 v_mac_f32_e32 v4, s8, v6 ; 3E080C08 v_mac_f32_e32 v0, s11, v6 ; 3E000C0B v_mul_f32_e32 v6, v11, v11 ; 100C170B v_mad_f32 v6, v10, v10, -v6 ; D2820006 841A150A v_mac_f32_e32 v3, s1, v6 ; 3E060C01 v_mac_f32_e32 v4, s2, v6 ; 3E080C02 v_mac_f32_e32 v0, s0, v6 ; 3E000C00 v_subrev_f32_e32 v6, s3, v7 ; 0A0C0E03 v_subrev_f32_e32 v9, s4, v8 ; 0A121004 v_subrev_f32_e32 v12, s5, v2 ; 0A180405 exp 15, 35, 0, 0, 0, v10, v11, v5, v1 ; F800023F 01050B0A exp 15, 36, 0, 0, 0, v3, v4, v0, v1 ; F800024F 01000403 exp 15, 37, 0, 0, 0, v23, v6, v9, v12 ; F800025F 0C090617 exp 15, 38, 0, 0, 0, v7, v8, v2, v20 ; F800026F 14020807 exp 15, 12, 0, 1, 0, v21, v22, v23, v24 ; F80008CF 18171615 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 28 Code Size: 892 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL IN[6], GENERIC[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SVIEW[0], CUBE, FLOAT DCL SVIEW[1], CUBE, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL CONST[0..5] DCL CONST[8..19] DCL CONST[22..24] DCL CONST[26] DCL TEMP[0..18], LOCAL IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[1] FLT32 { 0.5000, 0.7500, 7.0000, 1.0000} IMM[2] FLT32 { 10.0000, 0.9680, 0.0300, 0.0001} 0: MOV TEMP[0].x, IN[1].xxxx 1: MOV TEMP[0].y, IN[2].xxxx 2: MOV TEMP[0].z, IN[3].xxxx 3: MOV TEMP[1].x, IN[1].yyyy 4: MOV TEMP[1].y, IN[2].yyyy 5: MOV TEMP[1].z, IN[3].yyyy 6: MOV TEMP[2].x, IN[1].zzzz 7: MOV TEMP[2].y, IN[2].zzzz 8: MOV TEMP[2].z, IN[3].zzzz 9: MOV TEMP[3].xy, IN[0].xyyy 10: TEX TEMP[3], TEMP[3], SAMP[2], 2D 11: MUL TEMP[4].x, TEMP[3].wwww, CONST[19].wwww 12: MOV TEMP[5].xy, IN[0].xyyy 13: TEX TEMP[5].yw, TEMP[5], SAMP[3], 2D 14: MAD TEMP[5].xy, TEMP[5].wyyy, IMM[0].xxxx, IMM[0].yyyy 15: MUL TEMP[5].xy, TEMP[5].xyyy, CONST[22].xxxx 16: DP2 TEMP[6].x, TEMP[5].xyyy, TEMP[5].xyyy 17: MOV_SAT TEMP[6].x, TEMP[6].xxxx 18: ADD TEMP[6].x, IMM[0].zzzz, -TEMP[6].xxxx 19: SQRT TEMP[6].x, TEMP[6].xxxx 20: MOV TEMP[5].z, TEMP[6].xxxx 21: DP3 TEMP[0].x, TEMP[5].xyzz, TEMP[0].xyzz 22: DP3 TEMP[1].x, TEMP[5].xyzz, TEMP[1].xyzz 23: MOV TEMP[0].y, TEMP[1].xxxx 24: DP3 TEMP[1].x, TEMP[5].xyzz, TEMP[2].xyzz 25: MOV TEMP[0].z, TEMP[1].xxxx 26: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 27: RSQ TEMP[1].x, TEMP[1].xxxx 28: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 29: DP3 TEMP[1].x, IN[5].yzww, IN[5].yzww 30: RSQ TEMP[1].x, TEMP[1].xxxx 31: MUL TEMP[1].xyz, IN[5].yzww, TEMP[1].xxxx 32: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[3].xyzz 33: LRP TEMP[3].xyz, CONST[23].xxxx, TEMP[2].xyzz, CONST[16].xyzz 34: MUL TEMP[5].x, CONST[23].xxxx, CONST[16].wwww 35: ADD TEMP[5].x, CONST[16].wwww, -TEMP[5].xxxx 36: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx 37: MOV TEMP[6].xy, IN[0].xyyy 38: TEX TEMP[6].y, TEMP[6], SAMP[4], 2D 39: ADD TEMP[7].x, IMM[0].zzzz, -CONST[26].xxxx 40: MAD TEMP[6].x, TEMP[6].yyyy, CONST[26].xxxx, TEMP[7].xxxx 41: DP3 TEMP[7].x, TEMP[0].xyzz, CONST[0].xyzz 42: MAX TEMP[7].x, IMM[0].wwww, TEMP[7].xxxx 43: MOV TEMP[8].xyz, IMM[0].wwww 44: MOV TEMP[9].w, IMM[0].zzzz 45: MOV TEMP[9].xyz, TEMP[0].xyzx 46: DP4 TEMP[10].x, CONST[1], TEMP[9] 47: DP4 TEMP[11].x, CONST[2], TEMP[9] 48: MOV TEMP[10].y, TEMP[11].xxxx 49: DP4 TEMP[9].x, CONST[3], TEMP[9] 50: MOV TEMP[10].z, TEMP[9].xxxx 51: ADD TEMP[9].xyz, IN[4].xyzz, TEMP[10].xyzz 52: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[6].xxxx 53: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[1].xyzz 54: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[0].xyzz 55: MUL TEMP[10].xyz, IMM[0].xxxx, TEMP[10].xyzz 56: ADD TEMP[10].xyz, TEMP[1].xyzz, -TEMP[10].xyzz 57: MOV TEMP[11].xyz, TEMP[10].xyzx 58: FSLT TEMP[12].x, IMM[0].wwww, CONST[10].wwww 59: UIF TEMP[12].xxxx :0 60: DP3 TEMP[12].x, TEMP[10].xyzz, TEMP[10].xyzz 61: RSQ TEMP[12].x, TEMP[12].xxxx 62: MUL TEMP[12].xyz, TEMP[10].xyzz, TEMP[12].xxxx 63: MOV TEMP[13].xyz, -IN[6].xyzx 64: ADD TEMP[14].xyz, CONST[8].xyzz, TEMP[13].xyzz 65: RCP TEMP[15].x, TEMP[12].xxxx 66: RCP TEMP[15].y, TEMP[12].yyyy 67: RCP TEMP[15].z, TEMP[12].zzzz 68: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz 69: ADD TEMP[13].xyz, CONST[9].xyzz, TEMP[13].xyzz 70: RCP TEMP[15].x, TEMP[12].xxxx 71: RCP TEMP[15].y, TEMP[12].yyyy 72: RCP TEMP[15].z, TEMP[12].zzzz 73: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz 74: FSLT TEMP[15].xyz, IMM[0].wwww, TEMP[12].xyzz 75: UIF TEMP[15].xxxx :0 76: MOV TEMP[16].x, TEMP[14].xxxx 77: ELSE :0 78: MOV TEMP[16].x, TEMP[13].xxxx 79: ENDIF 80: UIF TEMP[15].yyyy :0 81: MOV TEMP[17].x, TEMP[14].yyyy 82: ELSE :0 83: MOV TEMP[17].x, TEMP[13].yyyy 84: ENDIF 85: UIF TEMP[15].zzzz :0 86: MOV TEMP[14].x, TEMP[14].zzzz 87: ELSE :0 88: MOV TEMP[14].x, TEMP[13].zzzz 89: ENDIF 90: ADD TEMP[13].xyz, CONST[8].xyzz, CONST[9].xyzz 91: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[1].xxxx 92: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx 93: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx 94: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[10].xyzz 95: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[6].xyzz 96: MAD TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xxxx, TEMP[15].xyzz 97: ADD TEMP[11].xyz, TEMP[12].xyzz, -TEMP[13].xyzz 98: ENDIF 99: ADD TEMP[12].x, IMM[0].zzzz, -CONST[24].xxxx 100: POW TEMP[12].x, TEMP[12].xxxx, IMM[1].yyyy 101: MUL TEMP[12].x, TEMP[12].xxxx, IMM[1].zzzz 102: MOV TEMP[11].xyz, TEMP[11].xyzz 103: MOV TEMP[11].w, TEMP[12].xxxx 104: TXL TEMP[11], TEMP[11], SAMP[0], CUBE 105: POW TEMP[12].x, TEMP[11].wwww, CONST[11].yyyy 106: MUL TEMP[12].x, CONST[11].xxxx, TEMP[12].xxxx 107: MUL TEMP[11].xyz, TEMP[12].xxxx, TEMP[11].xyzz 108: FSLT TEMP[12].x, CONST[9].wwww, IMM[1].wwww 109: UIF TEMP[12].xxxx :0 110: MOV TEMP[12].xyz, TEMP[10].xyzx 111: FSLT TEMP[13].x, IMM[0].wwww, CONST[14].wwww 112: UIF TEMP[13].xxxx :0 113: DP3 TEMP[13].x, TEMP[10].xyzz, TEMP[10].xyzz 114: RSQ TEMP[13].x, TEMP[13].xxxx 115: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[13].xxxx 116: MOV TEMP[13].xyz, -IN[6].xyzx 117: ADD TEMP[14].xyz, CONST[12].xyzz, TEMP[13].xyzz 118: RCP TEMP[15].x, TEMP[10].xxxx 119: RCP TEMP[15].y, TEMP[10].yyyy 120: RCP TEMP[15].z, TEMP[10].zzzz 121: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz 122: ADD TEMP[13].xyz, CONST[13].xyzz, TEMP[13].xyzz 123: RCP TEMP[15].x, TEMP[10].xxxx 124: RCP TEMP[15].y, TEMP[10].yyyy 125: RCP TEMP[15].z, TEMP[10].zzzz 126: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz 127: FSLT TEMP[15].xyz, IMM[0].wwww, TEMP[10].xyzz 128: UIF TEMP[15].xxxx :0 129: MOV TEMP[16].x, TEMP[14].xxxx 130: ELSE :0 131: MOV TEMP[16].x, TEMP[13].xxxx 132: ENDIF 133: UIF TEMP[15].yyyy :0 134: MOV TEMP[17].x, TEMP[14].yyyy 135: ELSE :0 136: MOV TEMP[17].x, TEMP[13].yyyy 137: ENDIF 138: UIF TEMP[15].zzzz :0 139: MOV TEMP[14].x, TEMP[14].zzzz 140: ELSE :0 141: MOV TEMP[14].x, TEMP[13].zzzz 142: ENDIF 143: ADD TEMP[13].xyz, CONST[12].xyzz, CONST[13].xyzz 144: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[1].xxxx 145: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx 146: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx 147: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[14].xyzz 148: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[6].xyzz 149: MAD TEMP[10].xyz, TEMP[10].xyzz, TEMP[14].xxxx, TEMP[15].xyzz 150: ADD TEMP[12].xyz, TEMP[10].xyzz, -TEMP[13].xyzz 151: ENDIF 152: ADD TEMP[10].x, IMM[0].zzzz, -CONST[24].xxxx 153: POW TEMP[10].x, TEMP[10].xxxx, IMM[1].yyyy 154: MUL TEMP[10].x, TEMP[10].xxxx, IMM[1].zzzz 155: MOV TEMP[12].xyz, TEMP[12].xyzz 156: MOV TEMP[12].w, TEMP[10].xxxx 157: TXL TEMP[10], TEMP[12], SAMP[1], CUBE 158: POW TEMP[12].x, TEMP[10].wwww, CONST[15].yyyy 159: MUL TEMP[12].x, CONST[15].xxxx, TEMP[12].xxxx 160: MUL TEMP[10].xyz, TEMP[12].xxxx, TEMP[10].xyzz 161: LRP TEMP[8].xyz, CONST[9].wwww, TEMP[11].xyzz, TEMP[10].xyzz 162: ELSE :0 163: MOV TEMP[8].xyz, TEMP[11].xyzx 164: ENDIF 165: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[6].xxxx 166: MOV TEMP[1].xyz, -TEMP[1].xyzx 167: ADD TEMP[6].x, IMM[0].zzzz, -CONST[24].xxxx 168: ADD TEMP[10].xyz, CONST[0].xyzz, TEMP[1].xyzz 169: DP3 TEMP[11].x, TEMP[10].xyzz, TEMP[10].xyzz 170: RSQ TEMP[11].x, TEMP[11].xxxx 171: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[11].xxxx 172: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz 173: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx 174: DP3 TEMP[11].x, CONST[0].xyzz, TEMP[10].xyzz 175: MAX TEMP[11].x, IMM[0].wwww, TEMP[11].xxxx 176: MUL TEMP[12].x, TEMP[6].xxxx, TEMP[6].xxxx 177: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].wwww 178: ADD TEMP[13].x, IMM[0].zzzz, -TEMP[6].xxxx 179: MAD TEMP[13].x, TEMP[13].xxxx, IMM[2].yyyy, IMM[2].zzzz 180: LG2 TEMP[13].x, TEMP[13].xxxx 181: RCP TEMP[13].x, TEMP[13].xxxx 182: MUL TEMP[13].x, IMM[2].xxxx, TEMP[13].xxxx 183: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[13].xxxx 184: ADD TEMP[14].x, IMM[0].zzzz, -TEMP[7].xxxx 185: ADD TEMP[15].x, IMM[0].zzzz, -TEMP[1].xxxx 186: MUL TEMP[16].x, IMM[0].xxxx, TEMP[11].xxxx 187: MUL TEMP[6].x, TEMP[11].xxxx, TEMP[6].xxxx 188: MAD TEMP[6].x, TEMP[16].xxxx, TEMP[6].xxxx, IMM[1].xxxx 189: ADD TEMP[11].x, IMM[0].zzzz, -TEMP[11].xxxx 190: ADD TEMP[16].x, IMM[0].zzzz, -TEMP[1].xxxx 191: ADD TEMP[5].x, IMM[0].zzzz, -TEMP[5].xxxx 192: ADD TEMP[5].x, CONST[24].xxxx, TEMP[5].xxxx 193: MOV_SAT TEMP[5].x, TEMP[5].xxxx 194: MUL TEMP[17].x, TEMP[16].xxxx, TEMP[16].xxxx 195: MUL TEMP[18].x, TEMP[16].xxxx, TEMP[16].xxxx 196: MUL TEMP[16].x, TEMP[18].xxxx, TEMP[16].xxxx 197: MUL TEMP[16].x, TEMP[17].xxxx, TEMP[16].xxxx 198: LRP TEMP[5].xyz, TEMP[16].xxxx, TEMP[5].xxxx, TEMP[3].xyzz 199: LRP TEMP[16].x, TEMP[7].xxxx, IMM[0].zzzz, TEMP[12].xxxx 200: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, TEMP[12].xxxx 201: MAD TEMP[1].x, TEMP[16].xxxx, TEMP[1].xxxx, IMM[2].wwww 202: RCP TEMP[1].x, TEMP[1].xxxx 203: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[10].xyzz 204: MAX TEMP[10].x, IMM[0].wwww, TEMP[10].xxxx 205: POW TEMP[10].x, TEMP[10].xxxx, TEMP[13].xxxx 206: ADD TEMP[12].x, TEMP[13].xxxx, IMM[0].zzzz 207: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].yyyy 208: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[12].xxxx 209: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[10].xxxx 210: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[7].xxxx 211: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx 212: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx 213: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[17].xyzz 214: ADD TEMP[10].xyz, IMM[0].zzzz, -TEMP[3].xyzz 215: MUL TEMP[12].x, TEMP[11].xxxx, TEMP[11].xxxx 216: MUL TEMP[13].x, TEMP[11].xxxx, TEMP[11].xxxx 217: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[11].xxxx 218: MUL TEMP[11].x, TEMP[12].xxxx, TEMP[11].xxxx 219: MAD TEMP[3].xyz, TEMP[10].xyzz, TEMP[11].xxxx, TEMP[3].xyzz 220: ADD TEMP[10].x, TEMP[6].xxxx, IMM[0].yyyy 221: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx 222: MUL TEMP[12].x, TEMP[14].xxxx, TEMP[14].xxxx 223: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[14].xxxx 224: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx 225: MAD TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx, IMM[0].zzzz 226: ADD TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy 227: MUL TEMP[11].x, TEMP[15].xxxx, TEMP[15].xxxx 228: MUL TEMP[12].x, TEMP[15].xxxx, TEMP[15].xxxx 229: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[15].xxxx 230: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx 231: MAD TEMP[6].x, TEMP[6].xxxx, TEMP[11].xxxx, IMM[0].zzzz 232: MUL TEMP[6].x, TEMP[10].xxxx, TEMP[6].xxxx 233: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].xxxx 234: MAD TEMP[6].xyz, CONST[17].xyzz, TEMP[6].xxxx, TEMP[9].xyzz 235: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[6].xyzz 236: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz 237: MAD TEMP[0].xyz, TEMP[8].xyzz, TEMP[5].xyzz, TEMP[1].xyzz 238: MOV TEMP[0].xyz, TEMP[0].xyzx 239: MAD TEMP[1].x, IN[5].xxxx, CONST[5].zzzz, CONST[5].wwww 240: MOV_SAT TEMP[1].x, TEMP[1].xxxx 241: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz 242: MOV TEMP[0].xyz, TEMP[0].xyzx 243: MOV TEMP[0].w, TEMP[4].xxxx 244: MOV OUT[0], TEMP[0] 245: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300) %76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312) %79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 316) %80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368) %82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384) %83 = call float @llvm.SI.load.const(<16 x i8> %23, i32 416) %84 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %85 = load <32 x i8>, <32 x i8> addrspace(2)* %84, align 32, !tbaa !0 %86 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %87 = load <16 x i8>, <16 x i8> addrspace(2)* %86, align 16, !tbaa !0 %88 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %89 = bitcast <8 x i32> addrspace(2)* %88 to <32 x i8> addrspace(2)* %90 = load <32 x i8>, <32 x i8> addrspace(2)* %89, align 32, !tbaa !0 %91 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %92 = bitcast <4 x i32> addrspace(2)* %91 to <16 x i8> addrspace(2)* %93 = load <16 x i8>, <16 x i8> addrspace(2)* %92, align 16, !tbaa !0 %94 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %95 = bitcast <8 x i32> addrspace(2)* %94 to <32 x i8> addrspace(2)* %96 = load <32 x i8>, <32 x i8> addrspace(2)* %95, align 32, !tbaa !0 %97 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %98 = bitcast <4 x i32> addrspace(2)* %97 to <16 x i8> addrspace(2)* %99 = load <16 x i8>, <16 x i8> addrspace(2)* %98, align 16, !tbaa !0 %100 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %101 = bitcast <8 x i32> addrspace(2)* %100 to <32 x i8> addrspace(2)* %102 = load <32 x i8>, <32 x i8> addrspace(2)* %101, align 32, !tbaa !0 %103 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %104 = bitcast <4 x i32> addrspace(2)* %103 to <16 x i8> addrspace(2)* %105 = load <16 x i8>, <16 x i8> addrspace(2)* %104, align 16, !tbaa !0 %106 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %107 = bitcast <8 x i32> addrspace(2)* %106 to <32 x i8> addrspace(2)* %108 = load <32 x i8>, <32 x i8> addrspace(2)* %107, align 32, !tbaa !0 %109 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %110 = bitcast <4 x i32> addrspace(2)* %109 to <16 x i8> addrspace(2)* %111 = load <16 x i8>, <16 x i8> addrspace(2)* %110, align 16, !tbaa !0 %112 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %113 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %114 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %115 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %116 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %117 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %119 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %123 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %124 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %125 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %126 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %127 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %128 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %129 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7) %130 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %131 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %132 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %133 = bitcast float %112 to i32 %134 = bitcast float %113 to i32 %135 = insertelement <2 x i32> undef, i32 %133, i32 0 %136 = insertelement <2 x i32> %135, i32 %134, i32 1 %137 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %136, <32 x i8> %96, <16 x i8> %99, i32 2) %138 = extractelement <4 x float> %137, i32 0 %139 = extractelement <4 x float> %137, i32 1 %140 = extractelement <4 x float> %137, i32 2 %141 = extractelement <4 x float> %137, i32 3 %142 = fmul float %141, %79 %143 = bitcast float %112 to i32 %144 = bitcast float %113 to i32 %145 = insertelement <2 x i32> undef, i32 %143, i32 0 %146 = insertelement <2 x i32> %145, i32 %144, i32 1 %147 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %146, <32 x i8> %102, <16 x i8> %105, i32 2) %148 = extractelement <4 x float> %147, i32 1 %149 = extractelement <4 x float> %147, i32 3 %150 = fmul float %149, 2.000000e+00 %151 = fadd float %150, -1.000000e+00 %152 = fmul float %148, 2.000000e+00 %153 = fadd float %152, -1.000000e+00 %154 = fmul float %151, %80 %155 = fmul float %153, %80 %156 = fmul float %154, %154 %157 = fmul float %155, %155 %158 = fadd float %156, %157 %159 = call float @llvm.AMDIL.clamp.(float %158, float 0.000000e+00, float 1.000000e+00) %160 = fsub float 1.000000e+00, %159 %161 = call float @llvm.sqrt.f32(float %160) %162 = fmul float %154, %114 %163 = fmul float %155, %117 %164 = fadd float %163, %162 %165 = fmul float %161, %120 %166 = fadd float %164, %165 %167 = fmul float %154, %115 %168 = fmul float %155, %118 %169 = fadd float %168, %167 %170 = fmul float %161, %121 %171 = fadd float %169, %170 %172 = fmul float %154, %116 %173 = fmul float %155, %119 %174 = fadd float %173, %172 %175 = fmul float %161, %122 %176 = fadd float %174, %175 %177 = fmul float %166, %166 %178 = fmul float %171, %171 %179 = fadd float %178, %177 %180 = fmul float %176, %176 %181 = fadd float %179, %180 %182 = call float @llvm.AMDGPU.rsq.clamped.f32(float %181) %183 = fmul float %166, %182 %184 = fmul float %171, %182 %185 = fmul float %176, %182 %186 = fmul float %127, %127 %187 = fmul float %128, %128 %188 = fadd float %187, %186 %189 = fmul float %129, %129 %190 = fadd float %188, %189 %191 = call float @llvm.AMDGPU.rsq.clamped.f32(float %190) %192 = fmul float %127, %191 %193 = fmul float %128, %191 %194 = fmul float %129, %191 %195 = fmul float %76, %138 %196 = fmul float %77, %139 %197 = fmul float %78, %140 %198 = call float @llvm.AMDGPU.lrp(float %81, float %195, float %66) %199 = call float @llvm.AMDGPU.lrp(float %81, float %196, float %67) %200 = call float @llvm.AMDGPU.lrp(float %81, float %197, float %68) %201 = fmul float %81, %69 %202 = fsub float %69, %201 %203 = fmul float %195, %202 %204 = fmul float %196, %202 %205 = fmul float %197, %202 %206 = bitcast float %112 to i32 %207 = bitcast float %113 to i32 %208 = insertelement <2 x i32> undef, i32 %206, i32 0 %209 = insertelement <2 x i32> %208, i32 %207, i32 1 %210 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %209, <32 x i8> %108, <16 x i8> %111, i32 2) %211 = extractelement <4 x float> %210, i32 1 %212 = fsub float 1.000000e+00, %83 %213 = fmul float %211, %83 %214 = fadd float %213, %212 %215 = fmul float %183, %24 %216 = fmul float %184, %25 %217 = fadd float %216, %215 %218 = fmul float %185, %26 %219 = fadd float %217, %218 %220 = call float @llvm.maxnum.f32(float %219, float 0.000000e+00) %221 = fmul float %27, %183 %222 = fmul float %28, %184 %223 = fadd float %221, %222 %224 = fmul float %29, %185 %225 = fadd float %223, %224 %226 = fadd float %225, %30 %227 = fmul float %31, %183 %228 = fmul float %32, %184 %229 = fadd float %227, %228 %230 = fmul float %33, %185 %231 = fadd float %229, %230 %232 = fadd float %231, %34 %233 = fmul float %35, %183 %234 = fmul float %36, %184 %235 = fadd float %233, %234 %236 = fmul float %37, %185 %237 = fadd float %235, %236 %238 = fadd float %237, %38 %239 = fadd float %123, %226 %240 = fadd float %124, %232 %241 = fadd float %125, %238 %242 = fmul float %239, %214 %243 = fmul float %240, %214 %244 = fmul float %241, %214 %245 = fmul float %183, %192 %246 = fmul float %184, %193 %247 = fadd float %246, %245 %248 = fmul float %185, %194 %249 = fadd float %247, %248 %250 = fmul float %249, %183 %251 = fmul float %249, %184 %252 = fmul float %249, %185 %253 = fmul float %250, 2.000000e+00 %254 = fmul float %251, 2.000000e+00 %255 = fmul float %252, 2.000000e+00 %256 = fsub float %192, %253 %257 = fsub float %193, %254 %258 = fsub float %194, %255 %259 = fcmp ogt float %51, 0.000000e+00 br i1 %259, label %IF, label %ENDIF IF: ; preds = %main_body %260 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %261 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %262 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %263 = fmul float %256, %256 %264 = fmul float %257, %257 %265 = fadd float %264, %263 %266 = fmul float %258, %258 %267 = fadd float %265, %266 %268 = call float @llvm.AMDGPU.rsq.clamped.f32(float %267) %269 = fmul float %256, %268 %270 = fmul float %257, %268 %271 = fmul float %258, %268 %272 = fsub float %44, %130 %273 = fsub float %45, %131 %274 = fsub float %46, %132 %275 = fdiv float 1.000000e+00, %269 %276 = fdiv float 1.000000e+00, %270 %277 = fdiv float 1.000000e+00, %271 %278 = fmul float %272, %275 %279 = fmul float %273, %276 %280 = fmul float %274, %277 %281 = fsub float %47, %130 %282 = fsub float %48, %131 %283 = fsub float %49, %132 %284 = fdiv float 1.000000e+00, %269 %285 = fdiv float 1.000000e+00, %270 %286 = fdiv float 1.000000e+00, %271 %287 = fmul float %281, %284 %288 = fmul float %282, %285 %289 = fmul float %283, %286 %290 = fcmp ogt float %269, 0.000000e+00 %291 = fcmp ogt float %270, 0.000000e+00 %292 = fcmp ogt float %271, 0.000000e+00 %. = select i1 %290, float %278, float %287 %temp68.0 = select i1 %291, float %279, float %288 %.100 = select i1 %292, float %280, float %289 %293 = fadd float %44, %47 %294 = fadd float %45, %48 %295 = fadd float %46, %49 %296 = fmul float %293, 5.000000e-01 %297 = fmul float %294, 5.000000e-01 %298 = fmul float %295, 5.000000e-01 %299 = call float @llvm.minnum.f32(float %., float %temp68.0) %300 = call float @llvm.minnum.f32(float %299, float %.100) %301 = fsub float %296, %262 %302 = fsub float %297, %261 %303 = fsub float %298, %260 %304 = fadd float %301, %130 %305 = fadd float %302, %131 %306 = fadd float %303, %132 %307 = fmul float %269, %300 %308 = fadd float %307, %304 %309 = fmul float %270, %300 %310 = fadd float %309, %305 %311 = fmul float %271, %300 %312 = fadd float %311, %306 %313 = fsub float %308, %296 %314 = fsub float %310, %297 %315 = fsub float %312, %298 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp44.0 = phi float [ %313, %IF ], [ %256, %main_body ] %temp45.0 = phi float [ %314, %IF ], [ %257, %main_body ] %temp46.0 = phi float [ %315, %IF ], [ %258, %main_body ] %316 = fsub float 1.000000e+00, %82 %317 = call float @llvm.pow.f32(float %316, float 7.500000e-01) %318 = fmul float %317, 7.000000e+00 %319 = insertelement <4 x float> undef, float %temp44.0, i32 0 %320 = insertelement <4 x float> %319, float %temp45.0, i32 1 %321 = insertelement <4 x float> %320, float %temp46.0, i32 2 %322 = insertelement <4 x float> %321, float %318, i32 3 %323 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %322) %324 = extractelement <4 x float> %323, i32 0 %325 = extractelement <4 x float> %323, i32 1 %326 = extractelement <4 x float> %323, i32 2 %327 = extractelement <4 x float> %323, i32 3 %328 = call float @llvm.fabs.f32(float %326) %329 = fdiv float 1.000000e+00, %328 %330 = fmul float %324, %329 %331 = fadd float %330, 1.500000e+00 %332 = fmul float %325, %329 %333 = fadd float %332, 1.500000e+00 %334 = bitcast float %333 to i32 %335 = bitcast float %331 to i32 %336 = bitcast float %327 to i32 %337 = bitcast float %318 to i32 %338 = insertelement <4 x i32> undef, i32 %334, i32 0 %339 = insertelement <4 x i32> %338, i32 %335, i32 1 %340 = insertelement <4 x i32> %339, i32 %336, i32 2 %341 = insertelement <4 x i32> %340, i32 %337, i32 3 %342 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %341, <32 x i8> %85, <16 x i8> %87, i32 4) %343 = extractelement <4 x float> %342, i32 0 %344 = extractelement <4 x float> %342, i32 1 %345 = extractelement <4 x float> %342, i32 2 %346 = extractelement <4 x float> %342, i32 3 %347 = call float @llvm.pow.f32(float %346, float %53) %348 = fmul float %52, %347 %349 = fmul float %348, %343 %350 = fmul float %348, %344 %351 = fmul float %348, %345 %352 = fcmp olt float %50, 0x3FEFFFEB00000000 br i1 %352, label %IF86, label %ENDIF85 IF86: ; preds = %ENDIF %353 = fcmp ogt float %63, 0.000000e+00 br i1 %353, label %IF89, label %ENDIF88 ENDIF85: ; preds = %ENDIF, %ENDIF88 %temp32.0 = phi float [ %581, %ENDIF88 ], [ %349, %ENDIF ] %temp33.0 = phi float [ %582, %ENDIF88 ], [ %350, %ENDIF ] %temp34.0 = phi float [ %583, %ENDIF88 ], [ %351, %ENDIF ] %354 = fmul float %temp32.0, %214 %355 = fmul float %temp33.0, %214 %356 = fmul float %temp34.0, %214 %357 = fsub float 1.000000e+00, %82 %358 = fsub float %24, %192 %359 = fsub float %25, %193 %360 = fsub float %26, %194 %361 = fmul float %358, %358 %362 = fmul float %359, %359 %363 = fadd float %362, %361 %364 = fmul float %360, %360 %365 = fadd float %363, %364 %366 = call float @llvm.AMDGPU.rsq.clamped.f32(float %365) %367 = fmul float %358, %366 %368 = fmul float %359, %366 %369 = fmul float %360, %366 %370 = fmul float %192, %183 %371 = fsub float -0.000000e+00, %370 %372 = fmul float %193, %184 %373 = fsub float %371, %372 %374 = fmul float %194, %185 %375 = fsub float %373, %374 %376 = call float @llvm.maxnum.f32(float %375, float 0.000000e+00) %377 = fmul float %24, %367 %378 = fmul float %25, %368 %379 = fadd float %378, %377 %380 = fmul float %26, %369 %381 = fadd float %379, %380 %382 = call float @llvm.maxnum.f32(float %381, float 0.000000e+00) %383 = fmul float %357, %357 %384 = fmul float %383, %75 %385 = fsub float 1.000000e+00, %357 %386 = fmul float %385, 0x3FEEF9DB20000000 %387 = fadd float %386, 0x3F9EB851E0000000 %388 = call float @llvm.log2.f32(float %387) %389 = fdiv float 1.000000e+00, %388 %390 = fmul float %389, 1.000000e+01 %391 = fmul float %390, %390 %392 = fsub float 1.000000e+00, %220 %393 = fsub float 1.000000e+00, %376 %394 = fmul float %382, 2.000000e+00 %395 = fmul float %382, %357 %396 = fmul float %394, %395 %397 = fadd float %396, 5.000000e-01 %398 = fsub float 1.000000e+00, %382 %399 = fsub float 1.000000e+00, %376 %400 = fsub float 1.000000e+00, %202 %401 = fadd float %82, %400 %402 = call float @llvm.AMDIL.clamp.(float %401, float 0.000000e+00, float 1.000000e+00) %403 = fmul float %399, %399 %404 = fmul float %399, %399 %405 = fmul float %404, %399 %406 = fmul float %403, %405 %407 = call float @llvm.AMDGPU.lrp(float %406, float %402, float %198) %408 = call float @llvm.AMDGPU.lrp(float %406, float %402, float %199) %409 = call float @llvm.AMDGPU.lrp(float %406, float %402, float %200) %410 = call float @llvm.AMDGPU.lrp(float %220, float 1.000000e+00, float %384) %411 = call float @llvm.AMDGPU.lrp(float %376, float 1.000000e+00, float %384) %412 = fmul float %410, %411 %413 = fadd float %412, 0x3F1A36E2E0000000 %414 = fdiv float 1.000000e+00, %413 %415 = fmul float %183, %367 %416 = fmul float %184, %368 %417 = fadd float %416, %415 %418 = fmul float %185, %369 %419 = fadd float %417, %418 %420 = call float @llvm.maxnum.f32(float %419, float 0.000000e+00) %421 = call float @llvm.pow.f32(float %420, float %391) %422 = fadd float %391, 1.000000e+00 %423 = fmul float %422, %74 %424 = fmul float %421, %423 %425 = fmul float %414, %424 %426 = fmul float %425, %220 %427 = fmul float %426, %73 %428 = call float @llvm.maxnum.f32(float %427, float 0.000000e+00) %429 = fmul float %428, %70 %430 = fmul float %428, %71 %431 = fmul float %428, %72 %432 = fsub float 1.000000e+00, %198 %433 = fsub float 1.000000e+00, %199 %434 = fsub float 1.000000e+00, %200 %435 = fmul float %398, %398 %436 = fmul float %398, %398 %437 = fmul float %436, %398 %438 = fmul float %435, %437 %439 = fmul float %432, %438 %440 = fadd float %439, %198 %441 = fmul float %433, %438 %442 = fadd float %441, %199 %443 = fmul float %434, %438 %444 = fadd float %443, %200 %445 = fadd float %397, -1.000000e+00 %446 = fmul float %392, %392 %447 = fmul float %392, %392 %448 = fmul float %447, %392 %449 = fmul float %446, %448 %450 = fmul float %445, %449 %451 = fadd float %450, 1.000000e+00 %452 = fadd float %397, -1.000000e+00 %453 = fmul float %393, %393 %454 = fmul float %393, %393 %455 = fmul float %454, %393 %456 = fmul float %453, %455 %457 = fmul float %452, %456 %458 = fadd float %457, 1.000000e+00 %459 = fmul float %451, %458 %460 = fmul float %459, %220 %461 = fmul float %70, %460 %462 = fadd float %461, %242 %463 = fmul float %71, %460 %464 = fadd float %463, %243 %465 = fmul float %72, %460 %466 = fadd float %465, %244 %467 = fmul float %203, %462 %468 = fmul float %204, %464 %469 = fmul float %205, %466 %470 = fmul float %429, %440 %471 = fadd float %470, %467 %472 = fmul float %430, %442 %473 = fadd float %472, %468 %474 = fmul float %431, %444 %475 = fadd float %474, %469 %476 = fmul float %354, %407 %477 = fadd float %476, %471 %478 = fmul float %355, %408 %479 = fadd float %478, %473 %480 = fmul float %356, %409 %481 = fadd float %480, %475 %482 = fmul float %126, %42 %483 = fadd float %482, %43 %484 = call float @llvm.AMDIL.clamp.(float %483, float 0.000000e+00, float 1.000000e+00) %485 = call float @llvm.AMDGPU.lrp(float %484, float %477, float %39) %486 = call float @llvm.AMDGPU.lrp(float %484, float %479, float %40) %487 = call float @llvm.AMDGPU.lrp(float %484, float %481, float %41) %488 = call i32 @llvm.SI.packf16(float %485, float %486) %489 = bitcast i32 %488 to float %490 = call i32 @llvm.SI.packf16(float %487, float %142) %491 = bitcast i32 %490 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %489, float %491, float %489, float %491) ret void IF89: ; preds = %IF86 %492 = fmul float %256, %256 %493 = fmul float %257, %257 %494 = fadd float %493, %492 %495 = fmul float %258, %258 %496 = fadd float %494, %495 %497 = call float @llvm.AMDGPU.rsq.clamped.f32(float %496) %498 = fmul float %256, %497 %499 = fmul float %257, %497 %500 = fmul float %258, %497 %501 = fsub float %54, %130 %502 = fsub float %55, %131 %503 = fsub float %56, %132 %504 = fdiv float 1.000000e+00, %498 %505 = fdiv float 1.000000e+00, %499 %506 = fdiv float 1.000000e+00, %500 %507 = fmul float %501, %504 %508 = fmul float %502, %505 %509 = fmul float %503, %506 %510 = fsub float %57, %130 %511 = fsub float %58, %131 %512 = fsub float %59, %132 %513 = fdiv float 1.000000e+00, %498 %514 = fdiv float 1.000000e+00, %499 %515 = fdiv float 1.000000e+00, %500 %516 = fmul float %510, %513 %517 = fmul float %511, %514 %518 = fmul float %512, %515 %519 = fcmp ogt float %498, 0.000000e+00 %520 = fcmp ogt float %499, 0.000000e+00 %521 = fcmp ogt float %500, 0.000000e+00 %.101 = select i1 %519, float %507, float %516 %temp68.1 = select i1 %520, float %508, float %517 %.102 = select i1 %521, float %509, float %518 %522 = fadd float %54, %57 %523 = fadd float %55, %58 %524 = fadd float %56, %59 %525 = fmul float %522, 5.000000e-01 %526 = fmul float %523, 5.000000e-01 %527 = fmul float %524, 5.000000e-01 %528 = call float @llvm.minnum.f32(float %.101, float %temp68.1) %529 = call float @llvm.minnum.f32(float %528, float %.102) %530 = fsub float %525, %60 %531 = fsub float %526, %61 %532 = fsub float %527, %62 %533 = fadd float %530, %130 %534 = fadd float %531, %131 %535 = fadd float %532, %132 %536 = fmul float %498, %529 %537 = fadd float %536, %533 %538 = fmul float %499, %529 %539 = fadd float %538, %534 %540 = fmul float %500, %529 %541 = fadd float %540, %535 %542 = fsub float %537, %525 %543 = fsub float %539, %526 %544 = fsub float %541, %527 br label %ENDIF88 ENDIF88: ; preds = %IF86, %IF89 %temp48.0 = phi float [ %542, %IF89 ], [ %256, %IF86 ] %temp49.0 = phi float [ %543, %IF89 ], [ %257, %IF86 ] %temp50.0 = phi float [ %544, %IF89 ], [ %258, %IF86 ] %545 = fsub float 1.000000e+00, %82 %546 = call float @llvm.pow.f32(float %545, float 7.500000e-01) %547 = fmul float %546, 7.000000e+00 %548 = insertelement <4 x float> undef, float %temp48.0, i32 0 %549 = insertelement <4 x float> %548, float %temp49.0, i32 1 %550 = insertelement <4 x float> %549, float %temp50.0, i32 2 %551 = insertelement <4 x float> %550, float %547, i32 3 %552 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %551) %553 = extractelement <4 x float> %552, i32 0 %554 = extractelement <4 x float> %552, i32 1 %555 = extractelement <4 x float> %552, i32 2 %556 = extractelement <4 x float> %552, i32 3 %557 = call float @llvm.fabs.f32(float %555) %558 = fdiv float 1.000000e+00, %557 %559 = fmul float %553, %558 %560 = fadd float %559, 1.500000e+00 %561 = fmul float %554, %558 %562 = fadd float %561, 1.500000e+00 %563 = bitcast float %562 to i32 %564 = bitcast float %560 to i32 %565 = bitcast float %556 to i32 %566 = bitcast float %547 to i32 %567 = insertelement <4 x i32> undef, i32 %563, i32 0 %568 = insertelement <4 x i32> %567, i32 %564, i32 1 %569 = insertelement <4 x i32> %568, i32 %565, i32 2 %570 = insertelement <4 x i32> %569, i32 %566, i32 3 %571 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %570, <32 x i8> %90, <16 x i8> %93, i32 4) %572 = extractelement <4 x float> %571, i32 0 %573 = extractelement <4 x float> %571, i32 1 %574 = extractelement <4 x float> %571, i32 2 %575 = extractelement <4 x float> %571, i32 3 %576 = call float @llvm.pow.f32(float %575, float %65) %577 = fmul float %64, %576 %578 = fmul float %577, %572 %579 = fmul float %577, %573 %580 = fmul float %577, %574 %581 = call float @llvm.AMDGPU.lrp(float %50, float %349, float %578) %582 = call float @llvm.AMDGPU.lrp(float %50, float %350, float %579) %583 = call float @llvm.AMDGPU.lrp(float %50, float %351, float %580) br label %ENDIF85 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v17, v0, 0, 0, [m0] ; C8440000 v_interp_p2_f32 v17, [v17], v1, 0, 0, [m0] ; C8450001 v_interp_p1_f32 v18, v0, 1, 0, [m0] ; C8480100 v_interp_p2_f32 v18, [v18], v1, 1, 0, [m0] ; C8490101 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800 v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801 v_interp_p1_f32 v10, v0, 1, 2, [m0] ; C8280900 v_interp_p2_f32 v10, [v10], v1, 1, 2, [m0] ; C8290901 v_interp_p1_f32 v11, v0, 2, 2, [m0] ; C82C0A00 v_interp_p2_f32 v11, [v11], v1, 2, 2, [m0] ; C82D0A01 v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00 v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01 v_interp_p1_f32 v19, v0, 1, 3, [m0] ; C84C0D00 v_interp_p2_f32 v19, [v19], v1, 1, 3, [m0] ; C84D0D01 v_interp_p1_f32 v20, v0, 2, 3, [m0] ; C8500E00 v_interp_p2_f32 v20, [v20], v1, 2, 3, [m0] ; C8510E01 v_interp_p1_f32 v3, v0, 0, 4, [m0] ; C80C1000 v_interp_p2_f32 v3, [v3], v1, 0, 4, [m0] ; C80D1001 v_interp_p1_f32 v4, v0, 1, 4, [m0] ; C8101100 v_interp_p2_f32 v4, [v4], v1, 1, 4, [m0] ; C8111101 v_interp_p1_f32 v6, v0, 2, 4, [m0] ; C8181200 v_interp_p2_f32 v6, [v6], v1, 2, 4, [m0] ; C8191201 v_interp_p1_f32 v5, v0, 0, 5, [m0] ; C8141400 v_interp_p2_f32 v5, [v5], v1, 0, 5, [m0] ; C8151401 v_interp_p1_f32 v22, v0, 1, 5, [m0] ; C8581500 v_interp_p2_f32 v22, [v22], v1, 1, 5, [m0] ; C8591501 v_interp_p1_f32 v26, v0, 2, 5, [m0] ; C8681600 v_interp_p2_f32 v26, [v26], v1, 2, 5, [m0] ; C8691601 v_interp_p1_f32 v27, v0, 3, 5, [m0] ; C86C1700 v_interp_p2_f32 v27, [v27], v1, 3, 5, [m0] ; C86D1701 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p1_f32 v23, v0, 0, 6, [m0] ; C85C1800 s_load_dwordx4 s[0:3], s[4:5], 0x8 ; C0800508 s_load_dwordx4 s[12:15], s[4:5], 0xc ; C086050C s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710 s_load_dwordx8 s[36:43], s[6:7], 0x18 ; C0D20718 v_interp_p2_f32 v23, [v23], v1, 0, 6, [m0] ; C85D1801 v_interp_p1_f32 v21, v0, 1, 6, [m0] ; C8541900 v_interp_p2_f32 v21, [v21], v1, 1, 6, [m0] ; C8551901 v_interp_p1_f32 v24, v0, 2, 6, [m0] ; C8601A00 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s44, s[8:11], 0x58 ; C2160958 v_interp_p2_f32 v24, [v24], v1, 2, 6, [m0] ; C8611A01 s_load_dwordx8 s[16:23], s[6:7], 0x20 ; C0C80720 s_load_dwordx4 s[32:35], s[4:5], 0x10 ; C0900510 image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[24:31], s[0:3] ; F0800F00 00060D11 image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[36:43], s[12:15] ; F0800A00 00690011 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4 s_buffer_load_dword s24, s[8:11], 0x5c ; C20C095C s_buffer_load_dword s0, s[8:11], 0x60 ; C2000960 v_mul_f32_e32 v1, s44, v1 ; 1002022C v_mul_f32_e32 v0, s44, v0 ; 1000002C v_mul_f32_e32 v2, v2, v1 ; 10040302 v_mac_f32_e32 v2, v9, v0 ; 3E040109 v_mul_f32_e32 v7, v7, v1 ; 100E0307 v_mac_f32_e32 v7, v10, v0 ; 3E0E010A v_mul_f32_e32 v10, v8, v1 ; 10140308 v_mac_f32_e32 v10, v11, v0 ; 3E14010B v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mac_f32_e32 v0, v1, v1 ; 3E000301 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v0, 1.0, v0 ; 080000F2 v_sqrt_f32_e32 v0, v0 ; 7E006700 v_mac_f32_e32 v2, v12, v0 ; 3E04010C v_mac_f32_e32 v7, v19, v0 ; 3E0E0113 v_mac_f32_e32 v10, v20, v0 ; 3E140114 v_mul_f32_e32 v0, v2, v2 ; 10000502 v_mac_f32_e32 v0, v7, v7 ; 3E000F07 v_mac_f32_e32 v0, v10, v10 ; 3E00150A v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_mul_f32_e32 v1, v22, v22 ; 10022D16 v_mac_f32_e32 v1, v26, v26 ; 3E02351A v_mac_f32_e32 v1, v27, v27 ; 3E02371B v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 v_mul_f32_e32 v9, v0, v2 ; 10120500 v_mul_f32_e32 v8, v0, v7 ; 10100F00 v_mul_f32_e32 v7, v0, v10 ; 100E1500 v_mul_f32_e32 v12, v1, v22 ; 10182D01 v_mul_f32_e32 v11, v1, v26 ; 10163501 v_mul_f32_e32 v0, v12, v9 ; 1000130C v_mac_f32_e32 v0, v11, v8 ; 3E00110B v_mul_f32_e32 v10, v1, v27 ; 10143701 v_mac_f32_e32 v0, v10, v7 ; 3E000F0A v_mul_f32_e32 v2, v9, v0 ; 10040109 v_mac_f32_e32 v2, v9, v0 ; 3E040109 v_mul_f32_e32 v19, v8, v0 ; 10260108 v_mac_f32_e32 v19, v8, v0 ; 3E260108 v_mad_f32 v25, v22, v1, -v2 ; D2820019 840A0316 v_mad_f32 v26, v26, v1, -v19 ; D282001A 844E031A v_mul_f32_e32 v2, v7, v0 ; 10040107 v_mac_f32_e32 v2, v7, v0 ; 3E040107 v_mad_f32 v27, v27, v1, -v2 ; D282001B 840A031B s_buffer_load_dword s1, s[8:11], 0x40 ; C2008940 s_buffer_load_dword s2, s[8:11], 0x41 ; C2010941 s_buffer_load_dword s3, s[8:11], 0x42 ; C2018942 s_buffer_load_dword s13, s[8:11], 0x4c ; C206894C s_buffer_load_dword s14, s[8:11], 0x4d ; C207094D s_buffer_load_dword s15, s[8:11], 0x4e ; C207894E s_buffer_load_dword s12, s[8:11], 0x27 ; C2060927 s_buffer_load_dword s25, s[8:11], 0x2b ; C20C892B s_buffer_load_dword s30, s[8:11], 0x2c ; C20F092C s_buffer_load_dword s31, s[8:11], 0x2d ; C20F892D s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v0, 1.0, s24 ; D2080000 000030F2 v_mul_f32_e32 v2, s1, v0 ; 10040001 v_mul_f32_e32 v1, s2, v0 ; 10020002 v_mul_f32_e32 v0, s3, v0 ; 10000003 v_mul_f32_e32 v22, s13, v13 ; 102C1A0D v_mac_f32_e32 v2, s24, v22 ; 3E042C18 v_mul_f32_e32 v13, s14, v14 ; 101A1C0E v_mov_b32_e32 v28, v25 ; 7E380319 v_mac_f32_e32 v1, s24, v13 ; 3E021A18 v_mov_b32_e32 v29, v26 ; 7E3A031A v_mul_f32_e32 v14, s15, v15 ; 101C1E0F v_mac_f32_e32 v0, s24, v14 ; 3E001C18 v_mov_b32_e32 v30, v27 ; 7E3C031B v_cmp_lt_f32_e64 s[2:3], 0, s25 ; D0020002 00003280 image_sample v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[16:23], s[32:35] ; F0800F00 01041111 s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[14:15], s[2:3] ; BE8E2402 s_xor_b64 s[14:15], exec, s[14:15] ; 898E0E7E s_cbranch_execz BB0_2 ; BF880000 s_buffer_load_dword s1, s[8:11], 0x20 ; C2008920 s_buffer_load_dword s2, s[8:11], 0x21 ; C2010921 s_buffer_load_dword s3, s[8:11], 0x22 ; C2018922 s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924 s_buffer_load_dword s16, s[8:11], 0x25 ; C2080925 v_mul_f32_e32 v15, v25, v25 ; 101E3319 v_mac_f32_e32 v15, v26, v26 ; 3E1E351A v_mac_f32_e32 v15, v27, v27 ; 3E1E371B v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F s_buffer_load_dword s17, s[8:11], 0x26 ; C2088926 s_buffer_load_dword s18, s[8:11], 0x28 ; C2090928 s_buffer_load_dword s19, s[8:11], 0x29 ; C2098929 s_buffer_load_dword s20, s[8:11], 0x2a ; C20A092A v_mul_f32_e32 v17, v15, v25 ; 1022330F v_mul_f32_e32 v19, v15, v26 ; 1026350F v_mul_f32_e32 v15, v15, v27 ; 101E370F v_rcp_f32_e32 v20, v17 ; 7E285511 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v28, s1, v23 ; 08382E01 v_sub_f32_e32 v29, s2, v21 ; 083A2A02 v_rcp_f32_e32 v30, v19 ; 7E3C5513 v_mul_f32_e32 v28, v20, v28 ; 10383914 v_sub_f32_e32 v31, s13, v23 ; 083E2E0D v_mul_f32_e32 v20, v20, v31 ; 10283F14 v_cmp_lt_f32_e32 vcc, 0, v17 ; 7C022280 v_cndmask_b32_e32 v20, v20, v28 ; 00283914 v_rcp_f32_e32 v28, v15 ; 7E38550F v_mul_f32_e32 v29, v30, v29 ; 103A3B1E v_sub_f32_e32 v31, s16, v21 ; 083E2A10 v_mul_f32_e32 v30, v30, v31 ; 103C3F1E v_cmp_lt_f32_e32 vcc, 0, v19 ; 7C022680 v_cndmask_b32_e32 v29, v30, v29 ; 003A3B1E v_sub_f32_e32 v30, s3, v24 ; 083C3003 v_mul_f32_e32 v30, v28, v30 ; 103C3D1C v_sub_f32_e32 v31, s17, v24 ; 083E3011 v_mul_f32_e32 v28, v28, v31 ; 10383F1C v_cmp_lt_f32_e32 vcc, 0, v15 ; 7C021E80 v_cndmask_b32_e32 v28, v28, v30 ; 00383D1C v_min3_f32 v20, v20, v29, v28 ; D2A20014 04723B14 v_mov_b32_e32 v28, s13 ; 7E38020D v_add_f32_e32 v28, s1, v28 ; 06383801 v_mov_b32_e32 v29, s16 ; 7E3A0210 v_add_f32_e32 v29, s2, v29 ; 063A3A02 v_mov_b32_e32 v30, s17 ; 7E3C0211 v_add_f32_e32 v30, s3, v30 ; 063C3C03 v_mad_f32 v31, 0.5, v28, -s18 ; D282001F 804A38F0 v_add_f32_e32 v31, v23, v31 ; 063E3F17 v_mac_f32_e32 v31, v20, v17 ; 3E3E2314 v_mad_f32 v17, 0.5, v29, -s19 ; D2820011 804E3AF0 v_add_f32_e32 v17, v21, v17 ; 06222315 v_mac_f32_e32 v17, v20, v19 ; 3E222714 v_mad_f32 v19, 0.5, v30, -s20 ; D2820013 80523CF0 v_add_f32_e32 v19, v24, v19 ; 06262718 v_mac_f32_e32 v19, v20, v15 ; 3E261F14 v_mad_f32 v28, 0.5, -v28, v31 ; D282001C 447E38F0 v_mad_f32 v29, 0.5, -v29, v17 ; D282001D 44463AF0 v_mad_f32 v30, 0.5, -v30, v19 ; D282001E 444E3CF0 s_or_b64 exec, exec, s[14:15] ; 88FE0E7E s_buffer_load_dword s17, s[8:11], 0x17 ; C2088917 s_buffer_load_dword s18, s[8:11], 0x43 ; C2090943 s_buffer_load_dword s19, s[8:11], 0x4f ; C209894F s_buffer_load_dword s16, s[8:11], 0x68 ; C2080968 s_buffer_load_dword s1, s[8:11], 0x0 ; C2008900 s_buffer_load_dword s2, s[8:11], 0x1 ; C2010901 s_buffer_load_dword s3, s[8:11], 0x2 ; C2018902 s_buffer_load_dword s20, s[8:11], 0x4 ; C20A0904 s_buffer_load_dword s21, s[8:11], 0x5 ; C20A8905 s_buffer_load_dword s22, s[8:11], 0x6 ; C20B0906 s_buffer_load_dword s23, s[8:11], 0x7 ; C20B8907 s_buffer_load_dword s13, s[8:11], 0x8 ; C2068908 s_buffer_load_dword s15, s[8:11], 0x9 ; C2078909 s_buffer_load_dword s14, s[8:11], 0xa ; C207090A v_sub_f32_e64 v15, 1.0, s0 ; D208000F 000000F2 v_log_f32_e32 v15, v15 ; 7E1E4F0F v_mul_legacy_f32_e32 v15, 0x3f400000, v15 ; 0E1E1EFF 3F400000 v_exp_f32_e32 v15, v15 ; 7E1E4B0F v_mul_f32_e32 v31, 0x40e00000, v15 ; 103E1EFF 40E00000 v_cubeid_f32 v35, v28, v29, v30 ; D2880023 047A3B1C v_cubema_f32 v34, v28, v29, v30 ; D28E0022 047A3B1C s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 v_cubesc_f32 v33, v28, v29, v30 ; D28A0021 047A3B1C v_cubetc_f32 v32, v28, v29, v30 ; D28C0020 047A3B1C v_rcp_f32_e64 v15, |v34| ; D354010F 00000122 v_mov_b32_e32 v28, 0x3fc00000 ; 7E3802FF 3FC00000 v_mad_f32 v29, v15, v32, v28 ; D282001D 0472410F v_mac_f32_e32 v28, v15, v33 ; 3E38430F v_mov_b32_e32 v30, v35 ; 7E3C0323 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[28:31], 15, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[36:43], s[32:35] ; F0900F00 01091C1C s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v15, v31 ; 7E1E4F1F s_buffer_load_dword s28, s[8:11], 0xb ; C20E090B s_buffer_load_dword s27, s[8:11], 0xc ; C20D890C s_buffer_load_dword s29, s[8:11], 0xd ; C20E890D s_buffer_load_dword s26, s[8:11], 0xe ; C20D090E s_buffer_load_dword s25, s[8:11], 0xf ; C20C890F v_mul_legacy_f32_e32 v15, s31, v15 ; 0E1E1E1F v_exp_f32_e32 v15, v15 ; 7E1E4B0F v_mul_f32_e32 v15, s30, v15 ; 101E1E1E v_mul_f32_e32 v19, v28, v15 ; 10261F1C v_mul_f32_e32 v17, v29, v15 ; 10221F1D v_mul_f32_e32 v15, v30, v15 ; 101E1F1E v_mov_b32_e32 v20, s24 ; 7E280218 v_mov_b32_e32 v28, 0x3f7fff58 ; 7E3802FF 3F7FFF58 v_cmp_lt_f32_e32 vcc, s12, v28 ; 7C02380C s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[30:31], vcc ; BE9E246A s_xor_b64 s[30:31], exec, s[30:31] ; 899E1E7E s_cbranch_execz BB0_6 ; BF880000 s_buffer_load_dword s33, s[8:11], 0x3b ; C210893B s_buffer_load_dword s24, s[8:11], 0x3c ; C20C093C s_buffer_load_dword s32, s[8:11], 0x3d ; C210093D s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[34:35], 0, s33 ; D0020022 00004280 s_and_saveexec_b64 s[34:35], s[34:35] ; BEA22422 s_xor_b64 s[34:35], exec, s[34:35] ; 89A2227E s_cbranch_execz BB0_7 ; BF880000 s_buffer_load_dword s33, s[8:11], 0x36 ; C2108936 s_buffer_load_dword s36, s[8:11], 0x38 ; C2120938 s_buffer_load_dword s37, s[8:11], 0x39 ; C2128939 s_buffer_load_dword s38, s[8:11], 0x3a ; C213093A s_buffer_load_dword s39, s[8:11], 0x30 ; C2138930 s_buffer_load_dword s40, s[8:11], 0x31 ; C2140931 s_buffer_load_dword s41, s[8:11], 0x32 ; C2148932 s_buffer_load_dword s42, s[8:11], 0x34 ; C2150934 s_buffer_load_dword s43, s[8:11], 0x35 ; C2158935 v_mul_f32_e32 v28, v25, v25 ; 10383319 v_mac_f32_e32 v28, v26, v26 ; 3E38351A v_mac_f32_e32 v28, v27, v27 ; 3E38371B v_rsq_clamp_f32_e32 v28, v28 ; 7E38591C s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v29, s33, v24 ; 083A3021 v_mov_b32_e32 v30, s33 ; 7E3C0221 v_sub_f32_e32 v31, s39, v23 ; 083E2E27 v_sub_f32_e32 v32, s40, v21 ; 08402A28 v_add_f32_e32 v30, s41, v30 ; 063C3C29 v_sub_f32_e32 v33, s41, v24 ; 08423029 v_mad_f32 v34, 0.5, v30, -s38 ; D2820022 809A3CF0 v_add_f32_e32 v24, v24, v34 ; 06304518 v_mul_f32_e32 v25, v28, v25 ; 1032331C v_mul_f32_e32 v26, v28, v26 ; 1034351C v_mul_f32_e32 v27, v28, v27 ; 1036371C v_rcp_f32_e32 v28, v25 ; 7E385519 v_rcp_f32_e32 v34, v26 ; 7E44551A v_rcp_f32_e32 v35, v27 ; 7E46551B v_sub_f32_e32 v36, s42, v23 ; 08482E2A v_mov_b32_e32 v37, s42 ; 7E4A022A v_add_f32_e32 v37, s39, v37 ; 064A4A27 v_mul_f32_e32 v31, v28, v31 ; 103E3F1C v_mul_f32_e32 v28, v28, v36 ; 1038491C v_mul_f32_e32 v32, v34, v32 ; 10404122 v_mul_f32_e32 v33, v35, v33 ; 10424323 v_mul_f32_e32 v29, v35, v29 ; 103A3B23 v_mad_f32 v35, 0.5, v37, -s36 ; D2820023 80924AF0 v_add_f32_e32 v23, v23, v35 ; 062E4717 v_sub_f32_e32 v35, s43, v21 ; 08462A2B v_mov_b32_e32 v36, s43 ; 7E48022B v_mul_f32_e32 v34, v34, v35 ; 10444722 v_add_f32_e32 v35, s40, v36 ; 06464828 v_cmp_lt_f32_e32 vcc, 0, v25 ; 7C023280 v_cndmask_b32_e32 v28, v28, v31 ; 00383F1C v_cmp_lt_f32_e32 vcc, 0, v26 ; 7C023480 v_cndmask_b32_e32 v31, v34, v32 ; 003E4122 v_cmp_lt_f32_e32 vcc, 0, v27 ; 7C023680 v_cndmask_b32_e32 v29, v29, v33 ; 003A431D v_min3_f32 v28, v28, v31, v29 ; D2A2001C 04763F1C v_mad_f32 v29, 0.5, v35, -s37 ; D282001D 809646F0 v_add_f32_e32 v21, v21, v29 ; 062A3B15 v_mac_f32_e32 v23, v28, v25 ; 3E2E331C v_mac_f32_e32 v21, v28, v26 ; 3E2A351C v_mac_f32_e32 v24, v28, v27 ; 3E30371C v_mad_f32 v25, 0.5, -v37, v23 ; D2820019 445E4AF0 v_mad_f32 v26, 0.5, -v35, v21 ; D282001A 445646F0 v_mad_f32 v27, 0.5, -v30, v24 ; D282001B 44623CF0 s_or_b64 exec, exec, s[34:35] ; 88FE227E v_sub_f32_e64 v21, 1.0, s0 ; D2080015 000000F2 v_log_f32_e32 v21, v21 ; 7E2A4F15 s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 v_mul_legacy_f32_e32 v21, 0x3f400000, v21 ; 0E2A2AFF 3F400000 v_exp_f32_e32 v21, v21 ; 7E2A4B15 v_mul_f32_e32 v28, 0x40e00000, v21 ; 10382AFF 40E00000 v_cubeid_f32 v32, v25, v26, v27 ; D2880020 046E3519 v_cubema_f32 v31, v25, v26, v27 ; D28E001F 046E3519 s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 v_cubesc_f32 v30, v25, v26, v27 ; D28A001E 046E3519 v_cubetc_f32 v29, v25, v26, v27 ; D28C001D 046E3519 v_rcp_f32_e64 v21, |v31| ; D3540115 0000011F v_mov_b32_e32 v25, 0x3fc00000 ; 7E3202FF 3FC00000 v_mad_f32 v26, v21, v29, v25 ; D282001A 04663B15 v_mac_f32_e32 v25, v21, v30 ; 3E323D15 v_mov_b32_e32 v27, v32 ; 7E360320 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[40:47], s[36:39] ; F0900F00 012A1719 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v21, v26 ; 7E2A4F1A v_sub_f32_e64 v26, 1.0, s12 ; D208001A 000018F2 v_mul_legacy_f32_e32 v21, s32, v21 ; 0E2A2A20 v_exp_f32_e32 v21, v21 ; 7E2A4B15 v_mul_f32_e32 v21, s24, v21 ; 102A2A18 v_mul_f32_e32 v23, v23, v21 ; 102E2B17 v_mul_f32_e32 v24, v24, v21 ; 10302B18 v_mul_f32_e32 v21, v25, v21 ; 102A2B19 v_mul_f32_e32 v23, v23, v26 ; 102E3517 v_mul_f32_e32 v24, v24, v26 ; 10303518 v_mul_f32_e32 v21, v21, v26 ; 102A3515 v_mac_f32_e32 v23, s12, v19 ; 3E2E260C v_mac_f32_e32 v24, s12, v17 ; 3E30220C v_mac_f32_e32 v21, s12, v15 ; 3E2A1E0C v_mov_b32_e32 v15, v21 ; 7E1E0315 v_mov_b32_e32 v17, v24 ; 7E220318 v_mov_b32_e32 v19, v23 ; 7E260317 s_or_b64 exec, exec, s[30:31] ; 88FE1E7E v_mul_f32_e32 v16, s19, v16 ; 10202013 v_mad_f32 v23, -v20, s18, s18 ; D2820017 20482514 v_mov_b32_e32 v20, s17 ; 7E280211 v_mul_f32_e32 v21, v23, v22 ; 102A2D17 v_mul_f32_e32 v22, s21, v8 ; 102C1015 v_mac_f32_e32 v22, s20, v9 ; 3E2C1214 v_mac_f32_e32 v22, s22, v7 ; 3E2C0E16 v_add_f32_e32 v24, s23, v22 ; 06302C17 v_sub_f32_e64 v22, 1.0, s16 ; D2080016 000020F2 v_mac_f32_e32 v22, s16, v18 ; 3E2C2410 s_buffer_load_dword s6, s[8:11], 0x10 ; C2030910 s_buffer_load_dword s5, s[8:11], 0x11 ; C2028911 s_buffer_load_dword s4, s[8:11], 0x12 ; C2020912 s_buffer_load_dword s19, s[8:11], 0x16 ; C2098916 s_buffer_load_dword s16, s[8:11], 0x44 ; C2080944 s_buffer_load_dword s7, s[8:11], 0x45 ; C2038945 s_buffer_load_dword s12, s[8:11], 0x46 ; C2060946 s_buffer_load_dword s17, s[8:11], 0x48 ; C2088948 s_buffer_load_dword s18, s[8:11], 0x49 ; C2090949 s_buffer_load_dword s8, s[8:11], 0x4b ; C204094B v_mul_f32_e32 v18, s15, v8 ; 1024100F v_mac_f32_e32 v18, s13, v9 ; 3E24120D v_mac_f32_e32 v18, s14, v7 ; 3E240E0E v_add_f32_e32 v18, s28, v18 ; 0624241C v_mul_f32_e32 v25, s29, v8 ; 1032101D v_mac_f32_e32 v25, s27, v9 ; 3E32121B v_mac_f32_e32 v25, s26, v7 ; 3E320E1A v_add_f32_e32 v25, s25, v25 ; 06323219 v_add_f32_e32 v24, v24, v3 ; 06300718 v_add_f32_e32 v26, v18, v4 ; 06340912 v_add_f32_e32 v25, v25, v6 ; 06320D19 v_mul_f32_e32 v4, v23, v13 ; 10081B17 v_mul_f32_e32 v3, v23, v14 ; 10061D17 v_mul_f32_e32 v6, s1, v9 ; 100C1201 v_mac_f32_e32 v6, s2, v8 ; 3E0C1002 v_mac_f32_e32 v6, s3, v7 ; 3E0C0E03 v_max_f32_e32 v18, 0, v6 ; 20240C80 v_mul_f32_e32 v14, v22, v24 ; 101C3116 v_mul_f32_e32 v6, v22, v26 ; 100C3516 v_mul_f32_e32 v13, v22, v25 ; 101A3316 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v20, s19, v5 ; 3E280A13 v_mul_f32_e32 v5, v22, v19 ; 100A2716 v_mul_f32_e32 v17, v22, v17 ; 10222316 v_mul_f32_e32 v15, v22, v15 ; 101E1F16 v_sub_f32_e32 v19, 1.0, v23 ; 08262EF2 v_add_f32_e32 v19, s0, v19 ; 06262600 v_add_f32_e64 v19, 0, v19 clamp ; D2060813 00022680 v_sub_f32_e32 v22, s1, v12 ; 082C1801 v_sub_f32_e32 v23, s2, v11 ; 082E1602 v_mul_f32_e32 v24, v22, v22 ; 10302D16 v_mac_f32_e32 v24, v23, v23 ; 3E302F17 v_sub_f32_e32 v25, s3, v10 ; 08321403 v_mac_f32_e32 v24, v25, v25 ; 3E303319 v_rsq_clamp_f32_e32 v24, v24 ; 7E305918 v_mul_f32_e32 v22, v24, v22 ; 102C2D18 v_mul_f32_e32 v23, v24, v23 ; 102E2F18 v_mul_f32_e32 v24, v24, v25 ; 10303318 v_mul_f32_e32 v12, v12, v9 ; 1018130C v_mad_f32 v11, -v11, v8, -v12 ; D282000B A432110B v_mul_f32_e32 v9, v22, v9 ; 10121316 v_mac_f32_e32 v9, v23, v8 ; 3E121117 v_mul_f32_e32 v8, s1, v22 ; 10102C01 v_mac_f32_e32 v8, s2, v23 ; 3E102E02 v_mad_f32 v10, -v10, v7, v11 ; D282000A 242E0F0A v_mac_f32_e32 v8, s3, v24 ; 3E103003 v_mac_f32_e32 v9, v24, v7 ; 3E120F18 v_max_f32_e32 v7, 0, v8 ; 200E1080 v_sub_f32_e32 v8, 1.0, v7 ; 08100EF2 v_mul_f32_e32 v11, v8, v8 ; 10161108 v_mul_f32_e32 v8, v8, v11 ; 10101708 v_mul_f32_e32 v8, v8, v11 ; 10101708 v_max_f32_e32 v10, 0, v10 ; 20141480 v_sub_f32_e32 v11, 1.0, v10 ; 081614F2 v_mul_f32_e32 v12, v11, v11 ; 1018170B v_mul_f32_e32 v22, v11, v12 ; 102C190B v_mad_f32 v23, -v12, v22, 1.0 ; D2820017 23CA2D0C v_mul_f32_e32 v24, v2, v23 ; 10302F02 v_sub_f32_e32 v25, 1.0, v2 ; 083204F2 v_mac_f32_e32 v2, v8, v25 ; 3E043308 v_mul_f32_e32 v25, v1, v23 ; 10322F01 v_sub_f32_e32 v26, 1.0, v1 ; 083402F2 v_mac_f32_e32 v1, v8, v26 ; 3E023508 v_mul_f32_e32 v23, v0, v23 ; 102E2F00 v_sub_f32_e32 v26, 1.0, v0 ; 083400F2 v_mac_f32_e32 v0, v8, v26 ; 3E003508 v_sub_f32_e64 v8, 1.0, s0 ; D2080008 000000F2 v_sub_f32_e32 v26, 1.0, v8 ; 083410F2 v_mov_b32_e32 v27, 0x3cf5c28f ; 7E3602FF 3CF5C28F v_madmk_f32_e32 v26, v26, v27, 0x3f77ced9 ; 4034371A 3F77CED9 v_add_f32_e32 v27, v7, v7 ; 06360F07 v_mul_f32_e32 v7, v8, v7 ; 100E0F08 v_mad_f32 v7, v27, v7, 0.5 ; D2820007 03C20F1B v_mul_f32_e32 v12, v22, v12 ; 10181916 v_mac_f32_e32 v24, v19, v12 ; 3E301913 v_mac_f32_e32 v25, v19, v12 ; 3E321913 v_mac_f32_e32 v23, v19, v12 ; 3E2E1913 v_mul_f32_e32 v8, v8, v8 ; 10101108 v_log_f32_e32 v19, v26 ; 7E264F1A v_mul_f32_e32 v8, s8, v8 ; 10101008 v_mul_f32_e32 v11, v8, v11 ; 10161708 v_mac_f32_e32 v11, 1.0, v10 ; 3E1614F2 v_rcp_f32_e32 v10, v19 ; 7E145513 v_sub_f32_e32 v19, 1.0, v18 ; 082624F2 v_mul_f32_e32 v8, v8, v19 ; 10102708 v_mac_f32_e32 v8, 1.0, v18 ; 3E1024F2 v_max_f32_e32 v9, 0, v9 ; 20121280 v_log_f32_e32 v9, v9 ; 7E124F09 v_madak_f32_e32 v8, v8, v11, 0x38d1b717 ; 42101708 38D1B717 v_mul_f32_e32 v10, 0x41200000, v10 ; 101414FF 41200000 v_mul_f32_e32 v11, v10, v10 ; 1016150A v_mul_legacy_f32_e32 v9, v11, v9 ; 0E12130B v_rcp_f32_e32 v8, v8 ; 7E105508 v_mad_f32 v10, v10, v10, 1.0 ; D282000A 03CA150A v_mul_f32_e32 v10, s18, v10 ; 10141412 v_exp_f32_e32 v9, v9 ; 7E124B09 v_mul_f32_e32 v9, v10, v9 ; 1012130A v_mul_f32_e32 v8, v9, v8 ; 10101109 v_mul_f32_e32 v9, v19, v19 ; 10122713 v_mul_f32_e32 v10, v19, v9 ; 10141313 v_mul_f32_e32 v9, v10, v9 ; 1012130A v_mul_f32_e32 v8, v18, v8 ; 10101112 v_mul_f32_e32 v8, s17, v8 ; 10101011 v_add_f32_e32 v7, -1.0, v7 ; 060E0EF3 v_mad_f32 v9, v7, v9, 1.0 ; D2820009 03CA1307 v_mad_f32 v7, v7, v12, 1.0 ; D2820007 03CA1907 v_mul_f32_e32 v7, v7, v9 ; 100E1307 v_mul_f32_e32 v7, v18, v7 ; 100E0F12 v_mac_f32_e32 v14, s16, v7 ; 3E1C0E10 v_mul_f32_e32 v9, v14, v21 ; 10122B0E v_max_f32_e32 v8, 0, v8 ; 20101080 v_mul_f32_e32 v10, s16, v8 ; 10141010 v_mac_f32_e32 v9, v2, v10 ; 3E121502 v_mac_f32_e32 v6, s7, v7 ; 3E0C0E07 v_mac_f32_e32 v13, s12, v7 ; 3E1A0E0C v_mul_f32_e32 v2, s7, v8 ; 10041007 v_mul_f32_e32 v7, s12, v8 ; 100E100C v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v3, v13, v3 ; 1006070D v_mac_f32_e32 v4, v1, v2 ; 3E080501 v_mac_f32_e32 v3, v0, v7 ; 3E060F00 v_mac_f32_e32 v9, v24, v5 ; 3E120B18 v_mac_f32_e32 v4, v25, v17 ; 3E082319 v_mac_f32_e32 v3, v23, v15 ; 3E061F17 v_add_f32_e64 v0, 0, v20 clamp ; D2060800 00022880 v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 v_mul_f32_e32 v2, s6, v1 ; 10040206 v_mac_f32_e32 v2, v9, v0 ; 3E040109 v_mul_f32_e32 v5, s5, v1 ; 100A0205 v_mac_f32_e32 v5, v4, v0 ; 3E0A0104 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mac_f32_e32 v1, v3, v0 ; 3E020103 v_cvt_pkrtz_f16_f32_e32 v0, v2, v5 ; 5E000B02 v_cvt_pkrtz_f16_f32_e32 v1, v1, v16 ; 5E022101 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 40 Code Size: 2272 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV OUT[1], IN[1].xyxy 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = fmul float %13, %33 %44 = fmul float %14, %33 %45 = fmul float %15, %33 %46 = fmul float %16, %33 %47 = fmul float %17, %34 %48 = fadd float %47, %43 %49 = fmul float %18, %34 %50 = fadd float %49, %44 %51 = fmul float %19, %34 %52 = fadd float %51, %45 %53 = fmul float %20, %34 %54 = fadd float %53, %46 %55 = fmul float %21, %35 %56 = fadd float %55, %48 %57 = fmul float %22, %35 %58 = fadd float %57, %50 %59 = fmul float %23, %35 %60 = fadd float %59, %52 %61 = fmul float %24, %35 %62 = fadd float %61, %54 %63 = fmul float %25, %36 %64 = fadd float %63, %56 %65 = fmul float %26, %36 %66 = fadd float %65, %58 %67 = fmul float %27, %36 %68 = fadd float %67, %60 %69 = fmul float %28, %36 %70 = fadd float %69, %62 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %41, float %42) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v1 ; 10000204 v_mac_f32_e32 v0, s8, v2 ; 3E000408 v_mul_f32_e32 v7, s5, v1 ; 100E0205 v_mac_f32_e32 v7, s9, v2 ; 3E0E0409 v_mul_f32_e32 v8, s6, v1 ; 10100206 v_mac_f32_e32 v8, s10, v2 ; 3E10040A v_mul_f32_e32 v1, s7, v1 ; 10020207 v_mac_f32_e32 v1, s11, v2 ; 3E02040B v_mac_f32_e32 v0, s12, v3 ; 3E00060C v_mac_f32_e32 v7, s13, v3 ; 3E0E060D v_mac_f32_e32 v8, s14, v3 ; 3E10060E v_mac_f32_e32 v1, s15, v3 ; 3E02060F v_mac_f32_e32 v0, s16, v4 ; 3E000810 v_mac_f32_e32 v7, s17, v4 ; 3E0E0811 v_mac_f32_e32 v8, s18, v4 ; 3E100812 v_mac_f32_e32 v1, s0, v4 ; 3E020800 exp 15, 32, 0, 0, 0, v5, v6, v5, v6 ; F800020F 06050605 exp 15, 12, 0, 1, 0, v0, v7, v8, v1 ; F80008CF 01080700 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 192 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[2] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[1], 2D 2: MUL TEMP[0], TEMP[0], CONST[2].xxxx 3: ADD TEMP[0], IMM[0].xxxx, -TEMP[0] 4: MOV TEMP[1].xy, IN[0].zwww 5: TEX TEMP[1], TEMP[1], SAMP[0], 2D 6: ADD TEMP[1], IMM[0].xxxx, -TEMP[1] 7: MUL TEMP[0], TEMP[0], TEMP[1] 8: ADD TEMP[0], IMM[0].xxxx, -TEMP[0] 9: MOV OUT[0], TEMP[0] 10: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %25 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %26 = load <32 x i8>, <32 x i8> addrspace(2)* %25, align 32, !tbaa !0 %27 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %28 = load <16 x i8>, <16 x i8> addrspace(2)* %27, align 16, !tbaa !0 %29 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %30 = bitcast <8 x i32> addrspace(2)* %29 to <32 x i8> addrspace(2)* %31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0 %32 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %33 = bitcast <4 x i32> addrspace(2)* %32 to <16 x i8> addrspace(2)* %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %39 = bitcast float %35 to i32 %40 = bitcast float %36 to i32 %41 = insertelement <2 x i32> undef, i32 %39, i32 0 %42 = insertelement <2 x i32> %41, i32 %40, i32 1 %43 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %42, <32 x i8> %31, <16 x i8> %34, i32 2) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = extractelement <4 x float> %43, i32 2 %47 = extractelement <4 x float> %43, i32 3 %48 = fmul float %44, %24 %49 = fmul float %45, %24 %50 = fmul float %46, %24 %51 = fmul float %47, %24 %52 = fsub float 1.000000e+00, %48 %53 = fsub float 1.000000e+00, %49 %54 = fsub float 1.000000e+00, %50 %55 = fsub float 1.000000e+00, %51 %56 = bitcast float %37 to i32 %57 = bitcast float %38 to i32 %58 = insertelement <2 x i32> undef, i32 %56, i32 0 %59 = insertelement <2 x i32> %58, i32 %57, i32 1 %60 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %59, <32 x i8> %26, <16 x i8> %28, i32 2) %61 = extractelement <4 x float> %60, i32 0 %62 = extractelement <4 x float> %60, i32 1 %63 = extractelement <4 x float> %60, i32 2 %64 = extractelement <4 x float> %60, i32 3 %65 = fsub float 1.000000e+00, %61 %66 = fsub float 1.000000e+00, %62 %67 = fsub float 1.000000e+00, %63 %68 = fsub float 1.000000e+00, %64 %69 = fmul float %52, %65 %70 = fmul float %53, %66 %71 = fmul float %54, %67 %72 = fmul float %55, %68 %73 = fsub float 1.000000e+00, %69 %74 = fsub float 1.000000e+00, %70 %75 = fsub float 1.000000e+00, %71 %76 = fsub float 1.000000e+00, %72 %77 = call i32 @llvm.SI.packf16(float %73, float %74) %78 = bitcast i32 %77 to float %79 = call i32 @llvm.SI.packf16(float %75, float %76) %80 = bitcast i32 %79 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %78, float %80, float %78, float %80) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504 s_mov_b32 m0, s9 ; BEFC0309 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[0:3], 0x8 ; C2000108 s_load_dwordx8 s[20:27], s[6:7], 0x8 ; C0CA0708 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[16:19] ; F0800F00 00850002 image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[12:15] ; F0800F00 00610404 s_waitcnt vmcnt(1) ; BF8C0771 v_mad_f32 v0, -v0, s0, 1.0 ; D2820000 23C80100 v_mad_f32 v1, -v1, s0, 1.0 ; D2820001 23C80101 v_mad_f32 v2, -v2, s0, 1.0 ; D2820002 23C80102 v_mad_f32 v3, -v3, s0, 1.0 ; D2820003 23C80103 s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v4, 1.0, v4 ; 080808F2 v_sub_f32_e32 v5, 1.0, v5 ; 080A0AF2 v_sub_f32_e32 v6, 1.0, v6 ; 080C0CF2 v_sub_f32_e32 v7, 1.0, v7 ; 080E0EF2 v_mad_f32 v0, -v0, v4, 1.0 ; D2820000 23CA0900 v_mad_f32 v1, -v1, v5, 1.0 ; D2820001 23CA0B01 v_mad_f32 v2, -v2, v6, 1.0 ; D2820002 23CA0D02 v_mad_f32 v3, -v3, v7, 1.0 ; D2820003 23CA0F03 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 196 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..25] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 1.0000, 2.0000, 0.0000, 0.5000} 0: MUL TEMP[0].xyz, IN[0].xyzz, CONST[1].xyzz 1: MUL TEMP[1], CONST[2], TEMP[0].xxxx 2: MAD TEMP[1], CONST[3], TEMP[0].yyyy, TEMP[1] 3: MAD TEMP[1].xyz, CONST[4], TEMP[0].zzzz, TEMP[1] 4: LRP TEMP[0].xyz, IN[1].wwww, TEMP[1].xyzz, TEMP[0].xyzz 5: DP3 TEMP[1].x, CONST[6].xyzz, TEMP[0].xyzz 6: ADD TEMP[1].x, TEMP[1].xxxx, CONST[6].wwww 7: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[6].xyzz 8: ADD TEMP[1].xyz, TEMP[0].xyzz, -TEMP[1].xyzz 9: LRP TEMP[0].xyz, CONST[7].xxxx, TEMP[0].xyzz, TEMP[1].xyzz 10: MOV TEMP[1].w, CONST[9].xxxx 11: MUL TEMP[2], CONST[21], IMM[0].yyyy 12: MUL TEMP[1].xyz, CONST[12].xyzz, CONST[8].xxxx 13: DP4 TEMP[3].x, IN[3], TEMP[1] 14: MAX TEMP[3].x, IMM[0].zzzz, TEMP[3].xxxx 15: ADD TEMP[3].x, TEMP[3].xxxx, CONST[10].xxxx 16: MAD TEMP[2], CONST[16], TEMP[3].xxxx, TEMP[2] 17: MUL TEMP[1].xyz, CONST[13].xyzz, CONST[8].xxxx 18: DP4 TEMP[3].x, IN[3], TEMP[1] 19: MAX TEMP[3].x, IMM[0].zzzz, TEMP[3].xxxx 20: ADD TEMP[3].x, TEMP[3].xxxx, CONST[10].xxxx 21: MAD TEMP[2], CONST[17], TEMP[3].xxxx, TEMP[2] 22: MUL TEMP[1].xyz, CONST[14].xyzz, CONST[8].xxxx 23: DP4 TEMP[3].x, IN[3], TEMP[1] 24: MAX TEMP[3].x, IMM[0].zzzz, TEMP[3].xxxx 25: ADD TEMP[3].x, TEMP[3].xxxx, CONST[10].xxxx 26: MAD TEMP[2], CONST[18], TEMP[3].xxxx, TEMP[2] 27: MUL TEMP[1].xyz, CONST[15].xyzz, CONST[8].xxxx 28: DP4 TEMP[1].x, IN[3], TEMP[1] 29: MAX TEMP[1].x, IMM[0].zzzz, TEMP[1].xxxx 30: ADD TEMP[1].x, TEMP[1].xxxx, CONST[10].xxxx 31: MAD TEMP[2], CONST[19], TEMP[1].xxxx, TEMP[2] 32: MUL TEMP[1], TEMP[2], CONST[11] 33: MUL TEMP[1].xyz, TEMP[1], CONST[0] 34: MOV TEMP[1].xyz, TEMP[1].xyzx 35: MUL TEMP[2].x, IMM[0].wwww, CONST[20].xxxx 36: MOV TEMP[1].w, TEMP[2].xxxx 37: MUL TEMP[2], CONST[22], TEMP[0].xxxx 38: MAD TEMP[2], CONST[23], TEMP[0].yyyy, TEMP[2] 39: MAD TEMP[0], CONST[24], TEMP[0].zzzz, TEMP[2] 40: ADD TEMP[0], TEMP[0], CONST[25] 41: MOV OUT[1], IN[2] 42: MOV OUT[2], TEMP[1] 43: MOV OUT[0], TEMP[0] 44: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 392) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 396) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 400) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 404) %81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 408) %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 412) %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 %85 = add i32 %5, %7 %86 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %85) %87 = extractelement <4 x float> %86, i32 0 %88 = extractelement <4 x float> %86, i32 1 %89 = extractelement <4 x float> %86, i32 2 %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 %92 = add i32 %5, %7 %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %92) %94 = extractelement <4 x float> %93, i32 3 %95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0 %97 = add i32 %5, %7 %98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97) %99 = extractelement <4 x float> %98, i32 0 %100 = extractelement <4 x float> %98, i32 1 %101 = extractelement <4 x float> %98, i32 2 %102 = extractelement <4 x float> %98, i32 3 %103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0 %105 = add i32 %5, %7 %106 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %104, i32 0, i32 %105) %107 = extractelement <4 x float> %106, i32 0 %108 = extractelement <4 x float> %106, i32 1 %109 = extractelement <4 x float> %106, i32 2 %110 = extractelement <4 x float> %106, i32 3 %111 = fmul float %87, %16 %112 = fmul float %88, %17 %113 = fmul float %89, %18 %114 = fmul float %19, %111 %115 = fmul float %20, %111 %116 = fmul float %21, %111 %117 = fmul float %22, %112 %118 = fadd float %117, %114 %119 = fmul float %23, %112 %120 = fadd float %119, %115 %121 = fmul float %24, %112 %122 = fadd float %121, %116 %123 = fmul float %25, %113 %124 = fadd float %123, %118 %125 = fmul float %26, %113 %126 = fadd float %125, %120 %127 = fmul float %27, %113 %128 = fadd float %127, %122 %129 = call float @llvm.AMDGPU.lrp(float %94, float %124, float %111) %130 = call float @llvm.AMDGPU.lrp(float %94, float %126, float %112) %131 = call float @llvm.AMDGPU.lrp(float %94, float %128, float %113) %132 = fmul float %28, %129 %133 = fmul float %29, %130 %134 = fadd float %133, %132 %135 = fmul float %30, %131 %136 = fadd float %134, %135 %137 = fadd float %136, %31 %138 = fmul float %137, %28 %139 = fmul float %137, %29 %140 = fmul float %137, %30 %141 = fsub float %129, %138 %142 = fsub float %130, %139 %143 = fsub float %131, %140 %144 = call float @llvm.AMDGPU.lrp(float %32, float %129, float %141) %145 = call float @llvm.AMDGPU.lrp(float %32, float %130, float %142) %146 = call float @llvm.AMDGPU.lrp(float %32, float %131, float %143) %147 = fmul float %64, 2.000000e+00 %148 = fmul float %65, 2.000000e+00 %149 = fmul float %66, 2.000000e+00 %150 = fmul float %39, %33 %151 = fmul float %40, %33 %152 = fmul float %41, %33 %153 = fmul float %107, %150 %154 = fmul float %108, %151 %155 = fadd float %153, %154 %156 = fmul float %109, %152 %157 = fadd float %155, %156 %158 = fmul float %110, %34 %159 = fadd float %157, %158 %160 = call float @llvm.maxnum.f32(float %159, float 0.000000e+00) %161 = fadd float %160, %35 %162 = fmul float %51, %161 %163 = fadd float %162, %147 %164 = fmul float %52, %161 %165 = fadd float %164, %148 %166 = fmul float %53, %161 %167 = fadd float %166, %149 %168 = fmul float %42, %33 %169 = fmul float %43, %33 %170 = fmul float %44, %33 %171 = fmul float %107, %168 %172 = fmul float %108, %169 %173 = fadd float %171, %172 %174 = fmul float %109, %170 %175 = fadd float %173, %174 %176 = fmul float %110, %34 %177 = fadd float %175, %176 %178 = call float @llvm.maxnum.f32(float %177, float 0.000000e+00) %179 = fadd float %178, %35 %180 = fmul float %54, %179 %181 = fadd float %180, %163 %182 = fmul float %55, %179 %183 = fadd float %182, %165 %184 = fmul float %56, %179 %185 = fadd float %184, %167 %186 = fmul float %45, %33 %187 = fmul float %46, %33 %188 = fmul float %47, %33 %189 = fmul float %107, %186 %190 = fmul float %108, %187 %191 = fadd float %189, %190 %192 = fmul float %109, %188 %193 = fadd float %191, %192 %194 = fmul float %110, %34 %195 = fadd float %193, %194 %196 = call float @llvm.maxnum.f32(float %195, float 0.000000e+00) %197 = fadd float %196, %35 %198 = fmul float %57, %197 %199 = fadd float %198, %181 %200 = fmul float %58, %197 %201 = fadd float %200, %183 %202 = fmul float %59, %197 %203 = fadd float %202, %185 %204 = fmul float %48, %33 %205 = fmul float %49, %33 %206 = fmul float %50, %33 %207 = fmul float %107, %204 %208 = fmul float %108, %205 %209 = fadd float %207, %208 %210 = fmul float %109, %206 %211 = fadd float %209, %210 %212 = fmul float %110, %34 %213 = fadd float %211, %212 %214 = call float @llvm.maxnum.f32(float %213, float 0.000000e+00) %215 = fadd float %214, %35 %216 = fmul float %60, %215 %217 = fadd float %216, %199 %218 = fmul float %61, %215 %219 = fadd float %218, %201 %220 = fmul float %62, %215 %221 = fadd float %220, %203 %222 = fmul float %217, %36 %223 = fmul float %219, %37 %224 = fmul float %221, %38 %225 = fmul float %222, %13 %226 = fmul float %223, %14 %227 = fmul float %224, %15 %228 = fmul float %63, 5.000000e-01 %229 = fmul float %67, %144 %230 = fmul float %68, %144 %231 = fmul float %69, %144 %232 = fmul float %70, %144 %233 = fmul float %71, %145 %234 = fadd float %233, %229 %235 = fmul float %72, %145 %236 = fadd float %235, %230 %237 = fmul float %73, %145 %238 = fadd float %237, %231 %239 = fmul float %74, %145 %240 = fadd float %239, %232 %241 = fmul float %75, %146 %242 = fadd float %241, %234 %243 = fmul float %76, %146 %244 = fadd float %243, %236 %245 = fmul float %77, %146 %246 = fadd float %245, %238 %247 = fmul float %78, %146 %248 = fadd float %247, %240 %249 = fadd float %242, %79 %250 = fadd float %244, %80 %251 = fadd float %246, %81 %252 = fadd float %248, %82 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %99, float %100, float %101, float %102) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %225, float %226, float %227, float %228) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %249, float %250, float %251, float %252) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[4:7], v0, s[12:15], 0 idxen ; E00C2000 80030400 buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800 buffer_load_format_xyzw v[12:15], v0, s[8:11], 0 idxen ; E00C2000 80020C00 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s12, s[0:3], 0xa ; C206010A s_buffer_load_dword s13, s[0:3], 0xc ; C206810C s_buffer_load_dword s14, s[0:3], 0xd ; C207010D s_buffer_load_dword s15, s[0:3], 0xe ; C207810E s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110 s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111 s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112 s_buffer_load_dword s19, s[0:3], 0x18 ; C2098118 s_buffer_load_dword s20, s[0:3], 0x19 ; C20A0119 s_buffer_load_dword s21, s[0:3], 0x1a ; C20A811A s_buffer_load_dword s22, s[0:3], 0x1b ; C20B011B s_buffer_load_dword s23, s[0:3], 0x1c ; C20B811C s_buffer_load_dword s24, s[0:3], 0x20 ; C20C0120 s_buffer_load_dword s25, s[0:3], 0x24 ; C20C8124 s_buffer_load_dword s26, s[0:3], 0x28 ; C20D0128 s_buffer_load_dword s27, s[0:3], 0x2c ; C20D812C s_buffer_load_dword s28, s[0:3], 0x2d ; C20E012D s_buffer_load_dword s29, s[0:3], 0x2e ; C20E812E s_buffer_load_dword s30, s[0:3], 0x30 ; C20F0130 s_buffer_load_dword s31, s[0:3], 0x31 ; C20F8131 s_buffer_load_dword s32, s[0:3], 0x32 ; C2100132 s_buffer_load_dword s33, s[0:3], 0x34 ; C2108134 s_buffer_load_dword s34, s[0:3], 0x35 ; C2110135 s_buffer_load_dword s35, s[0:3], 0x36 ; C2118136 s_buffer_load_dword s36, s[0:3], 0x38 ; C2120138 s_buffer_load_dword s37, s[0:3], 0x39 ; C2128139 s_buffer_load_dword s38, s[0:3], 0x3a ; C213013A s_buffer_load_dword s39, s[0:3], 0x3c ; C213813C s_buffer_load_dword s40, s[0:3], 0x3d ; C214013D s_buffer_load_dword s41, s[0:3], 0x3e ; C214813E s_buffer_load_dword s42, s[0:3], 0x40 ; C2150140 s_buffer_load_dword s43, s[0:3], 0x41 ; C2158141 s_buffer_load_dword s44, s[0:3], 0x42 ; C2160142 s_buffer_load_dword s45, s[0:3], 0x44 ; C2168144 s_buffer_load_dword s46, s[0:3], 0x45 ; C2170145 s_buffer_load_dword s47, s[0:3], 0x46 ; C2178146 s_buffer_load_dword s48, s[0:3], 0x48 ; C2180148 s_buffer_load_dword s49, s[0:3], 0x49 ; C2188149 s_buffer_load_dword s50, s[0:3], 0x4a ; C219014A s_buffer_load_dword s51, s[0:3], 0x4c ; C219814C s_buffer_load_dword s52, s[0:3], 0x4d ; C21A014D s_buffer_load_dword s53, s[0:3], 0x4e ; C21A814E s_buffer_load_dword s54, s[0:3], 0x50 ; C21B0150 s_buffer_load_dword s55, s[0:3], 0x54 ; C21B8154 s_buffer_load_dword s56, s[0:3], 0x55 ; C21C0155 s_buffer_load_dword s57, s[0:3], 0x56 ; C21C8156 s_buffer_load_dword s58, s[0:3], 0x58 ; C21D0158 s_buffer_load_dword s59, s[0:3], 0x59 ; C21D8159 s_buffer_load_dword s60, s[0:3], 0x5a ; C21E015A s_buffer_load_dword s61, s[0:3], 0x5b ; C21E815B s_buffer_load_dword s62, s[0:3], 0x5c ; C21F015C s_buffer_load_dword s63, s[0:3], 0x5d ; C21F815D s_buffer_load_dword s64, s[0:3], 0x5e ; C220015E s_buffer_load_dword s65, s[0:3], 0x5f ; C220815F s_buffer_load_dword s66, s[0:3], 0x60 ; C2210160 s_buffer_load_dword s67, s[0:3], 0x61 ; C2218161 s_buffer_load_dword s68, s[0:3], 0x62 ; C2220162 s_buffer_load_dword s69, s[0:3], 0x63 ; C2228163 s_buffer_load_dword s70, s[0:3], 0x64 ; C2230164 s_buffer_load_dword s71, s[0:3], 0x65 ; C2238165 s_buffer_load_dword s72, s[0:3], 0x66 ; C2240166 s_buffer_load_dword s0, s[0:3], 0x67 ; C2000167 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s7, v1 ; 10000207 v_mul_f32_e32 v1, s8, v2 ; 10020408 v_mul_f32_e32 v2, s9, v3 ; 10040609 v_mov_b32_e32 v3, s24 ; 7E060218 v_mov_b32_e32 v4, s24 ; 7E080218 v_mov_b32_e32 v5, s24 ; 7E0A0218 v_mov_b32_e32 v6, s24 ; 7E0C0218 v_mov_b32_e32 v16, s24 ; 7E200218 v_mov_b32_e32 v17, s24 ; 7E220218 v_mov_b32_e32 v18, s24 ; 7E240218 v_mov_b32_e32 v19, s24 ; 7E260218 v_mov_b32_e32 v20, s24 ; 7E280218 v_mov_b32_e32 v21, s24 ; 7E2A0218 v_mov_b32_e32 v22, s24 ; 7E2C0218 v_mov_b32_e32 v23, s24 ; 7E2E0218 v_mul_f32_e32 v23, s30, v23 ; 102E2E1E v_mul_f32_e32 v3, s31, v3 ; 1006061F v_mul_f32_e32 v4, s32, v4 ; 10080820 v_mul_f32_e32 v5, s33, v5 ; 100A0A21 v_mul_f32_e32 v6, s34, v6 ; 100C0C22 v_mul_f32_e32 v16, s35, v16 ; 10202023 v_mul_f32_e32 v17, s36, v17 ; 10222224 v_mul_f32_e32 v18, s37, v18 ; 10242425 v_mul_f32_e32 v19, s38, v19 ; 10262626 v_mul_f32_e32 v20, s39, v20 ; 10282827 v_mul_f32_e32 v21, s40, v21 ; 102A2A28 v_mul_f32_e32 v22, s41, v22 ; 102C2C29 v_mul_f32_e32 v3, v3, v13 ; 10061B03 v_mac_f32_e32 v3, v23, v12 ; 3E061917 v_mul_f32_e32 v6, v6, v13 ; 100C1B06 v_mac_f32_e32 v6, v5, v12 ; 3E0C1905 v_mul_f32_e32 v5, v18, v13 ; 100A1B12 v_mac_f32_e32 v5, v17, v12 ; 3E0A1911 v_mul_f32_e32 v13, v21, v13 ; 101A1B15 v_mac_f32_e32 v13, v20, v12 ; 3E1A1914 v_mac_f32_e32 v3, v4, v14 ; 3E061D04 v_mac_f32_e32 v6, v16, v14 ; 3E0C1D10 v_mac_f32_e32 v5, v19, v14 ; 3E0A1D13 v_mac_f32_e32 v13, v22, v14 ; 3E1A1D16 v_mac_f32_e32 v3, s25, v15 ; 3E061E19 v_mac_f32_e32 v6, s25, v15 ; 3E0C1E19 v_mac_f32_e32 v5, s25, v15 ; 3E0A1E19 v_mac_f32_e32 v13, s25, v15 ; 3E1A1E19 v_mul_f32_e32 v4, s10, v0 ; 1008000A v_mul_f32_e32 v12, s11, v0 ; 1018000B v_mul_f32_e32 v14, s12, v0 ; 101C000C v_mac_f32_e32 v4, s13, v1 ; 3E08020D v_mac_f32_e32 v12, s14, v1 ; 3E18020E v_mac_f32_e32 v14, s15, v1 ; 3E1C020F v_mac_f32_e32 v4, s16, v2 ; 3E080410 v_mac_f32_e32 v12, s17, v2 ; 3E180411 v_mac_f32_e32 v14, s18, v2 ; 3E1C0412 v_sub_f32_e32 v15, 1.0, v7 ; 081E0EF2 v_mul_f32_e32 v0, v0, v15 ; 10001F00 v_mul_f32_e32 v1, v1, v15 ; 10021F01 v_mul_f32_e32 v2, v2, v15 ; 10041F02 v_mac_f32_e32 v0, v4, v7 ; 3E000F04 v_mac_f32_e32 v1, v12, v7 ; 3E020F0C v_mac_f32_e32 v2, v14, v7 ; 3E040F0E v_add_f32_e64 v4, s55, s55 ; D2060004 00006E37 v_max_f32_e32 v3, 0, v3 ; 20060680 v_add_f32_e32 v3, s26, v3 ; 0606061A v_mac_f32_e32 v4, s42, v3 ; 3E08062A v_add_f32_e64 v7, s56, s56 ; D2060007 00007038 v_mac_f32_e32 v7, s43, v3 ; 3E0E062B v_add_f32_e64 v12, s57, s57 ; D206000C 00007239 v_mac_f32_e32 v12, s44, v3 ; 3E18062C v_max_f32_e32 v3, 0, v6 ; 20060C80 v_add_f32_e32 v3, s26, v3 ; 0606061A v_mac_f32_e32 v4, s45, v3 ; 3E08062D v_mac_f32_e32 v7, s46, v3 ; 3E0E062E v_mac_f32_e32 v12, s47, v3 ; 3E18062F v_max_f32_e32 v3, 0, v5 ; 20060A80 v_add_f32_e32 v3, s26, v3 ; 0606061A v_mac_f32_e32 v4, s48, v3 ; 3E080630 v_mac_f32_e32 v7, s49, v3 ; 3E0E0631 v_mac_f32_e32 v12, s50, v3 ; 3E180632 v_max_f32_e32 v3, 0, v13 ; 20061A80 v_add_f32_e32 v3, s26, v3 ; 0606061A v_mac_f32_e32 v4, s51, v3 ; 3E080633 v_mac_f32_e32 v7, s52, v3 ; 3E0E0634 v_mac_f32_e32 v12, s53, v3 ; 3E180635 v_mul_f32_e32 v3, s19, v0 ; 10060013 v_mac_f32_e32 v3, s20, v1 ; 3E060214 v_mac_f32_e32 v3, s21, v2 ; 3E060415 v_add_f32_e32 v3, s22, v3 ; 06060616 v_mad_f32 v5, -v3, s19, v0 ; D2820005 24002703 v_mad_f32 v6, -v3, s20, v1 ; D2820006 24042903 v_mad_f32 v3, -v3, s21, v2 ; D2820003 24082B03 v_sub_f32_e64 v13, 1.0, s23 ; D208000D 00002EF2 v_mul_f32_e32 v5, v5, v13 ; 100A1B05 v_mul_f32_e32 v6, v6, v13 ; 100C1B06 v_mul_f32_e32 v3, v3, v13 ; 10061B03 v_mac_f32_e32 v5, s23, v0 ; 3E0A0017 v_mac_f32_e32 v6, s23, v1 ; 3E0C0217 v_mac_f32_e32 v3, s23, v2 ; 3E060417 v_mul_f32_e32 v0, s58, v5 ; 10000A3A v_mul_f32_e32 v1, s59, v5 ; 10020A3B v_mul_f32_e32 v2, s60, v5 ; 10040A3C v_mul_f32_e32 v5, s61, v5 ; 100A0A3D v_mac_f32_e32 v0, s62, v6 ; 3E000C3E v_mac_f32_e32 v1, s63, v6 ; 3E020C3F v_mac_f32_e32 v2, s64, v6 ; 3E040C40 v_mac_f32_e32 v5, s65, v6 ; 3E0A0C41 v_mul_f32_e32 v4, s27, v4 ; 1008081B v_mul_f32_e32 v6, s28, v7 ; 100C0E1C v_mul_f32_e32 v7, s29, v12 ; 100E181D v_mul_f32_e32 v4, s4, v4 ; 10080804 v_mul_f32_e32 v6, s5, v6 ; 100C0C05 v_mul_f32_e32 v7, s6, v7 ; 100E0E06 v_mac_f32_e32 v0, s66, v3 ; 3E000642 v_mac_f32_e32 v1, s67, v3 ; 3E020643 v_mac_f32_e32 v2, s68, v3 ; 3E040644 v_mul_f32_e64 v12, 0.5, s54 ; D210000C 00006CF0 v_mac_f32_e32 v5, s69, v3 ; 3E0A0645 v_add_f32_e32 v0, s70, v0 ; 06000046 v_add_f32_e32 v1, s71, v1 ; 06020247 v_add_f32_e32 v2, s72, v2 ; 06040448 v_add_f32_e32 v3, s0, v5 ; 06060A00 exp 15, 32, 0, 0, 0, v8, v9, v10, v11 ; F800020F 0B0A0908 exp 15, 33, 0, 0, 0, v4, v6, v7, v12 ; F800021F 0C070604 exp 15, 12, 0, 1, 0, v0, v1, v2, v3 ; F80008CF 03020100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 24 Code Size: 888 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[1].xyz, TEMP[0].xyzz, IN[1].xyzz 3: FSLT TEMP[0].x, TEMP[0].wwww, CONST[1].xxxx 4: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx 5: KILL_IF -TEMP[0].xxxx 6: MOV TEMP[1].w, IMM[0].xxxx 7: MOV OUT[0], TEMP[1] 8: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %26 = load <32 x i8>, <32 x i8> addrspace(2)* %25, align 32, !tbaa !0 %27 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %28 = load <16 x i8>, <16 x i8> addrspace(2)* %27, align 16, !tbaa !0 %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %34 = bitcast float %29 to i32 %35 = bitcast float %30 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %26, <16 x i8> %28, i32 2) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = fmul float %39, %31 %44 = fmul float %40, %32 %45 = fmul float %41, %33 %46 = fcmp olt float %42, %24 %47 = select i1 %46, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %47) %48 = call i32 @llvm.SI.packf16(float %43, float %44) %49 = bitcast i32 %48 to float %50 = call i32 @llvm.SI.packf16(float %45, float 1.000000e+00) %51 = bitcast i32 %50 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %49, float %51, float %49, float %51) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_mov_b32 m0, s9 ; BEFC0309 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600 v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[6:9], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[4:7] ; F0800F00 00230602 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v4, v6 ; 10020D04 v_mul_f32_e32 v2, v5, v7 ; 10040F05 v_mul_f32_e32 v0, v0, v8 ; 10001100 v_cmp_gt_f32_e32 vcc, s0, v9 ; 7C081200 v_cndmask_b32_e64 v3, 0, -1.0, vcc ; D2000003 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v3 ; 7C260680 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 136 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..21] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, -1.0000, 0.0000} 0: MOV TEMP[0].w, IN[4].wwww 1: MUL TEMP[1].xyz, IN[0].xyzz, CONST[8].xyzz 2: MUL TEMP[2].x, IN[2].wwww, IN[2].wwww 3: MOV TEMP[3].y, IMM[0].xxxx 4: MOV TEMP[3].x, TEMP[2].xxxx 5: MOV TEMP[3].z, TEMP[2].xxxx 6: MUL TEMP[2], CONST[9], TEMP[1].xxxx 7: MAD TEMP[2], CONST[10], TEMP[1].yyyy, TEMP[2] 8: MAD TEMP[2].xyz, CONST[11], TEMP[1].zzzz, TEMP[2] 9: LRP TEMP[1].xyz, TEMP[3].xyzz, TEMP[2].xyzz, TEMP[1].xyzz 10: DP3 TEMP[2].x, CONST[13].xyzz, TEMP[1].xyzz 11: ADD TEMP[2].x, TEMP[2].xxxx, CONST[13].wwww 12: MUL TEMP[2].xyz, TEMP[2].xxxx, CONST[13].xyzz 13: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[2].xyzz 14: LRP TEMP[1].xyz, CONST[14].xxxx, TEMP[1].xyzz, TEMP[2].xyzz 15: MOV TEMP[2].xz, IMM[0].xxxx 16: MOV TEMP[2].y, CONST[15].xxxx 17: MUL TEMP[3].xyz, TEMP[2].zxyy, IN[1].yzxx 18: MAD TEMP[2].xyz, TEMP[2].yzxx, IN[1].zxyy, -TEMP[3].xyzz 19: MUL TEMP[3].xyz, IN[1].zxyy, TEMP[2].yzxx 20: MAD TEMP[3].xyz, IN[1].yzxx, TEMP[2].zxyy, -TEMP[3].xyzz 21: MUL TEMP[4].xyz, IN[1].zxyy, TEMP[3].yzxx 22: MAD TEMP[4].xyz, IN[1].yzxx, TEMP[3].zxyy, -TEMP[4].xyzz 23: DP3 TEMP[2].x, TEMP[4].xyzz, TEMP[2].xyzz 24: FSLT TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx 25: UIF TEMP[2].xxxx :0 26: MOV TEMP[0].w, IMM[0].zzzz 27: ELSE :0 28: MOV TEMP[0].w, IMM[0].yyyy 29: ENDIF 30: MAD TEMP[2].xy, IN[3].xyyy, CONST[16].xyyy, CONST[16].zwww 31: MAD TEMP[4].xy, IN[3].xyyy, CONST[17].xyyy, CONST[17].zwww 32: MOV TEMP[2].zw, TEMP[4].yyxy 33: MOV TEMP[4].x, CONST[4].xxxx 34: MOV TEMP[4].y, CONST[5].xxxx 35: MOV TEMP[4].z, CONST[6].xxxx 36: MOV TEMP[5].x, CONST[4].yyyy 37: MOV TEMP[5].y, CONST[5].yyyy 38: MOV TEMP[5].z, CONST[6].yyyy 39: MOV TEMP[6].x, CONST[4].zzzz 40: MOV TEMP[6].y, CONST[5].zzzz 41: MOV TEMP[6].z, CONST[6].zzzz 42: MUL TEMP[4].xyz, TEMP[4].xyzz, IN[1].xxxx 43: MAD TEMP[4].xyz, TEMP[5].xyzz, IN[1].yyyy, TEMP[4].xyzz 44: MAD TEMP[4].xyz, TEMP[6].xyzz, IN[1].zzzz, TEMP[4].xyzz 45: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 46: RSQ TEMP[5].x, TEMP[5].xxxx 47: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 48: MUL TEMP[5].xyz, CONST[0].xyzz, TEMP[3].xxxx 49: MAD TEMP[5].xyz, CONST[1].xyzz, TEMP[3].yyyy, TEMP[5].xyzz 50: MAD TEMP[3].xyz, CONST[2].xyzz, TEMP[3].zzzz, TEMP[5].xyzz 51: DP3 TEMP[5].x, TEMP[3].xyzz, TEMP[3].xyzz 52: RSQ TEMP[5].x, TEMP[5].xxxx 53: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[5].xxxx 54: MUL TEMP[5].xyz, TEMP[4].zxyy, TEMP[3].yzxx 55: MAD TEMP[5].xyz, TEMP[4].yzxx, TEMP[3].zxyy, -TEMP[5].xyzz 56: MUL TEMP[0].xyz, TEMP[5].xyzz, TEMP[0].wwww 57: MOV TEMP[5].x, TEMP[3].xxxx 58: MOV TEMP[5].y, TEMP[0].xxxx 59: MOV TEMP[5].z, TEMP[4].xxxx 60: MOV TEMP[6].y, TEMP[0].yyyy 61: MOV TEMP[6].z, TEMP[4].yyyy 62: MOV TEMP[7].x, TEMP[3].zzzz 63: MOV TEMP[7].y, TEMP[0].zzzz 64: MUL TEMP[0], CONST[18], TEMP[1].xxxx 65: MAD TEMP[0], CONST[19], TEMP[1].yyyy, TEMP[0] 66: MAD TEMP[0], CONST[20], TEMP[1].zzzz, TEMP[0] 67: ADD TEMP[0], TEMP[0], CONST[21] 68: MOV TEMP[1].xyz, TEMP[5].xyzx 69: MOV TEMP[1].w, TEMP[3].yyyy 70: MOV TEMP[3].xy, TEMP[6].yzyy 71: MOV TEMP[3].zw, TEMP[7].yyxy 72: MOV TEMP[4].x, TEMP[4].zzzz 73: MOV OUT[4], TEMP[4] 74: MOV OUT[1], TEMP[2] 75: MOV OUT[3], TEMP[3] 76: MOV OUT[0], TEMP[0] 77: MOV OUT[2], TEMP[1] 78: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 348) %73 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = add i32 %5, %7 %76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %75) %77 = extractelement <4 x float> %76, i32 0 %78 = extractelement <4 x float> %76, i32 1 %79 = extractelement <4 x float> %76, i32 2 %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %5, %7 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 %89 = add i32 %5, %7 %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %88, i32 0, i32 %89) %91 = extractelement <4 x float> %90, i32 3 %92 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %93 = load <16 x i8>, <16 x i8> addrspace(2)* %92, align 16, !tbaa !0 %94 = add i32 %5, %7 %95 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %93, i32 0, i32 %94) %96 = extractelement <4 x float> %95, i32 0 %97 = extractelement <4 x float> %95, i32 1 %98 = fmul float %77, %31 %99 = fmul float %78, %32 %100 = fmul float %79, %33 %101 = fmul float %91, %91 %102 = fmul float %34, %98 %103 = fmul float %35, %98 %104 = fmul float %36, %98 %105 = fmul float %37, %99 %106 = fadd float %105, %102 %107 = fmul float %38, %99 %108 = fadd float %107, %103 %109 = fmul float %39, %99 %110 = fadd float %109, %104 %111 = fmul float %40, %100 %112 = fadd float %111, %106 %113 = fmul float %41, %100 %114 = fadd float %113, %108 %115 = fmul float %42, %100 %116 = fadd float %115, %110 %117 = call float @llvm.AMDGPU.lrp(float %101, float %112, float %98) %118 = call float @llvm.AMDGPU.lrp(float 0.000000e+00, float %114, float %99) %119 = call float @llvm.AMDGPU.lrp(float %101, float %116, float %100) %120 = fmul float %43, %117 %121 = fmul float %44, %118 %122 = fadd float %121, %120 %123 = fmul float %45, %119 %124 = fadd float %122, %123 %125 = fadd float %124, %46 %126 = fmul float %125, %43 %127 = fmul float %125, %44 %128 = fmul float %125, %45 %129 = fsub float %117, %126 %130 = fsub float %118, %127 %131 = fsub float %119, %128 %132 = call float @llvm.AMDGPU.lrp(float %47, float %117, float %129) %133 = call float @llvm.AMDGPU.lrp(float %47, float %118, float %130) %134 = call float @llvm.AMDGPU.lrp(float %47, float %119, float %131) %135 = fmul float %85, 0.000000e+00 %136 = fmul float %86, 0.000000e+00 %137 = fmul float %48, %84 %138 = fmul float %48, %86 %139 = fsub float %138, %135 %140 = fmul float %84, 0.000000e+00 %141 = fsub float %140, %136 %142 = fmul float %85, 0.000000e+00 %143 = fsub float %142, %137 %144 = fmul float %86, %141 %145 = fmul float %84, %143 %146 = fmul float %85, %139 %147 = fmul float %85, %143 %148 = fsub float %147, %144 %149 = fmul float %86, %139 %150 = fsub float %149, %145 %151 = fmul float %84, %141 %152 = fsub float %151, %146 %153 = fmul float %86, %150 %154 = fmul float %84, %152 %155 = fmul float %85, %148 %156 = fmul float %85, %152 %157 = fsub float %156, %153 %158 = fmul float %86, %148 %159 = fsub float %158, %154 %160 = fmul float %84, %150 %161 = fsub float %160, %155 %162 = fmul float %157, %139 %163 = fmul float %159, %141 %164 = fadd float %163, %162 %165 = fmul float %161, %143 %166 = fadd float %164, %165 %167 = fcmp olt float %166, 0.000000e+00 %. = select i1 %167, float -1.000000e+00, float 1.000000e+00 %168 = fmul float %96, %49 %169 = fadd float %168, %51 %170 = fmul float %97, %50 %171 = fadd float %170, %52 %172 = fmul float %96, %53 %173 = fadd float %172, %55 %174 = fmul float %97, %54 %175 = fadd float %174, %56 %176 = fmul float %22, %84 %177 = fmul float %25, %84 %178 = fmul float %28, %84 %179 = fmul float %23, %85 %180 = fadd float %179, %176 %181 = fmul float %26, %85 %182 = fadd float %181, %177 %183 = fmul float %29, %85 %184 = fadd float %183, %178 %185 = fmul float %24, %86 %186 = fadd float %185, %180 %187 = fmul float %27, %86 %188 = fadd float %187, %182 %189 = fmul float %30, %86 %190 = fadd float %189, %184 %191 = fmul float %186, %186 %192 = fmul float %188, %188 %193 = fadd float %192, %191 %194 = fmul float %190, %190 %195 = fadd float %193, %194 %196 = call float @llvm.AMDGPU.rsq.clamped.f32(float %195) %197 = fmul float %186, %196 %198 = fmul float %188, %196 %199 = fmul float %190, %196 %200 = fmul float %13, %148 %201 = fmul float %14, %148 %202 = fmul float %15, %148 %203 = fmul float %16, %150 %204 = fadd float %203, %200 %205 = fmul float %17, %150 %206 = fadd float %205, %201 %207 = fmul float %18, %150 %208 = fadd float %207, %202 %209 = fmul float %19, %152 %210 = fadd float %209, %204 %211 = fmul float %20, %152 %212 = fadd float %211, %206 %213 = fmul float %21, %152 %214 = fadd float %213, %208 %215 = fmul float %210, %210 %216 = fmul float %212, %212 %217 = fadd float %216, %215 %218 = fmul float %214, %214 %219 = fadd float %217, %218 %220 = call float @llvm.AMDGPU.rsq.clamped.f32(float %219) %221 = fmul float %210, %220 %222 = fmul float %212, %220 %223 = fmul float %214, %220 %224 = fmul float %199, %222 %225 = fmul float %197, %223 %226 = fmul float %198, %221 %227 = fmul float %198, %223 %228 = fsub float %227, %224 %229 = fmul float %199, %221 %230 = fsub float %229, %225 %231 = fmul float %197, %222 %232 = fsub float %231, %226 %233 = fmul float %228, %. %234 = fmul float %230, %. %235 = fmul float %232, %. %236 = fmul float %57, %132 %237 = fmul float %58, %132 %238 = fmul float %59, %132 %239 = fmul float %60, %132 %240 = fmul float %61, %133 %241 = fadd float %240, %236 %242 = fmul float %62, %133 %243 = fadd float %242, %237 %244 = fmul float %63, %133 %245 = fadd float %244, %238 %246 = fmul float %64, %133 %247 = fadd float %246, %239 %248 = fmul float %65, %134 %249 = fadd float %248, %241 %250 = fmul float %66, %134 %251 = fadd float %250, %243 %252 = fmul float %67, %134 %253 = fadd float %252, %245 %254 = fmul float %68, %134 %255 = fadd float %254, %247 %256 = fadd float %249, %69 %257 = fadd float %251, %70 %258 = fadd float %253, %71 %259 = fadd float %255, %72 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %169, float %171, float %173, float %175) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %221, float %233, float %197, float %222) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %234, float %198, float %223, float %235) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %199, float %198, float %199, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %256, float %257, float %258, float %259) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0x80000000 ; 7E0202FF 80000000 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[44:47], s[2:3], 0x0 ; C0960300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908 s_load_dwordx4 s[16:19], s[8:9], 0xc ; C088090C s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s9, s[44:47], 0x3c ; C204AD3C buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[8:11], v0, s[12:15], 0 idxen ; E00C2000 80030800 buffer_load_format_xyzw v[12:15], v0, s[16:19], 0 idxen ; E00C2000 80040C00 s_buffer_load_dword s48, s[44:47], 0x40 ; C2182D40 s_buffer_load_dword s49, s[44:47], 0x41 ; C218AD41 s_buffer_load_dword s8, s[44:47], 0x42 ; C2042D42 s_buffer_load_dword s10, s[44:47], 0x43 ; C2052D43 s_buffer_load_dword s0, s[44:47], 0x0 ; C2002D00 s_buffer_load_dword s1, s[44:47], 0x1 ; C200AD01 s_buffer_load_dword s2, s[44:47], 0x2 ; C2012D02 s_buffer_load_dword s3, s[44:47], 0x4 ; C201AD04 s_buffer_load_dword s4, s[44:47], 0x5 ; C2022D05 s_buffer_load_dword s5, s[44:47], 0x6 ; C202AD06 s_buffer_load_dword s6, s[44:47], 0x8 ; C2032D08 s_buffer_load_dword s7, s[44:47], 0x9 ; C203AD09 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s8 ; 7E000208 s_buffer_load_dword s8, s[44:47], 0xa ; C2042D0A v_mov_b32_e32 v8, s10 ; 7E10020A s_buffer_load_dword s50, s[44:47], 0x10 ; C2192D10 s_buffer_load_dword s51, s[44:47], 0x11 ; C219AD11 s_buffer_load_dword s52, s[44:47], 0x12 ; C21A2D12 s_buffer_load_dword s53, s[44:47], 0x14 ; C21AAD14 s_buffer_load_dword s54, s[44:47], 0x15 ; C21B2D15 s_buffer_load_dword s37, s[44:47], 0x16 ; C212AD16 s_buffer_load_dword s55, s[44:47], 0x18 ; C21BAD18 s_buffer_load_dword s56, s[44:47], 0x19 ; C21C2D19 s_buffer_load_dword s43, s[44:47], 0x1a ; C215AD1A s_buffer_load_dword s57, s[44:47], 0x20 ; C21CAD20 s_buffer_load_dword s21, s[44:47], 0x21 ; C20AAD21 s_buffer_load_dword s26, s[44:47], 0x22 ; C20D2D22 s_buffer_load_dword s22, s[44:47], 0x24 ; C20B2D24 s_buffer_load_dword s23, s[44:47], 0x25 ; C20BAD25 s_buffer_load_dword s24, s[44:47], 0x26 ; C20C2D26 s_buffer_load_dword s28, s[44:47], 0x28 ; C20E2D28 s_buffer_load_dword s29, s[44:47], 0x29 ; C20EAD29 s_buffer_load_dword s30, s[44:47], 0x2a ; C20F2D2A s_buffer_load_dword s31, s[44:47], 0x2c ; C20FAD2C s_buffer_load_dword s32, s[44:47], 0x2d ; C2102D2D s_buffer_load_dword s34, s[44:47], 0x2e ; C2112D2E s_buffer_load_dword s10, s[44:47], 0x34 ; C2052D34 s_buffer_load_dword s12, s[44:47], 0x35 ; C2062D35 s_buffer_load_dword s14, s[44:47], 0x36 ; C2072D36 s_buffer_load_dword s19, s[44:47], 0x37 ; C209AD37 s_buffer_load_dword s11, s[44:47], 0x38 ; C205AD38 s_buffer_load_dword s58, s[44:47], 0x44 ; C21D2D44 s_buffer_load_dword s59, s[44:47], 0x45 ; C21DAD45 s_buffer_load_dword s13, s[44:47], 0x46 ; C206AD46 s_buffer_load_dword s15, s[44:47], 0x47 ; C207AD47 s_buffer_load_dword s33, s[44:47], 0x48 ; C210AD48 s_buffer_load_dword s35, s[44:47], 0x49 ; C211AD49 s_buffer_load_dword s36, s[44:47], 0x4a ; C2122D4A s_buffer_load_dword s38, s[44:47], 0x4b ; C2132D4B s_buffer_load_dword s39, s[44:47], 0x4c ; C213AD4C s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v9, 1.0, s11 ; D2080009 000016F2 s_buffer_load_dword s40, s[44:47], 0x4d ; C2142D4D s_buffer_load_dword s41, s[44:47], 0x4e ; C214AD4E v_mov_b32_e32 v10, s13 ; 7E14020D s_buffer_load_dword s42, s[44:47], 0x4f ; C2152D4F v_mov_b32_e32 v14, s15 ; 7E1C020F s_buffer_load_dword s20, s[44:47], 0x50 ; C20A2D50 s_buffer_load_dword s25, s[44:47], 0x51 ; C20CAD51 s_buffer_load_dword s27, s[44:47], 0x52 ; C20DAD52 s_buffer_load_dword s13, s[44:47], 0x53 ; C206AD53 s_buffer_load_dword s15, s[44:47], 0x54 ; C207AD54 s_buffer_load_dword s16, s[44:47], 0x55 ; C2082D55 s_buffer_load_dword s17, s[44:47], 0x56 ; C208AD56 s_buffer_load_dword s18, s[44:47], 0x57 ; C2092D57 v_mul_f32_e32 v2, s57, v2 ; 10040439 v_mul_f32_e32 v15, s50, v5 ; 101E0A32 v_mac_f32_e32 v15, s51, v6 ; 3E1E0C33 v_mac_f32_e32 v0, s48, v12 ; 3E001830 v_mac_f32_e32 v8, s49, v13 ; 3E101A31 v_mac_f32_e32 v10, s58, v12 ; 3E14183A v_mac_f32_e32 v14, s59, v13 ; 3E1C1A3B v_mul_f32_e32 v12, s53, v5 ; 10180A35 v_mul_f32_e32 v13, s55, v5 ; 101A0A37 v_mac_f32_e32 v12, s54, v6 ; 3E180C36 v_mac_f32_e32 v13, s56, v6 ; 3E1A0C38 v_mac_f32_e32 v15, s52, v7 ; 3E1E0E34 exp 15, 32, 0, 0, 0, v0, v8, v10, v14 ; F800020F 0E0A0800 v_mac_f32_e32 v12, s37, v7 ; 3E180E25 v_mac_f32_e32 v13, s43, v7 ; 3E1A0E2B s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v0, v1, v6 ; 10000D01 v_mac_f32_e32 v0, s9, v7 ; 3E000E09 v_mul_f32_e32 v8, s9, v5 ; 10100A09 v_mul_f32_e32 v1, v1, v7 ; 10020F01 v_mac_f32_e32 v1, 0, v5 ; 3E020A80 v_mad_f32 v8, 0, v6, -v8 ; D2820008 84220C80 v_mul_f32_e32 v10, v1, v7 ; 10140F01 v_mad_f32 v10, v6, v8, -v10 ; D282000A 842A1106 v_mul_f32_e32 v14, v8, v5 ; 101C0B08 v_mad_f32 v14, v7, v0, -v14 ; D282000E 843A0107 v_mul_f32_e32 v16, v0, v6 ; 10200D00 v_mad_f32 v16, v5, v1, -v16 ; D2820010 84420305 v_mul_f32_e32 v17, v14, v7 ; 10220F0E v_mad_f32 v17, v6, v16, -v17 ; D2820011 84462106 v_mul_f32_e32 v18, v16, v5 ; 10240B10 v_mad_f32 v7, v7, v10, -v18 ; D2820007 844A1507 v_mul_f32_e32 v6, v10, v6 ; 100C0D0A v_mad_f32 v5, v5, v14, -v6 ; D2820005 841A1D05 v_mul_f32_e32 v3, s21, v3 ; 10060615 v_mul_f32_e32 v4, s26, v4 ; 1008081A v_mul_f32_e32 v6, v11, v11 ; 100C170B v_mad_f32 v11, -v11, v11, 1.0 ; D282000B 23CA170B v_mul_f32_e32 v18, s22, v2 ; 10240416 v_mac_f32_e32 v18, s28, v3 ; 3E24061C v_mul_f32_e32 v19, s23, v2 ; 10260417 v_mac_f32_e32 v19, s29, v3 ; 3E26061D v_mul_f32_e32 v20, s24, v2 ; 10280418 v_mac_f32_e32 v20, s30, v3 ; 3E28061E v_mac_f32_e32 v18, s31, v4 ; 3E24081F v_mac_f32_e32 v19, s32, v4 ; 3E260820 v_mac_f32_e32 v20, s34, v4 ; 3E280822 v_mul_f32_e32 v2, v2, v11 ; 10041702 v_mul_f32_e32 v4, v4, v11 ; 10081704 v_mac_f32_e32 v2, v18, v6 ; 3E040D12 v_mac_f32_e32 v4, v20, v6 ; 3E080D14 v_mul_f32_e32 v6, s0, v10 ; 100C1400 v_mul_f32_e32 v11, s1, v10 ; 10161401 v_mul_f32_e32 v10, s2, v10 ; 10141402 v_mac_f32_e32 v6, s3, v14 ; 3E0C1C03 v_mac_f32_e32 v11, s4, v14 ; 3E161C04 v_mac_f32_e32 v10, s5, v14 ; 3E141C05 v_mac_f32_e32 v6, s6, v16 ; 3E0C2006 v_mac_f32_e32 v11, s7, v16 ; 3E162007 v_mac_f32_e32 v10, s8, v16 ; 3E142008 v_mul_f32_e32 v0, v0, v17 ; 10002300 v_mac_f32_e32 v0, v1, v7 ; 3E000F01 v_mac_f32_e32 v0, v8, v5 ; 3E000B08 v_mac_f32_e32 v3, 0, v19 ; 3E062680 v_mul_f32_e32 v1, s10, v2 ; 1002040A v_mac_f32_e32 v1, s12, v3 ; 3E02060C v_mac_f32_e32 v1, s14, v4 ; 3E02080E v_add_f32_e32 v1, s19, v1 ; 06020213 v_mad_f32 v5, -v1, s10, v2 ; D2820005 24081501 v_mad_f32 v7, -v1, s12, v3 ; D2820007 240C1901 v_mad_f32 v1, -v1, s14, v4 ; D2820001 24101D01 v_mul_f32_e32 v5, v5, v9 ; 100A1305 v_mul_f32_e32 v7, v7, v9 ; 100E1307 v_mul_f32_e32 v1, v1, v9 ; 10021301 v_mac_f32_e32 v5, s11, v2 ; 3E0A040B v_mac_f32_e32 v7, s11, v3 ; 3E0E060B v_mac_f32_e32 v1, s11, v4 ; 3E02080B v_mul_f32_e32 v2, s33, v5 ; 10040A21 v_mul_f32_e32 v3, s35, v5 ; 10060A23 v_mul_f32_e32 v4, s36, v5 ; 10080A24 v_mul_f32_e32 v5, s38, v5 ; 100A0A26 v_mac_f32_e32 v2, s39, v7 ; 3E040E27 v_mac_f32_e32 v3, s40, v7 ; 3E060E28 v_mac_f32_e32 v4, s41, v7 ; 3E080E29 v_mac_f32_e32 v5, s42, v7 ; 3E0A0E2A v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, 1.0, -1.0, vcc ; D2000000 01A9E6F2 v_mul_f32_e32 v7, v15, v15 ; 100E1F0F v_mac_f32_e32 v7, v12, v12 ; 3E0E190C v_mac_f32_e32 v7, v13, v13 ; 3E0E1B0D v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_mul_f32_e32 v8, v6, v6 ; 10100D06 v_mac_f32_e32 v8, v11, v11 ; 3E10170B v_mac_f32_e32 v8, v10, v10 ; 3E10150A v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_mul_f32_e32 v9, v7, v15 ; 10121F07 v_mul_f32_e32 v12, v7, v12 ; 10181907 v_mul_f32_e32 v7, v7, v13 ; 100E1B07 v_mul_f32_e32 v6, v8, v6 ; 100C0D08 v_mul_f32_e32 v11, v8, v11 ; 10161708 v_mul_f32_e32 v8, v8, v10 ; 10101508 v_mac_f32_e32 v2, s20, v1 ; 3E040214 v_mac_f32_e32 v3, s25, v1 ; 3E060219 v_mac_f32_e32 v4, s27, v1 ; 3E08021B v_mul_f32_e32 v10, v11, v7 ; 10140F0B v_mad_f32 v10, v12, v8, -v10 ; D282000A 842A110C v_mul_f32_e32 v10, v0, v10 ; 10141500 exp 15, 33, 0, 0, 0, v6, v10, v9, v11 ; F800021F 0B090A06 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v10, v6, v12 ; 10141906 v_mad_f32 v10, v9, v11, -v10 ; D282000A 842A1709 v_mul_f32_e32 v9, v8, v9 ; 10121308 v_mad_f32 v6, v7, v6, -v9 ; D2820006 84260D07 v_mul_f32_e32 v6, v0, v6 ; 100C0D00 v_mul_f32_e32 v0, v0, v10 ; 10001500 exp 15, 34, 0, 0, 0, v6, v12, v8, v0 ; F800022F 00080C06 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 35, 0, 0, 0, v7, v12, v7, v0 ; F800023F 00070C07 v_mac_f32_e32 v5, s13, v1 ; 3E0A020D s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v0, s15, v2 ; 0600040F v_add_f32_e32 v1, s16, v3 ; 06020610 v_add_f32_e32 v2, s17, v4 ; 06040811 v_add_f32_e32 v3, s18, v5 ; 06060A12 exp 15, 12, 0, 1, 0, v0, v1, v2, v3 ; F80008CF 03020100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 24 Code Size: 936 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0] DCL CONST[3] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000} IMM[1] FLT32 { 0.0078, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].x, IN[1].wwww 1: MOV TEMP[0].yz, IN[2].yxyy 2: MOV TEMP[1].xy, IN[2].zwzz 3: MOV TEMP[1].z, IN[3].xxxx 4: MOV TEMP[2].xy, IN[0].zwww 5: TEX TEMP[2].yw, TEMP[2], SAMP[1], 2D 6: MAD TEMP[2].xy, TEMP[2].wyyy, IMM[0].xxxx, IMM[0].yyyy 7: DP2 TEMP[3].x, TEMP[2].xyyy, TEMP[2].xyyy 8: MOV_SAT TEMP[3].x, TEMP[3].xxxx 9: ADD TEMP[3].x, IMM[0].zzzz, -TEMP[3].xxxx 10: SQRT TEMP[3].x, TEMP[3].xxxx 11: MOV TEMP[2].z, TEMP[3].xxxx 12: MOV TEMP[3].xy, IN[0].xyyy 13: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D 14: FSLT TEMP[3].x, TEMP[3].wwww, CONST[3].xxxx 15: AND TEMP[3].x, TEMP[3].xxxx, IMM[0].zzzz 16: KILL_IF -TEMP[3].xxxx 17: DP3 TEMP[3].x, IN[1].xyzz, TEMP[2].xyzz 18: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[2].xyzz 19: MOV TEMP[3].y, TEMP[0].xxxx 20: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[2].xyzz 21: MOV TEMP[3].z, TEMP[0].xxxx 22: MAD TEMP[0].xyz, TEMP[3].xyzz, IMM[0].wwww, IMM[0].wwww 23: MUL TEMP[1].x, CONST[0].xxxx, IMM[1].xxxx 24: MOV TEMP[0].w, TEMP[1].xxxx 25: MOV OUT[0], TEMP[0] 26: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %26 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %27 = load <32 x i8>, <32 x i8> addrspace(2)* %26, align 32, !tbaa !0 %28 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0 %30 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %31 = bitcast <8 x i32> addrspace(2)* %30 to <32 x i8> addrspace(2)* %32 = load <32 x i8>, <32 x i8> addrspace(2)* %31, align 32, !tbaa !0 %33 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %34 = bitcast <4 x i32> addrspace(2)* %33 to <16 x i8> addrspace(2)* %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 %36 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %49 = bitcast float %38 to i32 %50 = bitcast float %39 to i32 %51 = insertelement <2 x i32> undef, i32 %49, i32 0 %52 = insertelement <2 x i32> %51, i32 %50, i32 1 %53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %52, <32 x i8> %32, <16 x i8> %35, i32 2) %54 = extractelement <4 x float> %53, i32 1 %55 = extractelement <4 x float> %53, i32 3 %56 = fmul float %55, 2.000000e+00 %57 = fadd float %56, -1.000000e+00 %58 = fmul float %54, 2.000000e+00 %59 = fadd float %58, -1.000000e+00 %60 = fmul float %57, %57 %61 = fmul float %59, %59 %62 = fadd float %60, %61 %63 = call float @llvm.AMDIL.clamp.(float %62, float 0.000000e+00, float 1.000000e+00) %64 = fsub float 1.000000e+00, %63 %65 = call float @llvm.sqrt.f32(float %64) %66 = bitcast float %36 to i32 %67 = bitcast float %37 to i32 %68 = insertelement <2 x i32> undef, i32 %66, i32 0 %69 = insertelement <2 x i32> %68, i32 %67, i32 1 %70 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %69, <32 x i8> %27, <16 x i8> %29, i32 2) %71 = extractelement <4 x float> %70, i32 3 %72 = fcmp olt float %71, %25 %73 = select i1 %72, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %73) %74 = fmul float %40, %57 %75 = fmul float %41, %59 %76 = fadd float %75, %74 %77 = fmul float %42, %65 %78 = fadd float %76, %77 %79 = fmul float %43, %57 %80 = fmul float %44, %59 %81 = fadd float %80, %79 %82 = fmul float %45, %65 %83 = fadd float %81, %82 %84 = fmul float %46, %57 %85 = fmul float %47, %59 %86 = fadd float %85, %84 %87 = fmul float %48, %65 %88 = fadd float %86, %87 %89 = fmul float %78, 5.000000e-01 %90 = fadd float %89, 5.000000e-01 %91 = fmul float %83, 5.000000e-01 %92 = fadd float %91, 5.000000e-01 %93 = fmul float %88, 5.000000e-01 %94 = fadd float %93, 5.000000e-01 %95 = fmul float %24, 7.812500e-03 %96 = call i32 @llvm.SI.packf16(float %90, float %92) %97 = bitcast i32 %96 to float %98 = call i32 @llvm.SI.packf16(float %94, float %95) %99 = bitcast i32 %98 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %97, float %99, float %97, float %99) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 v_interp_p1_f32 v12, v0, 2, 2, [m0] ; C8300A00 v_interp_p2_f32 v12, [v12], v1, 2, 2, [m0] ; C8310A01 v_interp_p1_f32 v13, v0, 3, 2, [m0] ; C8340B00 v_interp_p2_f32 v13, [v13], v1, 3, 2, [m0] ; C8350B01 v_interp_p1_f32 v0, v0, 0, 3, [m0] ; C8000C00 v_interp_p2_f32 v0, [v0], v1, 0, 3, [m0] ; C8010C01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[4:5], 10, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[16:23], s[12:15] ; F0800A00 00640404 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C image_sample v1, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[8:11] ; F0800800 00460102 s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt vmcnt(1) ; BF8C0771 v_mad_f32 v2, 2.0, v5, -1.0 ; D2820002 03CE0AF4 v_mad_f32 v3, 2.0, v4, -1.0 ; D2820003 03CE08F4 v_mul_f32_e32 v4, v3, v3 ; 10080703 v_mac_f32_e32 v4, v2, v2 ; 3E080502 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_sub_f32_e32 v4, 1.0, v4 ; 080808F2 v_sqrt_f32_e32 v4, v4 ; 7E086704 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_cmp_gt_f32_e32 vcc, s4, v1 ; 7C080204 v_cndmask_b32_e64 v1, 0, -1.0, vcc ; D2000001 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v1 ; 7C260280 v_mul_f32_e32 v1, v2, v6 ; 10020D02 v_mac_f32_e32 v1, v3, v7 ; 3E020F03 v_mul_f32_e32 v5, v2, v9 ; 100A1302 v_mac_f32_e32 v5, v3, v10 ; 3E0A1503 v_mul_f32_e32 v2, v2, v12 ; 10041902 v_mac_f32_e32 v2, v3, v13 ; 3E041B03 v_mac_f32_e32 v1, v4, v8 ; 3E021104 v_mac_f32_e32 v5, v4, v11 ; 3E0A1704 v_mac_f32_e32 v2, v4, v0 ; 3E040104 v_mov_b32_e32 v0, 0x3c000000 ; 7E0002FF 3C000000 v_mul_f32_e32 v0, s0, v0 ; 10000000 v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0 v_mad_f32 v3, 0.5, v5, 0.5 ; D2820003 03C20AF0 v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701 v_mad_f32 v2, 0.5, v2, 0.5 ; D2820002 03C204F0 v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 316 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..21] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, -1.0000, 0.0000} 0: MOV TEMP[0].w, IN[4].wwww 1: MUL TEMP[1].xyz, IN[0].xyzz, CONST[8].xyzz 2: MUL TEMP[2].x, IN[2].wwww, IN[2].wwww 3: MOV TEMP[3].y, IMM[0].xxxx 4: MOV TEMP[3].x, TEMP[2].xxxx 5: MOV TEMP[3].z, TEMP[2].xxxx 6: MUL TEMP[2], CONST[9], TEMP[1].xxxx 7: MAD TEMP[2], CONST[10], TEMP[1].yyyy, TEMP[2] 8: MAD TEMP[2].xyz, CONST[11], TEMP[1].zzzz, TEMP[2] 9: LRP TEMP[1].xyz, TEMP[3].xyzz, TEMP[2].xyzz, TEMP[1].xyzz 10: DP3 TEMP[2].x, CONST[13].xyzz, TEMP[1].xyzz 11: ADD TEMP[2].x, TEMP[2].xxxx, CONST[13].wwww 12: MUL TEMP[2].xyz, TEMP[2].xxxx, CONST[13].xyzz 13: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[2].xyzz 14: LRP TEMP[1].xyz, CONST[14].xxxx, TEMP[1].xyzz, TEMP[2].xyzz 15: MOV TEMP[2].xz, IMM[0].xxxx 16: MOV TEMP[2].y, CONST[15].xxxx 17: MUL TEMP[3].xyz, TEMP[2].zxyy, IN[1].yzxx 18: MAD TEMP[2].xyz, TEMP[2].yzxx, IN[1].zxyy, -TEMP[3].xyzz 19: MUL TEMP[3].xyz, IN[1].zxyy, TEMP[2].yzxx 20: MAD TEMP[3].xyz, IN[1].yzxx, TEMP[2].zxyy, -TEMP[3].xyzz 21: MUL TEMP[4].xyz, IN[1].zxyy, TEMP[3].yzxx 22: MAD TEMP[4].xyz, IN[1].yzxx, TEMP[3].zxyy, -TEMP[4].xyzz 23: DP3 TEMP[2].x, TEMP[4].xyzz, TEMP[2].xyzz 24: FSLT TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx 25: UIF TEMP[2].xxxx :0 26: MOV TEMP[0].w, IMM[0].zzzz 27: ELSE :0 28: MOV TEMP[0].w, IMM[0].yyyy 29: ENDIF 30: MAD TEMP[2].xy, IN[3].xyyy, CONST[16].xyyy, CONST[16].zwww 31: MAD TEMP[4].xy, IN[3].xyyy, CONST[17].xyyy, CONST[17].zwww 32: MOV TEMP[2].zw, TEMP[4].yyxy 33: MOV TEMP[4].x, CONST[4].xxxx 34: MOV TEMP[4].y, CONST[5].xxxx 35: MOV TEMP[4].z, CONST[6].xxxx 36: MOV TEMP[5].x, CONST[4].yyyy 37: MOV TEMP[5].y, CONST[5].yyyy 38: MOV TEMP[5].z, CONST[6].yyyy 39: MOV TEMP[6].x, CONST[4].zzzz 40: MOV TEMP[6].y, CONST[5].zzzz 41: MOV TEMP[6].z, CONST[6].zzzz 42: MUL TEMP[4].xyz, TEMP[4].xyzz, IN[1].xxxx 43: MAD TEMP[4].xyz, TEMP[5].xyzz, IN[1].yyyy, TEMP[4].xyzz 44: MAD TEMP[4].xyz, TEMP[6].xyzz, IN[1].zzzz, TEMP[4].xyzz 45: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 46: RSQ TEMP[5].x, TEMP[5].xxxx 47: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 48: MUL TEMP[5].xyz, CONST[0].xyzz, TEMP[3].xxxx 49: MAD TEMP[5].xyz, CONST[1].xyzz, TEMP[3].yyyy, TEMP[5].xyzz 50: MAD TEMP[3].xyz, CONST[2].xyzz, TEMP[3].zzzz, TEMP[5].xyzz 51: DP3 TEMP[5].x, TEMP[3].xyzz, TEMP[3].xyzz 52: RSQ TEMP[5].x, TEMP[5].xxxx 53: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[5].xxxx 54: MUL TEMP[5].xyz, TEMP[4].zxyy, TEMP[3].yzxx 55: MAD TEMP[5].xyz, TEMP[4].yzxx, TEMP[3].zxyy, -TEMP[5].xyzz 56: MUL TEMP[0].xyz, TEMP[5].xyzz, TEMP[0].wwww 57: MOV TEMP[5].x, TEMP[3].xxxx 58: MOV TEMP[5].y, TEMP[0].xxxx 59: MOV TEMP[5].z, TEMP[4].xxxx 60: MOV TEMP[6].y, TEMP[0].yyyy 61: MOV TEMP[6].z, TEMP[4].yyyy 62: MOV TEMP[7].x, TEMP[3].zzzz 63: MOV TEMP[7].y, TEMP[0].zzzz 64: MUL TEMP[0], CONST[18], TEMP[1].xxxx 65: MAD TEMP[0], CONST[19], TEMP[1].yyyy, TEMP[0] 66: MAD TEMP[0], CONST[20], TEMP[1].zzzz, TEMP[0] 67: ADD TEMP[0], TEMP[0], CONST[21] 68: MOV TEMP[1].xyz, TEMP[5].xyzx 69: MOV TEMP[1].w, TEMP[3].yyyy 70: MOV TEMP[3].xy, TEMP[6].yzyy 71: MOV TEMP[3].zw, TEMP[7].yyxy 72: MOV TEMP[4].x, TEMP[4].zzzz 73: MOV OUT[4], TEMP[4] 74: MOV OUT[1], TEMP[2] 75: MOV OUT[3], TEMP[3] 76: MOV OUT[0], TEMP[0] 77: MOV OUT[2], TEMP[1] 78: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 348) %73 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = add i32 %5, %7 %76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %75) %77 = extractelement <4 x float> %76, i32 0 %78 = extractelement <4 x float> %76, i32 1 %79 = extractelement <4 x float> %76, i32 2 %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %5, %7 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 %89 = add i32 %5, %7 %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %88, i32 0, i32 %89) %91 = extractelement <4 x float> %90, i32 3 %92 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %93 = load <16 x i8>, <16 x i8> addrspace(2)* %92, align 16, !tbaa !0 %94 = add i32 %5, %7 %95 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %93, i32 0, i32 %94) %96 = extractelement <4 x float> %95, i32 0 %97 = extractelement <4 x float> %95, i32 1 %98 = fmul float %77, %31 %99 = fmul float %78, %32 %100 = fmul float %79, %33 %101 = fmul float %91, %91 %102 = fmul float %34, %98 %103 = fmul float %35, %98 %104 = fmul float %36, %98 %105 = fmul float %37, %99 %106 = fadd float %105, %102 %107 = fmul float %38, %99 %108 = fadd float %107, %103 %109 = fmul float %39, %99 %110 = fadd float %109, %104 %111 = fmul float %40, %100 %112 = fadd float %111, %106 %113 = fmul float %41, %100 %114 = fadd float %113, %108 %115 = fmul float %42, %100 %116 = fadd float %115, %110 %117 = call float @llvm.AMDGPU.lrp(float %101, float %112, float %98) %118 = call float @llvm.AMDGPU.lrp(float 0.000000e+00, float %114, float %99) %119 = call float @llvm.AMDGPU.lrp(float %101, float %116, float %100) %120 = fmul float %43, %117 %121 = fmul float %44, %118 %122 = fadd float %121, %120 %123 = fmul float %45, %119 %124 = fadd float %122, %123 %125 = fadd float %124, %46 %126 = fmul float %125, %43 %127 = fmul float %125, %44 %128 = fmul float %125, %45 %129 = fsub float %117, %126 %130 = fsub float %118, %127 %131 = fsub float %119, %128 %132 = call float @llvm.AMDGPU.lrp(float %47, float %117, float %129) %133 = call float @llvm.AMDGPU.lrp(float %47, float %118, float %130) %134 = call float @llvm.AMDGPU.lrp(float %47, float %119, float %131) %135 = fmul float %85, 0.000000e+00 %136 = fmul float %86, 0.000000e+00 %137 = fmul float %48, %84 %138 = fmul float %48, %86 %139 = fsub float %138, %135 %140 = fmul float %84, 0.000000e+00 %141 = fsub float %140, %136 %142 = fmul float %85, 0.000000e+00 %143 = fsub float %142, %137 %144 = fmul float %86, %141 %145 = fmul float %84, %143 %146 = fmul float %85, %139 %147 = fmul float %85, %143 %148 = fsub float %147, %144 %149 = fmul float %86, %139 %150 = fsub float %149, %145 %151 = fmul float %84, %141 %152 = fsub float %151, %146 %153 = fmul float %86, %150 %154 = fmul float %84, %152 %155 = fmul float %85, %148 %156 = fmul float %85, %152 %157 = fsub float %156, %153 %158 = fmul float %86, %148 %159 = fsub float %158, %154 %160 = fmul float %84, %150 %161 = fsub float %160, %155 %162 = fmul float %157, %139 %163 = fmul float %159, %141 %164 = fadd float %163, %162 %165 = fmul float %161, %143 %166 = fadd float %164, %165 %167 = fcmp olt float %166, 0.000000e+00 %. = select i1 %167, float -1.000000e+00, float 1.000000e+00 %168 = fmul float %96, %49 %169 = fadd float %168, %51 %170 = fmul float %97, %50 %171 = fadd float %170, %52 %172 = fmul float %96, %53 %173 = fadd float %172, %55 %174 = fmul float %97, %54 %175 = fadd float %174, %56 %176 = fmul float %22, %84 %177 = fmul float %25, %84 %178 = fmul float %28, %84 %179 = fmul float %23, %85 %180 = fadd float %179, %176 %181 = fmul float %26, %85 %182 = fadd float %181, %177 %183 = fmul float %29, %85 %184 = fadd float %183, %178 %185 = fmul float %24, %86 %186 = fadd float %185, %180 %187 = fmul float %27, %86 %188 = fadd float %187, %182 %189 = fmul float %30, %86 %190 = fadd float %189, %184 %191 = fmul float %186, %186 %192 = fmul float %188, %188 %193 = fadd float %192, %191 %194 = fmul float %190, %190 %195 = fadd float %193, %194 %196 = call float @llvm.AMDGPU.rsq.clamped.f32(float %195) %197 = fmul float %186, %196 %198 = fmul float %188, %196 %199 = fmul float %190, %196 %200 = fmul float %13, %148 %201 = fmul float %14, %148 %202 = fmul float %15, %148 %203 = fmul float %16, %150 %204 = fadd float %203, %200 %205 = fmul float %17, %150 %206 = fadd float %205, %201 %207 = fmul float %18, %150 %208 = fadd float %207, %202 %209 = fmul float %19, %152 %210 = fadd float %209, %204 %211 = fmul float %20, %152 %212 = fadd float %211, %206 %213 = fmul float %21, %152 %214 = fadd float %213, %208 %215 = fmul float %210, %210 %216 = fmul float %212, %212 %217 = fadd float %216, %215 %218 = fmul float %214, %214 %219 = fadd float %217, %218 %220 = call float @llvm.AMDGPU.rsq.clamped.f32(float %219) %221 = fmul float %210, %220 %222 = fmul float %212, %220 %223 = fmul float %214, %220 %224 = fmul float %199, %222 %225 = fmul float %197, %223 %226 = fmul float %198, %221 %227 = fmul float %198, %223 %228 = fsub float %227, %224 %229 = fmul float %199, %221 %230 = fsub float %229, %225 %231 = fmul float %197, %222 %232 = fsub float %231, %226 %233 = fmul float %228, %. %234 = fmul float %230, %. %235 = fmul float %232, %. %236 = fmul float %57, %132 %237 = fmul float %58, %132 %238 = fmul float %59, %132 %239 = fmul float %60, %132 %240 = fmul float %61, %133 %241 = fadd float %240, %236 %242 = fmul float %62, %133 %243 = fadd float %242, %237 %244 = fmul float %63, %133 %245 = fadd float %244, %238 %246 = fmul float %64, %133 %247 = fadd float %246, %239 %248 = fmul float %65, %134 %249 = fadd float %248, %241 %250 = fmul float %66, %134 %251 = fadd float %250, %243 %252 = fmul float %67, %134 %253 = fadd float %252, %245 %254 = fmul float %68, %134 %255 = fadd float %254, %247 %256 = fadd float %249, %69 %257 = fadd float %251, %70 %258 = fadd float %253, %71 %259 = fadd float %255, %72 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %169, float %171, float %173, float %175) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %221, float %233, float %197, float %222) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %234, float %198, float %223, float %235) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %199, float %198, float %199, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %256, float %257, float %258, float %259) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0x80000000 ; 7E0202FF 80000000 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[44:47], s[2:3], 0x0 ; C0960300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908 s_load_dwordx4 s[16:19], s[8:9], 0xc ; C088090C s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s9, s[44:47], 0x3c ; C204AD3C buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[8:11], v0, s[12:15], 0 idxen ; E00C2000 80030800 buffer_load_format_xyzw v[12:15], v0, s[16:19], 0 idxen ; E00C2000 80040C00 s_buffer_load_dword s48, s[44:47], 0x40 ; C2182D40 s_buffer_load_dword s49, s[44:47], 0x41 ; C218AD41 s_buffer_load_dword s8, s[44:47], 0x42 ; C2042D42 s_buffer_load_dword s10, s[44:47], 0x43 ; C2052D43 s_buffer_load_dword s0, s[44:47], 0x0 ; C2002D00 s_buffer_load_dword s1, s[44:47], 0x1 ; C200AD01 s_buffer_load_dword s2, s[44:47], 0x2 ; C2012D02 s_buffer_load_dword s3, s[44:47], 0x4 ; C201AD04 s_buffer_load_dword s4, s[44:47], 0x5 ; C2022D05 s_buffer_load_dword s5, s[44:47], 0x6 ; C202AD06 s_buffer_load_dword s6, s[44:47], 0x8 ; C2032D08 s_buffer_load_dword s7, s[44:47], 0x9 ; C203AD09 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s8 ; 7E000208 s_buffer_load_dword s8, s[44:47], 0xa ; C2042D0A v_mov_b32_e32 v8, s10 ; 7E10020A s_buffer_load_dword s50, s[44:47], 0x10 ; C2192D10 s_buffer_load_dword s51, s[44:47], 0x11 ; C219AD11 s_buffer_load_dword s52, s[44:47], 0x12 ; C21A2D12 s_buffer_load_dword s53, s[44:47], 0x14 ; C21AAD14 s_buffer_load_dword s54, s[44:47], 0x15 ; C21B2D15 s_buffer_load_dword s37, s[44:47], 0x16 ; C212AD16 s_buffer_load_dword s55, s[44:47], 0x18 ; C21BAD18 s_buffer_load_dword s56, s[44:47], 0x19 ; C21C2D19 s_buffer_load_dword s43, s[44:47], 0x1a ; C215AD1A s_buffer_load_dword s57, s[44:47], 0x20 ; C21CAD20 s_buffer_load_dword s21, s[44:47], 0x21 ; C20AAD21 s_buffer_load_dword s26, s[44:47], 0x22 ; C20D2D22 s_buffer_load_dword s22, s[44:47], 0x24 ; C20B2D24 s_buffer_load_dword s23, s[44:47], 0x25 ; C20BAD25 s_buffer_load_dword s24, s[44:47], 0x26 ; C20C2D26 s_buffer_load_dword s28, s[44:47], 0x28 ; C20E2D28 s_buffer_load_dword s29, s[44:47], 0x29 ; C20EAD29 s_buffer_load_dword s30, s[44:47], 0x2a ; C20F2D2A s_buffer_load_dword s31, s[44:47], 0x2c ; C20FAD2C s_buffer_load_dword s32, s[44:47], 0x2d ; C2102D2D s_buffer_load_dword s34, s[44:47], 0x2e ; C2112D2E s_buffer_load_dword s10, s[44:47], 0x34 ; C2052D34 s_buffer_load_dword s12, s[44:47], 0x35 ; C2062D35 s_buffer_load_dword s14, s[44:47], 0x36 ; C2072D36 s_buffer_load_dword s19, s[44:47], 0x37 ; C209AD37 s_buffer_load_dword s11, s[44:47], 0x38 ; C205AD38 s_buffer_load_dword s58, s[44:47], 0x44 ; C21D2D44 s_buffer_load_dword s59, s[44:47], 0x45 ; C21DAD45 s_buffer_load_dword s13, s[44:47], 0x46 ; C206AD46 s_buffer_load_dword s15, s[44:47], 0x47 ; C207AD47 s_buffer_load_dword s33, s[44:47], 0x48 ; C210AD48 s_buffer_load_dword s35, s[44:47], 0x49 ; C211AD49 s_buffer_load_dword s36, s[44:47], 0x4a ; C2122D4A s_buffer_load_dword s38, s[44:47], 0x4b ; C2132D4B s_buffer_load_dword s39, s[44:47], 0x4c ; C213AD4C s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v9, 1.0, s11 ; D2080009 000016F2 s_buffer_load_dword s40, s[44:47], 0x4d ; C2142D4D s_buffer_load_dword s41, s[44:47], 0x4e ; C214AD4E v_mov_b32_e32 v10, s13 ; 7E14020D s_buffer_load_dword s42, s[44:47], 0x4f ; C2152D4F v_mov_b32_e32 v14, s15 ; 7E1C020F s_buffer_load_dword s20, s[44:47], 0x50 ; C20A2D50 s_buffer_load_dword s25, s[44:47], 0x51 ; C20CAD51 s_buffer_load_dword s27, s[44:47], 0x52 ; C20DAD52 s_buffer_load_dword s13, s[44:47], 0x53 ; C206AD53 s_buffer_load_dword s15, s[44:47], 0x54 ; C207AD54 s_buffer_load_dword s16, s[44:47], 0x55 ; C2082D55 s_buffer_load_dword s17, s[44:47], 0x56 ; C208AD56 s_buffer_load_dword s18, s[44:47], 0x57 ; C2092D57 v_mul_f32_e32 v2, s57, v2 ; 10040439 v_mul_f32_e32 v15, s50, v5 ; 101E0A32 v_mac_f32_e32 v15, s51, v6 ; 3E1E0C33 v_mac_f32_e32 v0, s48, v12 ; 3E001830 v_mac_f32_e32 v8, s49, v13 ; 3E101A31 v_mac_f32_e32 v10, s58, v12 ; 3E14183A v_mac_f32_e32 v14, s59, v13 ; 3E1C1A3B v_mul_f32_e32 v12, s53, v5 ; 10180A35 v_mul_f32_e32 v13, s55, v5 ; 101A0A37 v_mac_f32_e32 v12, s54, v6 ; 3E180C36 v_mac_f32_e32 v13, s56, v6 ; 3E1A0C38 v_mac_f32_e32 v15, s52, v7 ; 3E1E0E34 exp 15, 32, 0, 0, 0, v0, v8, v10, v14 ; F800020F 0E0A0800 v_mac_f32_e32 v12, s37, v7 ; 3E180E25 v_mac_f32_e32 v13, s43, v7 ; 3E1A0E2B s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v0, v1, v6 ; 10000D01 v_mac_f32_e32 v0, s9, v7 ; 3E000E09 v_mul_f32_e32 v8, s9, v5 ; 10100A09 v_mul_f32_e32 v1, v1, v7 ; 10020F01 v_mac_f32_e32 v1, 0, v5 ; 3E020A80 v_mad_f32 v8, 0, v6, -v8 ; D2820008 84220C80 v_mul_f32_e32 v10, v1, v7 ; 10140F01 v_mad_f32 v10, v6, v8, -v10 ; D282000A 842A1106 v_mul_f32_e32 v14, v8, v5 ; 101C0B08 v_mad_f32 v14, v7, v0, -v14 ; D282000E 843A0107 v_mul_f32_e32 v16, v0, v6 ; 10200D00 v_mad_f32 v16, v5, v1, -v16 ; D2820010 84420305 v_mul_f32_e32 v17, v14, v7 ; 10220F0E v_mad_f32 v17, v6, v16, -v17 ; D2820011 84462106 v_mul_f32_e32 v18, v16, v5 ; 10240B10 v_mad_f32 v7, v7, v10, -v18 ; D2820007 844A1507 v_mul_f32_e32 v6, v10, v6 ; 100C0D0A v_mad_f32 v5, v5, v14, -v6 ; D2820005 841A1D05 v_mul_f32_e32 v3, s21, v3 ; 10060615 v_mul_f32_e32 v4, s26, v4 ; 1008081A v_mul_f32_e32 v6, v11, v11 ; 100C170B v_mad_f32 v11, -v11, v11, 1.0 ; D282000B 23CA170B v_mul_f32_e32 v18, s22, v2 ; 10240416 v_mac_f32_e32 v18, s28, v3 ; 3E24061C v_mul_f32_e32 v19, s23, v2 ; 10260417 v_mac_f32_e32 v19, s29, v3 ; 3E26061D v_mul_f32_e32 v20, s24, v2 ; 10280418 v_mac_f32_e32 v20, s30, v3 ; 3E28061E v_mac_f32_e32 v18, s31, v4 ; 3E24081F v_mac_f32_e32 v19, s32, v4 ; 3E260820 v_mac_f32_e32 v20, s34, v4 ; 3E280822 v_mul_f32_e32 v2, v2, v11 ; 10041702 v_mul_f32_e32 v4, v4, v11 ; 10081704 v_mac_f32_e32 v2, v18, v6 ; 3E040D12 v_mac_f32_e32 v4, v20, v6 ; 3E080D14 v_mul_f32_e32 v6, s0, v10 ; 100C1400 v_mul_f32_e32 v11, s1, v10 ; 10161401 v_mul_f32_e32 v10, s2, v10 ; 10141402 v_mac_f32_e32 v6, s3, v14 ; 3E0C1C03 v_mac_f32_e32 v11, s4, v14 ; 3E161C04 v_mac_f32_e32 v10, s5, v14 ; 3E141C05 v_mac_f32_e32 v6, s6, v16 ; 3E0C2006 v_mac_f32_e32 v11, s7, v16 ; 3E162007 v_mac_f32_e32 v10, s8, v16 ; 3E142008 v_mul_f32_e32 v0, v0, v17 ; 10002300 v_mac_f32_e32 v0, v1, v7 ; 3E000F01 v_mac_f32_e32 v0, v8, v5 ; 3E000B08 v_mac_f32_e32 v3, 0, v19 ; 3E062680 v_mul_f32_e32 v1, s10, v2 ; 1002040A v_mac_f32_e32 v1, s12, v3 ; 3E02060C v_mac_f32_e32 v1, s14, v4 ; 3E02080E v_add_f32_e32 v1, s19, v1 ; 06020213 v_mad_f32 v5, -v1, s10, v2 ; D2820005 24081501 v_mad_f32 v7, -v1, s12, v3 ; D2820007 240C1901 v_mad_f32 v1, -v1, s14, v4 ; D2820001 24101D01 v_mul_f32_e32 v5, v5, v9 ; 100A1305 v_mul_f32_e32 v7, v7, v9 ; 100E1307 v_mul_f32_e32 v1, v1, v9 ; 10021301 v_mac_f32_e32 v5, s11, v2 ; 3E0A040B v_mac_f32_e32 v7, s11, v3 ; 3E0E060B v_mac_f32_e32 v1, s11, v4 ; 3E02080B v_mul_f32_e32 v2, s33, v5 ; 10040A21 v_mul_f32_e32 v3, s35, v5 ; 10060A23 v_mul_f32_e32 v4, s36, v5 ; 10080A24 v_mul_f32_e32 v5, s38, v5 ; 100A0A26 v_mac_f32_e32 v2, s39, v7 ; 3E040E27 v_mac_f32_e32 v3, s40, v7 ; 3E060E28 v_mac_f32_e32 v4, s41, v7 ; 3E080E29 v_mac_f32_e32 v5, s42, v7 ; 3E0A0E2A v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, 1.0, -1.0, vcc ; D2000000 01A9E6F2 v_mul_f32_e32 v7, v15, v15 ; 100E1F0F v_mac_f32_e32 v7, v12, v12 ; 3E0E190C v_mac_f32_e32 v7, v13, v13 ; 3E0E1B0D v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_mul_f32_e32 v8, v6, v6 ; 10100D06 v_mac_f32_e32 v8, v11, v11 ; 3E10170B v_mac_f32_e32 v8, v10, v10 ; 3E10150A v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_mul_f32_e32 v9, v7, v15 ; 10121F07 v_mul_f32_e32 v12, v7, v12 ; 10181907 v_mul_f32_e32 v7, v7, v13 ; 100E1B07 v_mul_f32_e32 v6, v8, v6 ; 100C0D08 v_mul_f32_e32 v11, v8, v11 ; 10161708 v_mul_f32_e32 v8, v8, v10 ; 10101508 v_mac_f32_e32 v2, s20, v1 ; 3E040214 v_mac_f32_e32 v3, s25, v1 ; 3E060219 v_mac_f32_e32 v4, s27, v1 ; 3E08021B v_mul_f32_e32 v10, v11, v7 ; 10140F0B v_mad_f32 v10, v12, v8, -v10 ; D282000A 842A110C v_mul_f32_e32 v10, v0, v10 ; 10141500 exp 15, 33, 0, 0, 0, v6, v10, v9, v11 ; F800021F 0B090A06 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v10, v6, v12 ; 10141906 v_mad_f32 v10, v9, v11, -v10 ; D282000A 842A1709 v_mul_f32_e32 v9, v8, v9 ; 10121308 v_mad_f32 v6, v7, v6, -v9 ; D2820006 84260D07 v_mul_f32_e32 v6, v0, v6 ; 100C0D00 v_mul_f32_e32 v0, v0, v10 ; 10001500 exp 15, 34, 0, 0, 0, v6, v12, v8, v0 ; F800022F 00080C06 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 35, 0, 0, 0, v7, v12, v7, v0 ; F800023F 00070C07 v_mac_f32_e32 v5, s13, v1 ; 3E0A020D s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v0, s15, v2 ; 0600040F v_add_f32_e32 v1, s16, v3 ; 06020610 v_add_f32_e32 v2, s17, v4 ; 06040811 v_add_f32_e32 v3, s18, v5 ; 06060A12 exp 15, 12, 0, 1, 0, v0, v1, v2, v3 ; F80008CF 03020100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 24 Code Size: 936 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0] DCL CONST[3] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000} IMM[1] FLT32 { 0.0078, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].x, IN[1].wwww 1: MOV TEMP[0].yz, IN[2].yxyy 2: MOV TEMP[1].xy, IN[2].zwzz 3: MOV TEMP[1].z, IN[3].xxxx 4: MOV TEMP[2].xy, IN[0].zwww 5: TEX TEMP[2].yw, TEMP[2], SAMP[1], 2D 6: MAD TEMP[2].xy, TEMP[2].wyyy, IMM[0].xxxx, IMM[0].yyyy 7: DP2 TEMP[3].x, TEMP[2].xyyy, TEMP[2].xyyy 8: MOV_SAT TEMP[3].x, TEMP[3].xxxx 9: ADD TEMP[3].x, IMM[0].zzzz, -TEMP[3].xxxx 10: SQRT TEMP[3].x, TEMP[3].xxxx 11: MOV TEMP[2].z, TEMP[3].xxxx 12: MOV TEMP[3].xy, IN[0].xyyy 13: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D 14: FSLT TEMP[3].x, TEMP[3].wwww, CONST[3].xxxx 15: AND TEMP[3].x, TEMP[3].xxxx, IMM[0].zzzz 16: KILL_IF -TEMP[3].xxxx 17: DP3 TEMP[3].x, IN[1].xyzz, TEMP[2].xyzz 18: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[2].xyzz 19: MOV TEMP[3].y, TEMP[0].xxxx 20: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[2].xyzz 21: MOV TEMP[3].z, TEMP[0].xxxx 22: MAD TEMP[0].xyz, TEMP[3].xyzz, IMM[0].wwww, IMM[0].wwww 23: MUL TEMP[1].x, CONST[0].xxxx, IMM[1].xxxx 24: MOV TEMP[0].w, TEMP[1].xxxx 25: MOV OUT[0], TEMP[0] 26: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %26 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %27 = load <32 x i8>, <32 x i8> addrspace(2)* %26, align 32, !tbaa !0 %28 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0 %30 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %31 = bitcast <8 x i32> addrspace(2)* %30 to <32 x i8> addrspace(2)* %32 = load <32 x i8>, <32 x i8> addrspace(2)* %31, align 32, !tbaa !0 %33 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %34 = bitcast <4 x i32> addrspace(2)* %33 to <16 x i8> addrspace(2)* %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 %36 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %49 = bitcast float %38 to i32 %50 = bitcast float %39 to i32 %51 = insertelement <2 x i32> undef, i32 %49, i32 0 %52 = insertelement <2 x i32> %51, i32 %50, i32 1 %53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %52, <32 x i8> %32, <16 x i8> %35, i32 2) %54 = extractelement <4 x float> %53, i32 1 %55 = extractelement <4 x float> %53, i32 3 %56 = fmul float %55, 2.000000e+00 %57 = fadd float %56, -1.000000e+00 %58 = fmul float %54, 2.000000e+00 %59 = fadd float %58, -1.000000e+00 %60 = fmul float %57, %57 %61 = fmul float %59, %59 %62 = fadd float %60, %61 %63 = call float @llvm.AMDIL.clamp.(float %62, float 0.000000e+00, float 1.000000e+00) %64 = fsub float 1.000000e+00, %63 %65 = call float @llvm.sqrt.f32(float %64) %66 = bitcast float %36 to i32 %67 = bitcast float %37 to i32 %68 = insertelement <2 x i32> undef, i32 %66, i32 0 %69 = insertelement <2 x i32> %68, i32 %67, i32 1 %70 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %69, <32 x i8> %27, <16 x i8> %29, i32 2) %71 = extractelement <4 x float> %70, i32 3 %72 = fcmp olt float %71, %25 %73 = select i1 %72, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %73) %74 = fmul float %40, %57 %75 = fmul float %41, %59 %76 = fadd float %75, %74 %77 = fmul float %42, %65 %78 = fadd float %76, %77 %79 = fmul float %43, %57 %80 = fmul float %44, %59 %81 = fadd float %80, %79 %82 = fmul float %45, %65 %83 = fadd float %81, %82 %84 = fmul float %46, %57 %85 = fmul float %47, %59 %86 = fadd float %85, %84 %87 = fmul float %48, %65 %88 = fadd float %86, %87 %89 = fmul float %78, 5.000000e-01 %90 = fadd float %89, 5.000000e-01 %91 = fmul float %83, 5.000000e-01 %92 = fadd float %91, 5.000000e-01 %93 = fmul float %88, 5.000000e-01 %94 = fadd float %93, 5.000000e-01 %95 = fmul float %24, 7.812500e-03 %96 = call i32 @llvm.SI.packf16(float %90, float %92) %97 = bitcast i32 %96 to float %98 = call i32 @llvm.SI.packf16(float %94, float %95) %99 = bitcast i32 %98 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %97, float %99, float %97, float %99) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 v_interp_p1_f32 v12, v0, 2, 2, [m0] ; C8300A00 v_interp_p2_f32 v12, [v12], v1, 2, 2, [m0] ; C8310A01 v_interp_p1_f32 v13, v0, 3, 2, [m0] ; C8340B00 v_interp_p2_f32 v13, [v13], v1, 3, 2, [m0] ; C8350B01 v_interp_p1_f32 v0, v0, 0, 3, [m0] ; C8000C00 v_interp_p2_f32 v0, [v0], v1, 0, 3, [m0] ; C8010C01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[4:5], 10, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[16:23], s[12:15] ; F0800A00 00640404 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C image_sample v1, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[8:11] ; F0800800 00460102 s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt vmcnt(1) ; BF8C0771 v_mad_f32 v2, 2.0, v5, -1.0 ; D2820002 03CE0AF4 v_mad_f32 v3, 2.0, v4, -1.0 ; D2820003 03CE08F4 v_mul_f32_e32 v4, v3, v3 ; 10080703 v_mac_f32_e32 v4, v2, v2 ; 3E080502 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_sub_f32_e32 v4, 1.0, v4 ; 080808F2 v_sqrt_f32_e32 v4, v4 ; 7E086704 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_cmp_gt_f32_e32 vcc, s4, v1 ; 7C080204 v_cndmask_b32_e64 v1, 0, -1.0, vcc ; D2000001 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v1 ; 7C260280 v_mul_f32_e32 v1, v2, v6 ; 10020D02 v_mac_f32_e32 v1, v3, v7 ; 3E020F03 v_mul_f32_e32 v5, v2, v9 ; 100A1302 v_mac_f32_e32 v5, v3, v10 ; 3E0A1503 v_mul_f32_e32 v2, v2, v12 ; 10041902 v_mac_f32_e32 v2, v3, v13 ; 3E041B03 v_mac_f32_e32 v1, v4, v8 ; 3E021104 v_mac_f32_e32 v5, v4, v11 ; 3E0A1704 v_mac_f32_e32 v2, v4, v0 ; 3E040104 v_mov_b32_e32 v0, 0x3c000000 ; 7E0002FF 3C000000 v_mul_f32_e32 v0, s0, v0 ; 10000000 v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0 v_mad_f32 v3, 0.5, v5, 0.5 ; D2820003 03C20AF0 v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701 v_mad_f32 v2, 0.5, v2, 0.5 ; D2820002 03C204F0 v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 316 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..25] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MUL TEMP[0].xyz, IN[0].xyzz, CONST[14].xyzz 1: MUL TEMP[1].x, IN[2].wwww, IN[2].wwww 2: MOV TEMP[2].y, IMM[0].xxxx 3: MOV TEMP[2].x, TEMP[1].xxxx 4: MOV TEMP[2].z, TEMP[1].xxxx 5: MUL TEMP[1], CONST[15], TEMP[0].xxxx 6: MAD TEMP[1], CONST[16], TEMP[0].yyyy, TEMP[1] 7: MAD TEMP[1].xyz, CONST[17], TEMP[0].zzzz, TEMP[1] 8: LRP TEMP[0].xyz, TEMP[2].xyzz, TEMP[1].xyzz, TEMP[0].xyzz 9: DP3 TEMP[1].x, CONST[19].xyzz, TEMP[0].xyzz 10: ADD TEMP[1].x, TEMP[1].xxxx, CONST[19].wwww 11: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[19].xyzz 12: ADD TEMP[1].xyz, TEMP[0].xyzz, -TEMP[1].xyzz 13: LRP TEMP[0].xyz, CONST[20].xxxx, TEMP[0].xyzz, TEMP[1].xyzz 14: MAD TEMP[1].xy, IN[3].xyyy, CONST[21].xyyy, CONST[21].zwww 15: FSNE TEMP[2].x, CONST[1].zzzz, IMM[0].xxxx 16: UIF TEMP[2].xxxx :0 17: MUL TEMP[2], CONST[2], TEMP[0].xxxx 18: MAD TEMP[2], CONST[3], TEMP[0].yyyy, TEMP[2] 19: MAD TEMP[2], CONST[4], TEMP[0].zzzz, TEMP[2] 20: ADD TEMP[2].xyz, TEMP[2], CONST[5] 21: MOV TEMP[3].x, CONST[6].xxxx 22: MOV TEMP[3].y, CONST[7].xxxx 23: MOV TEMP[3].z, CONST[8].xxxx 24: MOV TEMP[4].x, CONST[6].yyyy 25: MOV TEMP[4].y, CONST[7].yyyy 26: MOV TEMP[4].z, CONST[8].yyyy 27: MOV TEMP[5].x, CONST[6].zzzz 28: MOV TEMP[5].y, CONST[7].zzzz 29: MOV TEMP[5].z, CONST[8].zzzz 30: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 31: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 32: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz 33: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 34: RSQ TEMP[4].x, TEMP[4].xxxx 35: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 36: MUL TEMP[4].xyz, TEMP[2].xyzz, CONST[0].wwww 37: ADD TEMP[4].xyz, CONST[0].xyzz, -TEMP[4].xyzz 38: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 39: RSQ TEMP[5].x, TEMP[5].xxxx 40: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 41: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[4].xyzz 42: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[4].xxxx 43: ADD TEMP[4].x, IMM[0].yyyy, -TEMP[4].xxxx 44: SQRT TEMP[4].x, TEMP[4].xxxx 45: MUL TEMP[4].x, CONST[1].zzzz, TEMP[4].xxxx 46: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 47: ADD TEMP[2].xyz, TEMP[2].xyzz, -TEMP[3].xyzz 48: MUL TEMP[3], CONST[10], TEMP[2].xxxx 49: MAD TEMP[3], CONST[11], TEMP[2].yyyy, TEMP[3] 50: MAD TEMP[2], CONST[12], TEMP[2].zzzz, TEMP[3] 51: ADD TEMP[2], TEMP[2], CONST[13] 52: ELSE :0 53: MUL TEMP[3], CONST[22], TEMP[0].xxxx 54: MAD TEMP[3], CONST[23], TEMP[0].yyyy, TEMP[3] 55: MAD TEMP[0], CONST[24], TEMP[0].zzzz, TEMP[3] 56: ADD TEMP[2], TEMP[0], CONST[25] 57: ENDIF 58: MOV TEMP[0].xyw, TEMP[2].xyxw 59: RCP TEMP[3].x, TEMP[2].wwww 60: MUL TEMP[3].x, CONST[1].xxxx, TEMP[3].xxxx 61: MOV_SAT TEMP[3].x, TEMP[3].xxxx 62: ADD TEMP[3].x, TEMP[2].zzzz, TEMP[3].xxxx 63: MAX TEMP[2].x, TEMP[3].xxxx, -TEMP[2].wwww 64: LRP TEMP[2].x, CONST[1].yyyy, TEMP[2].xxxx, TEMP[3].xxxx 65: MOV TEMP[0].z, TEMP[2].xxxx 66: MOV TEMP[1].xy, TEMP[1].xyxx 67: MOV OUT[1], TEMP[1] 68: MOV OUT[0], TEMP[0] 69: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 348) %40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = extractelement <4 x float> %43, i32 2 %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 %49 = add i32 %5, %7 %50 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %49) %51 = extractelement <4 x float> %50, i32 0 %52 = extractelement <4 x float> %50, i32 1 %53 = extractelement <4 x float> %50, i32 2 %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = add i32 %5, %7 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 3 %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = add i32 %5, %7 %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %61) %63 = extractelement <4 x float> %62, i32 0 %64 = extractelement <4 x float> %62, i32 1 %65 = fmul float %44, %17 %66 = fmul float %45, %18 %67 = fmul float %46, %19 %68 = fmul float %58, %58 %69 = fmul float %20, %65 %70 = fmul float %21, %65 %71 = fmul float %22, %65 %72 = fmul float %23, %65 %73 = fmul float %24, %66 %74 = fadd float %73, %69 %75 = fmul float %25, %66 %76 = fadd float %75, %70 %77 = fmul float %26, %66 %78 = fadd float %77, %71 %79 = fmul float %27, %66 %80 = fadd float %79, %72 %81 = fmul float %28, %67 %82 = fadd float %81, %74 %83 = fmul float %29, %67 %84 = fadd float %83, %76 %85 = fmul float %30, %67 %86 = fadd float %85, %78 %87 = call float @llvm.AMDGPU.lrp(float %68, float %82, float %65) %88 = call float @llvm.AMDGPU.lrp(float 0.000000e+00, float %84, float %66) %89 = call float @llvm.AMDGPU.lrp(float %68, float %86, float %67) %90 = fmul float %31, %87 %91 = fmul float %32, %88 %92 = fadd float %91, %90 %93 = fmul float %33, %89 %94 = fadd float %92, %93 %95 = fadd float %94, %34 %96 = fmul float %95, %31 %97 = fmul float %95, %32 %98 = fmul float %95, %33 %99 = fsub float %87, %96 %100 = fsub float %88, %97 %101 = fsub float %89, %98 %102 = call float @llvm.AMDGPU.lrp(float %35, float %87, float %99) %103 = call float @llvm.AMDGPU.lrp(float %35, float %88, float %100) %104 = call float @llvm.AMDGPU.lrp(float %35, float %89, float %101) %105 = fmul float %63, %36 %106 = fadd float %105, %38 %107 = fmul float %64, %37 %108 = fadd float %107, %39 %109 = fcmp une float %16, 0.000000e+00 br i1 %109, label %IF, label %ELSE IF: ; preds = %main_body %110 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %111 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %112 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %113 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %114 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %115 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %116 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %117 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %118 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %119 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %121 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %122 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %123 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %124 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %125 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %126 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %127 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %128 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %129 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %130 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %131 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %132 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %133 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %134 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %135 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %136 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %137 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %138 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %139 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %140 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %141 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %142 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %143 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %144 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %145 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %146 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %147 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %148 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %149 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %150 = fmul float %146, %102 %151 = fmul float %145, %102 %152 = fmul float %144, %102 %153 = fmul float %143, %103 %154 = fadd float %153, %150 %155 = fmul float %142, %103 %156 = fadd float %155, %151 %157 = fmul float %141, %103 %158 = fadd float %157, %152 %159 = fmul float %140, %104 %160 = fadd float %159, %154 %161 = fmul float %139, %104 %162 = fadd float %161, %156 %163 = fmul float %138, %104 %164 = fadd float %163, %158 %165 = fadd float %160, %137 %166 = fadd float %162, %136 %167 = fadd float %164, %135 %168 = fmul float %134, %51 %169 = fmul float %131, %51 %170 = fmul float %128, %51 %171 = fmul float %133, %52 %172 = fadd float %171, %168 %173 = fmul float %130, %52 %174 = fadd float %173, %169 %175 = fmul float %127, %52 %176 = fadd float %175, %170 %177 = fmul float %132, %53 %178 = fadd float %177, %172 %179 = fmul float %129, %53 %180 = fadd float %179, %174 %181 = fmul float %126, %53 %182 = fadd float %181, %176 %183 = fmul float %178, %178 %184 = fmul float %180, %180 %185 = fadd float %184, %183 %186 = fmul float %182, %182 %187 = fadd float %185, %186 %188 = call float @llvm.AMDGPU.rsq.clamped.f32(float %187) %189 = fmul float %178, %188 %190 = fmul float %180, %188 %191 = fmul float %182, %188 %192 = fmul float %165, %13 %193 = fmul float %166, %13 %194 = fmul float %167, %13 %195 = fsub float %149, %192 %196 = fsub float %148, %193 %197 = fsub float %147, %194 %198 = fmul float %195, %195 %199 = fmul float %196, %196 %200 = fadd float %199, %198 %201 = fmul float %197, %197 %202 = fadd float %200, %201 %203 = call float @llvm.AMDGPU.rsq.clamped.f32(float %202) %204 = fmul float %195, %203 %205 = fmul float %196, %203 %206 = fmul float %197, %203 %207 = fmul float %189, %204 %208 = fmul float %190, %205 %209 = fadd float %208, %207 %210 = fmul float %191, %206 %211 = fadd float %209, %210 %212 = fmul float %211, %211 %213 = fsub float 1.000000e+00, %212 %214 = call float @llvm.sqrt.f32(float %213) %215 = fmul float %16, %214 %216 = fmul float %189, %215 %217 = fmul float %190, %215 %218 = fmul float %191, %215 %219 = fsub float %165, %216 %220 = fsub float %166, %217 %221 = fsub float %167, %218 %222 = fmul float %125, %219 %223 = fmul float %124, %219 %224 = fmul float %123, %219 %225 = fmul float %122, %219 %226 = fmul float %121, %220 %227 = fadd float %226, %222 %228 = fmul float %120, %220 %229 = fadd float %228, %223 %230 = fmul float %119, %220 %231 = fadd float %230, %224 %232 = fmul float %118, %220 %233 = fadd float %232, %225 %234 = fmul float %117, %221 %235 = fadd float %234, %227 %236 = fmul float %116, %221 %237 = fadd float %236, %229 %238 = fmul float %115, %221 %239 = fadd float %238, %231 %240 = fmul float %114, %221 %241 = fadd float %240, %233 %242 = fadd float %235, %113 %243 = fadd float %237, %112 %244 = fadd float %239, %111 %245 = fadd float %241, %110 br label %ENDIF ELSE: ; preds = %main_body %246 = call float @llvm.SI.load.const(<16 x i8> %12, i32 412) %247 = call float @llvm.SI.load.const(<16 x i8> %12, i32 408) %248 = call float @llvm.SI.load.const(<16 x i8> %12, i32 404) %249 = call float @llvm.SI.load.const(<16 x i8> %12, i32 400) %250 = call float @llvm.SI.load.const(<16 x i8> %12, i32 396) %251 = call float @llvm.SI.load.const(<16 x i8> %12, i32 392) %252 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388) %253 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384) %254 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) %255 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) %256 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) %257 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) %258 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) %259 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) %260 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) %261 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) %262 = fmul float %261, %102 %263 = fmul float %260, %102 %264 = fmul float %259, %102 %265 = fmul float %258, %102 %266 = fmul float %257, %103 %267 = fadd float %266, %262 %268 = fmul float %256, %103 %269 = fadd float %268, %263 %270 = fmul float %255, %103 %271 = fadd float %270, %264 %272 = fmul float %254, %103 %273 = fadd float %272, %265 %274 = fmul float %253, %104 %275 = fadd float %274, %267 %276 = fmul float %252, %104 %277 = fadd float %276, %269 %278 = fmul float %251, %104 %279 = fadd float %278, %271 %280 = fmul float %250, %104 %281 = fadd float %280, %273 %282 = fadd float %275, %249 %283 = fadd float %277, %248 %284 = fadd float %279, %247 %285 = fadd float %281, %246 br label %ENDIF ENDIF: ; preds = %ELSE, %IF %temp8.0 = phi float [ %242, %IF ], [ %282, %ELSE ] %temp9.0 = phi float [ %243, %IF ], [ %283, %ELSE ] %temp10.0 = phi float [ %244, %IF ], [ %284, %ELSE ] %temp11.0 = phi float [ %245, %IF ], [ %285, %ELSE ] %286 = fdiv float 1.000000e+00, %temp11.0 %287 = fmul float %14, %286 %288 = call float @llvm.AMDIL.clamp.(float %287, float 0.000000e+00, float 1.000000e+00) %289 = fadd float %temp10.0, %288 %290 = fsub float -0.000000e+00, %temp11.0 %291 = call float @llvm.maxnum.f32(float %289, float %290) %292 = call float @llvm.AMDGPU.lrp(float %15, float %291, float %289) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %106, float %108, float %101, float %80) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %temp8.0, float %temp9.0, float %292, float %temp11.0) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 buffer_load_format_xyzw v[11:14], v0, s[12:15], 0 idxen ; E00C2000 80030B00 buffer_load_format_xyzw v[7:10], v0, s[16:19], 0 idxen ; E00C2000 80040700 s_buffer_load_dword s5, s[0:3], 0x38 ; C2028138 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 buffer_load_format_xyzw v[14:17], v0, s[20:23], 0 idxen ; E00C2000 80050E00 buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000 s_buffer_load_dword s6, s[0:3], 0x39 ; C2030139 s_buffer_load_dword s7, s[0:3], 0x3a ; C203813A s_buffer_load_dword s8, s[0:3], 0x3c ; C204013C s_buffer_load_dword s9, s[0:3], 0x3d ; C204813D s_buffer_load_dword s10, s[0:3], 0x3e ; C205013E s_buffer_load_dword s11, s[0:3], 0x40 ; C2058140 s_buffer_load_dword s12, s[0:3], 0x41 ; C2060141 s_buffer_load_dword s15, s[0:3], 0x42 ; C2078142 s_buffer_load_dword s16, s[0:3], 0x44 ; C2080144 s_buffer_load_dword s17, s[0:3], 0x45 ; C2088145 s_buffer_load_dword s18, s[0:3], 0x46 ; C2090146 s_buffer_load_dword s19, s[0:3], 0x4c ; C209814C s_buffer_load_dword s20, s[0:3], 0x4d ; C20A014D s_buffer_load_dword s21, s[0:3], 0x4e ; C20A814E s_buffer_load_dword s22, s[0:3], 0x4f ; C20B014F s_buffer_load_dword s23, s[0:3], 0x50 ; C20B8150 s_buffer_load_dword s13, s[0:3], 0x56 ; C2068156 s_buffer_load_dword s14, s[0:3], 0x57 ; C2070157 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_sub_f32_e64 v5, 1.0, s23 ; D2080005 00002EF2 v_mul_f32_e32 v3, s5, v11 ; 10061605 v_mul_f32_e32 v2, s6, v12 ; 10041806 v_mul_f32_e32 v4, s7, v13 ; 10081A07 v_mul_f32_e32 v6, v17, v17 ; 100C2311 v_mad_f32 v10, -v17, v17, 1.0 ; D282000A 23CA2311 v_mul_f32_e32 v11, s8, v3 ; 10160608 v_mac_f32_e32 v11, s11, v2 ; 3E16040B v_mul_f32_e32 v12, s9, v3 ; 10180609 v_mac_f32_e32 v12, s12, v2 ; 3E18040C v_mul_f32_e32 v13, s10, v3 ; 101A060A v_mac_f32_e32 v13, s15, v2 ; 3E1A040F v_mac_f32_e32 v11, s16, v4 ; 3E160810 v_mac_f32_e32 v12, s17, v4 ; 3E180811 v_mac_f32_e32 v13, s18, v4 ; 3E1A0812 v_mul_f32_e32 v14, v4, v10 ; 101C1504 v_mul_f32_e32 v10, v3, v10 ; 10141503 v_mac_f32_e32 v10, v11, v6 ; 3E140D0B v_mac_f32_e32 v14, v13, v6 ; 3E1C0D0D v_mad_f32 v6, 0, v12, v2 ; D2820006 040A1880 v_mul_f32_e32 v4, s19, v10 ; 10081413 v_mac_f32_e32 v4, s20, v6 ; 3E080C14 v_mac_f32_e32 v4, s21, v14 ; 3E081C15 v_add_f32_e32 v4, s22, v4 ; 06080816 v_mad_f32 v11, -v4, s19, v10 ; D282000B 24282704 v_mad_f32 v13, -v4, s20, v6 ; D282000D 24182904 v_mad_f32 v4, -v4, s21, v14 ; D2820004 24382B04 v_mul_f32_e32 v12, v11, v5 ; 10180B0B v_mac_f32_e32 v12, s23, v10 ; 3E181417 v_mul_f32_e32 v11, v13, v5 ; 10160B0D v_mac_f32_e32 v11, s23, v6 ; 3E160C17 v_mul_f32_e32 v10, v4, v5 ; 10140B04 v_mac_f32_e32 v10, s23, v14 ; 3E141C17 v_cmp_eq_f32_e64 s[6:7], 0, s4 ; D0040006 00000880 s_and_saveexec_b64 s[6:7], s[6:7] ; BE862406 s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E s_cbranch_execz BB0_1 ; BF880000 s_buffer_load_dword s5, s[0:3], 0x67 ; C2028167 s_buffer_load_dword s8, s[0:3], 0x62 ; C2040162 s_buffer_load_dword s9, s[0:3], 0x63 ; C2048163 s_buffer_load_dword s10, s[0:3], 0x64 ; C2050164 s_buffer_load_dword s11, s[0:3], 0x65 ; C2058165 s_buffer_load_dword s12, s[0:3], 0x66 ; C2060166 s_buffer_load_dword s15, s[0:3], 0x5d ; C207815D s_buffer_load_dword s16, s[0:3], 0x5e ; C208015E s_buffer_load_dword s17, s[0:3], 0x5f ; C208815F s_buffer_load_dword s18, s[0:3], 0x60 ; C2090160 s_buffer_load_dword s19, s[0:3], 0x61 ; C2098161 s_buffer_load_dword s20, s[0:3], 0x58 ; C20A0158 s_buffer_load_dword s21, s[0:3], 0x59 ; C20A8159 s_buffer_load_dword s22, s[0:3], 0x5a ; C20B015A s_buffer_load_dword s23, s[0:3], 0x5b ; C20B815B s_buffer_load_dword s24, s[0:3], 0x5c ; C20C015C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s20, v12 ; 100A1814 v_mul_f32_e32 v6, s21, v12 ; 100C1815 v_mul_f32_e32 v15, s22, v12 ; 101E1816 v_mul_f32_e32 v17, s23, v12 ; 10221817 v_mac_f32_e32 v5, s24, v11 ; 3E0A1618 v_mac_f32_e32 v6, s15, v11 ; 3E0C160F v_mac_f32_e32 v15, s16, v11 ; 3E1E1610 v_mac_f32_e32 v17, s17, v11 ; 3E221611 v_mac_f32_e32 v5, s18, v10 ; 3E0A1412 v_mac_f32_e32 v6, s19, v10 ; 3E0C1413 v_mac_f32_e32 v15, s8, v10 ; 3E1E1408 v_mac_f32_e32 v17, s9, v10 ; 3E221409 v_add_f32_e32 v13, s10, v5 ; 061A0A0A v_add_f32_e32 v14, s11, v6 ; 061C0C0B v_add_f32_e32 v16, s12, v15 ; 06201E0C v_add_f32_e32 v15, s5, v17 ; 061E2205 s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s12, s[0:3], 0x3f ; C206013F s_buffer_load_dword s9, s[0:3], 0x43 ; C2048143 s_buffer_load_dword s10, s[0:3], 0x54 ; C2050154 s_buffer_load_dword s11, s[0:3], 0x55 ; C2058155 v_mov_b32_e32 v5, s13 ; 7E0A020D v_mov_b32_e32 v6, s14 ; 7E0C020E s_waitcnt lgkmcnt(0) ; BF8C007F s_xor_b64 exec, exec, s[6:7] ; 89FE067E s_cbranch_execz BB0_4 ; BF880000 s_buffer_load_dword s13, s[0:3], 0x37 ; C2068137 s_buffer_load_dword s14, s[0:3], 0x32 ; C2070132 s_buffer_load_dword s15, s[0:3], 0x33 ; C2078133 s_buffer_load_dword s16, s[0:3], 0x34 ; C2080134 s_buffer_load_dword s17, s[0:3], 0x35 ; C2088135 s_buffer_load_dword s18, s[0:3], 0x36 ; C2090136 s_buffer_load_dword s19, s[0:3], 0x2d ; C209812D s_buffer_load_dword s20, s[0:3], 0x2e ; C20A012E s_buffer_load_dword s21, s[0:3], 0x2f ; C20A812F s_buffer_load_dword s22, s[0:3], 0x30 ; C20B0130 s_buffer_load_dword s23, s[0:3], 0x31 ; C20B8131 s_buffer_load_dword s24, s[0:3], 0x28 ; C20C0128 s_buffer_load_dword s25, s[0:3], 0x29 ; C20C8129 s_buffer_load_dword s26, s[0:3], 0x2a ; C20D012A s_buffer_load_dword s27, s[0:3], 0x2b ; C20D812B s_buffer_load_dword s28, s[0:3], 0x2c ; C20E012C s_buffer_load_dword s29, s[0:3], 0x1d ; C20E811D s_buffer_load_dword s30, s[0:3], 0x1e ; C20F011E s_buffer_load_dword s31, s[0:3], 0x20 ; C20F8120 s_buffer_load_dword s32, s[0:3], 0x21 ; C2100121 s_buffer_load_dword s33, s[0:3], 0x22 ; C2108122 s_buffer_load_dword s34, s[0:3], 0x16 ; C2110116 s_buffer_load_dword s35, s[0:3], 0x18 ; C2118118 s_buffer_load_dword s36, s[0:3], 0x19 ; C2120119 s_buffer_load_dword s37, s[0:3], 0x1a ; C212811A s_buffer_load_dword s38, s[0:3], 0x1c ; C213011C s_buffer_load_dword s39, s[0:3], 0x10 ; C2138110 s_buffer_load_dword s40, s[0:3], 0x11 ; C2140111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v13, s31, v7 ; 101A0E1F v_mac_f32_e32 v13, s32, v8 ; 3E1A1020 v_mac_f32_e32 v13, s33, v9 ; 3E1A1221 s_buffer_load_dword s31, s[0:3], 0x12 ; C20F8112 v_mul_f32_e32 v14, s35, v7 ; 101C0E23 v_mac_f32_e32 v14, s36, v8 ; 3E1C1024 v_mac_f32_e32 v14, s37, v9 ; 3E1C1225 v_mul_f32_e32 v7, s38, v7 ; 100E0E26 v_mac_f32_e32 v7, s29, v8 ; 3E0E101D v_mac_f32_e32 v7, s30, v9 ; 3E0E121E s_buffer_load_dword s29, s[0:3], 0x14 ; C20E8114 s_buffer_load_dword s30, s[0:3], 0x15 ; C20F0115 s_buffer_load_dword s32, s[0:3], 0x9 ; C2100109 s_buffer_load_dword s33, s[0:3], 0xa ; C210810A s_buffer_load_dword s35, s[0:3], 0xc ; C211810C s_buffer_load_dword s36, s[0:3], 0xd ; C212010D s_buffer_load_dword s37, s[0:3], 0xe ; C212810E s_buffer_load_dword s38, s[0:3], 0x0 ; C2130100 v_mul_f32_e32 v8, v14, v14 ; 10101D0E v_mac_f32_e32 v8, v7, v7 ; 3E100F07 v_mac_f32_e32 v8, v13, v13 ; 3E101B0D v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v9, s32, v12 ; 10121820 v_mul_f32_e32 v15, s33, v12 ; 101E1821 s_buffer_load_dword s32, s[0:3], 0x1 ; C2100101 v_mac_f32_e32 v9, s36, v11 ; 3E121624 v_mac_f32_e32 v15, s37, v11 ; 3E1E1625 v_mac_f32_e32 v9, s40, v10 ; 3E121428 v_mac_f32_e32 v15, s31, v10 ; 3E1E141F v_add_f32_e32 v9, s30, v9 ; 0612121E v_add_f32_e32 v15, s34, v15 ; 061E1E22 s_buffer_load_dword s30, s[0:3], 0x2 ; C20F0102 s_buffer_load_dword s31, s[0:3], 0x3 ; C20F8103 s_buffer_load_dword s33, s[0:3], 0x8 ; C2108108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v16, s31 ; 7E20021F v_mad_f32 v17, -v9, v16, s32 ; D2820011 20822109 v_mul_f32_e32 v12, s33, v12 ; 10181821 v_mac_f32_e32 v12, s35, v11 ; 3E181623 v_mac_f32_e32 v12, s39, v10 ; 3E181427 v_add_f32_e32 v10, s29, v12 ; 0614181D v_mad_f32 v11, -v10, v16, s38 ; D282000B 209A210A v_mad_f32 v12, -v15, v16, s30 ; D282000C 207A210F v_mul_f32_e32 v16, v11, v11 ; 1020170B v_mac_f32_e32 v16, v17, v17 ; 3E202311 v_mac_f32_e32 v16, v12, v12 ; 3E20190C v_rsq_clamp_f32_e32 v16, v16 ; 7E205910 v_mul_f32_e32 v14, v8, v14 ; 101C1D08 v_mul_f32_e32 v7, v8, v7 ; 100E0F08 v_mul_f32_e32 v8, v8, v13 ; 10101B08 v_mul_f32_e32 v11, v16, v11 ; 10161710 v_mul_f32_e32 v13, v16, v17 ; 101A2310 v_mul_f32_e32 v12, v16, v12 ; 10181910 v_mul_f32_e32 v11, v11, v14 ; 10161D0B v_mac_f32_e32 v11, v13, v7 ; 3E160F0D v_mac_f32_e32 v11, v12, v8 ; 3E16110C v_mad_f32 v11, -v11, v11, 1.0 ; D282000B 23CA170B v_sqrt_f32_e32 v11, v11 ; 7E16670B v_mul_f32_e32 v11, s4, v11 ; 10161604 v_mad_f32 v10, -v14, v11, v10 ; D282000A 242A170E v_mad_f32 v7, -v7, v11, v9 ; D2820007 24261707 v_mad_f32 v8, -v8, v11, v15 ; D2820008 243E1708 v_mul_f32_e32 v9, s24, v10 ; 10121418 v_mul_f32_e32 v11, s25, v10 ; 10161419 v_mul_f32_e32 v12, s26, v10 ; 1018141A v_mul_f32_e32 v10, s27, v10 ; 1014141B v_mac_f32_e32 v9, s28, v7 ; 3E120E1C v_mac_f32_e32 v11, s19, v7 ; 3E160E13 v_mac_f32_e32 v12, s20, v7 ; 3E180E14 v_mac_f32_e32 v10, s21, v7 ; 3E140E15 v_mac_f32_e32 v9, s22, v8 ; 3E121016 v_mac_f32_e32 v11, s23, v8 ; 3E161017 v_mac_f32_e32 v12, s14, v8 ; 3E18100E v_mac_f32_e32 v10, s15, v8 ; 3E14100F v_add_f32_e32 v13, s16, v9 ; 061A1210 v_add_f32_e32 v14, s17, v11 ; 061C1611 v_add_f32_e32 v16, s18, v12 ; 06201812 v_add_f32_e32 v15, s13, v10 ; 061E140D s_or_b64 exec, exec, s[6:7] ; 88FE067E v_mul_f32_e32 v3, s12, v3 ; 1006060C v_rcp_f32_e32 v7, v15 ; 7E0E550F v_mac_f32_e32 v3, s9, v2 ; 3E060409 v_mac_f32_e32 v5, s10, v0 ; 3E0A000A v_mac_f32_e32 v6, s11, v1 ; 3E0C020B v_mul_f32_e32 v0, s8, v7 ; 10000E08 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_add_f32_e32 v0, v0, v16 ; 06002100 v_max_f32_e64 v1, v0, -v15 ; D2200001 40021F00 v_sub_f32_e64 v2, 1.0, s5 ; D2080002 00000AF2 v_mul_f32_e32 v0, v0, v2 ; 10000500 v_mac_f32_e32 v0, s5, v1 ; 3E000205 exp 15, 32, 0, 0, 0, v5, v6, v4, v3 ; F800020F 03040605 exp 15, 12, 0, 1, 0, v13, v14, v0, v15 ; F80008CF 0F000E0D s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 20 Code Size: 1048 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1] DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].w, TEMP[0], SAMP[0], 2D 2: FSLT TEMP[0].x, TEMP[0].wwww, CONST[1].xxxx 3: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx 4: KILL_IF -TEMP[0].xxxx 5: MOV OUT[0], IMM[0].yyyy 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %26 = load <32 x i8>, <32 x i8> addrspace(2)* %25, align 32, !tbaa !0 %27 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %28 = load <16 x i8>, <16 x i8> addrspace(2)* %27, align 16, !tbaa !0 %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %31 = bitcast float %29 to i32 %32 = bitcast float %30 to i32 %33 = insertelement <2 x i32> undef, i32 %31, i32 0 %34 = insertelement <2 x i32> %33, i32 %32, i32 1 %35 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %34, <32 x i8> %26, <16 x i8> %28, i32 2) %36 = extractelement <4 x float> %35, i32 3 %37 = fcmp olt float %36, %24 %38 = select i1 %37, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %38) %39 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[4:7] ; F0800800 00230002 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_cmp_gt_f32_e32 vcc, s0, v0 ; 7C080000 v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080 v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080 exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 92 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..25] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MUL TEMP[0].xyz, IN[0].xyzz, CONST[14].xyzz 1: MUL TEMP[1].x, IN[2].wwww, IN[2].wwww 2: MOV TEMP[2].y, IMM[0].xxxx 3: MOV TEMP[2].x, TEMP[1].xxxx 4: MOV TEMP[2].z, TEMP[1].xxxx 5: MUL TEMP[1], CONST[15], TEMP[0].xxxx 6: MAD TEMP[1], CONST[16], TEMP[0].yyyy, TEMP[1] 7: MAD TEMP[1].xyz, CONST[17], TEMP[0].zzzz, TEMP[1] 8: LRP TEMP[0].xyz, TEMP[2].xyzz, TEMP[1].xyzz, TEMP[0].xyzz 9: DP3 TEMP[1].x, CONST[19].xyzz, TEMP[0].xyzz 10: ADD TEMP[1].x, TEMP[1].xxxx, CONST[19].wwww 11: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[19].xyzz 12: ADD TEMP[1].xyz, TEMP[0].xyzz, -TEMP[1].xyzz 13: LRP TEMP[0].xyz, CONST[20].xxxx, TEMP[0].xyzz, TEMP[1].xyzz 14: MAD TEMP[1].xy, IN[3].xyyy, CONST[21].xyyy, CONST[21].zwww 15: FSNE TEMP[2].x, CONST[1].zzzz, IMM[0].xxxx 16: UIF TEMP[2].xxxx :0 17: MUL TEMP[2], CONST[2], TEMP[0].xxxx 18: MAD TEMP[2], CONST[3], TEMP[0].yyyy, TEMP[2] 19: MAD TEMP[2], CONST[4], TEMP[0].zzzz, TEMP[2] 20: ADD TEMP[2].xyz, TEMP[2], CONST[5] 21: MOV TEMP[3].x, CONST[6].xxxx 22: MOV TEMP[3].y, CONST[7].xxxx 23: MOV TEMP[3].z, CONST[8].xxxx 24: MOV TEMP[4].x, CONST[6].yyyy 25: MOV TEMP[4].y, CONST[7].yyyy 26: MOV TEMP[4].z, CONST[8].yyyy 27: MOV TEMP[5].x, CONST[6].zzzz 28: MOV TEMP[5].y, CONST[7].zzzz 29: MOV TEMP[5].z, CONST[8].zzzz 30: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 31: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 32: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz 33: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 34: RSQ TEMP[4].x, TEMP[4].xxxx 35: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 36: MUL TEMP[4].xyz, TEMP[2].xyzz, CONST[0].wwww 37: ADD TEMP[4].xyz, CONST[0].xyzz, -TEMP[4].xyzz 38: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 39: RSQ TEMP[5].x, TEMP[5].xxxx 40: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 41: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[4].xyzz 42: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[4].xxxx 43: ADD TEMP[4].x, IMM[0].yyyy, -TEMP[4].xxxx 44: SQRT TEMP[4].x, TEMP[4].xxxx 45: MUL TEMP[4].x, CONST[1].zzzz, TEMP[4].xxxx 46: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 47: ADD TEMP[2].xyz, TEMP[2].xyzz, -TEMP[3].xyzz 48: MUL TEMP[3], CONST[10], TEMP[2].xxxx 49: MAD TEMP[3], CONST[11], TEMP[2].yyyy, TEMP[3] 50: MAD TEMP[2], CONST[12], TEMP[2].zzzz, TEMP[3] 51: ADD TEMP[2], TEMP[2], CONST[13] 52: ELSE :0 53: MUL TEMP[3], CONST[22], TEMP[0].xxxx 54: MAD TEMP[3], CONST[23], TEMP[0].yyyy, TEMP[3] 55: MAD TEMP[0], CONST[24], TEMP[0].zzzz, TEMP[3] 56: ADD TEMP[2], TEMP[0], CONST[25] 57: ENDIF 58: MOV TEMP[0].xyw, TEMP[2].xyxw 59: RCP TEMP[3].x, TEMP[2].wwww 60: MUL TEMP[3].x, CONST[1].xxxx, TEMP[3].xxxx 61: MOV_SAT TEMP[3].x, TEMP[3].xxxx 62: ADD TEMP[3].x, TEMP[2].zzzz, TEMP[3].xxxx 63: MAX TEMP[2].x, TEMP[3].xxxx, -TEMP[2].wwww 64: LRP TEMP[2].x, CONST[1].yyyy, TEMP[2].xxxx, TEMP[3].xxxx 65: MOV TEMP[0].z, TEMP[2].xxxx 66: MOV TEMP[1].xy, TEMP[1].xyxx 67: MOV OUT[1], TEMP[1] 68: MOV OUT[0], TEMP[0] 69: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 348) %40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = extractelement <4 x float> %43, i32 2 %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 %49 = add i32 %5, %7 %50 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %49) %51 = extractelement <4 x float> %50, i32 0 %52 = extractelement <4 x float> %50, i32 1 %53 = extractelement <4 x float> %50, i32 2 %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = add i32 %5, %7 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 3 %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = add i32 %5, %7 %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %61) %63 = extractelement <4 x float> %62, i32 0 %64 = extractelement <4 x float> %62, i32 1 %65 = fmul float %44, %17 %66 = fmul float %45, %18 %67 = fmul float %46, %19 %68 = fmul float %58, %58 %69 = fmul float %20, %65 %70 = fmul float %21, %65 %71 = fmul float %22, %65 %72 = fmul float %23, %65 %73 = fmul float %24, %66 %74 = fadd float %73, %69 %75 = fmul float %25, %66 %76 = fadd float %75, %70 %77 = fmul float %26, %66 %78 = fadd float %77, %71 %79 = fmul float %27, %66 %80 = fadd float %79, %72 %81 = fmul float %28, %67 %82 = fadd float %81, %74 %83 = fmul float %29, %67 %84 = fadd float %83, %76 %85 = fmul float %30, %67 %86 = fadd float %85, %78 %87 = call float @llvm.AMDGPU.lrp(float %68, float %82, float %65) %88 = call float @llvm.AMDGPU.lrp(float 0.000000e+00, float %84, float %66) %89 = call float @llvm.AMDGPU.lrp(float %68, float %86, float %67) %90 = fmul float %31, %87 %91 = fmul float %32, %88 %92 = fadd float %91, %90 %93 = fmul float %33, %89 %94 = fadd float %92, %93 %95 = fadd float %94, %34 %96 = fmul float %95, %31 %97 = fmul float %95, %32 %98 = fmul float %95, %33 %99 = fsub float %87, %96 %100 = fsub float %88, %97 %101 = fsub float %89, %98 %102 = call float @llvm.AMDGPU.lrp(float %35, float %87, float %99) %103 = call float @llvm.AMDGPU.lrp(float %35, float %88, float %100) %104 = call float @llvm.AMDGPU.lrp(float %35, float %89, float %101) %105 = fmul float %63, %36 %106 = fadd float %105, %38 %107 = fmul float %64, %37 %108 = fadd float %107, %39 %109 = fcmp une float %16, 0.000000e+00 br i1 %109, label %IF, label %ELSE IF: ; preds = %main_body %110 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %111 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %112 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %113 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %114 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %115 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %116 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %117 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %118 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %119 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %121 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %122 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %123 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %124 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %125 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %126 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %127 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %128 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %129 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %130 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %131 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %132 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %133 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %134 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %135 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %136 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %137 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %138 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %139 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %140 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %141 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %142 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %143 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %144 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %145 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %146 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %147 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %148 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %149 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %150 = fmul float %146, %102 %151 = fmul float %145, %102 %152 = fmul float %144, %102 %153 = fmul float %143, %103 %154 = fadd float %153, %150 %155 = fmul float %142, %103 %156 = fadd float %155, %151 %157 = fmul float %141, %103 %158 = fadd float %157, %152 %159 = fmul float %140, %104 %160 = fadd float %159, %154 %161 = fmul float %139, %104 %162 = fadd float %161, %156 %163 = fmul float %138, %104 %164 = fadd float %163, %158 %165 = fadd float %160, %137 %166 = fadd float %162, %136 %167 = fadd float %164, %135 %168 = fmul float %134, %51 %169 = fmul float %131, %51 %170 = fmul float %128, %51 %171 = fmul float %133, %52 %172 = fadd float %171, %168 %173 = fmul float %130, %52 %174 = fadd float %173, %169 %175 = fmul float %127, %52 %176 = fadd float %175, %170 %177 = fmul float %132, %53 %178 = fadd float %177, %172 %179 = fmul float %129, %53 %180 = fadd float %179, %174 %181 = fmul float %126, %53 %182 = fadd float %181, %176 %183 = fmul float %178, %178 %184 = fmul float %180, %180 %185 = fadd float %184, %183 %186 = fmul float %182, %182 %187 = fadd float %185, %186 %188 = call float @llvm.AMDGPU.rsq.clamped.f32(float %187) %189 = fmul float %178, %188 %190 = fmul float %180, %188 %191 = fmul float %182, %188 %192 = fmul float %165, %13 %193 = fmul float %166, %13 %194 = fmul float %167, %13 %195 = fsub float %149, %192 %196 = fsub float %148, %193 %197 = fsub float %147, %194 %198 = fmul float %195, %195 %199 = fmul float %196, %196 %200 = fadd float %199, %198 %201 = fmul float %197, %197 %202 = fadd float %200, %201 %203 = call float @llvm.AMDGPU.rsq.clamped.f32(float %202) %204 = fmul float %195, %203 %205 = fmul float %196, %203 %206 = fmul float %197, %203 %207 = fmul float %189, %204 %208 = fmul float %190, %205 %209 = fadd float %208, %207 %210 = fmul float %191, %206 %211 = fadd float %209, %210 %212 = fmul float %211, %211 %213 = fsub float 1.000000e+00, %212 %214 = call float @llvm.sqrt.f32(float %213) %215 = fmul float %16, %214 %216 = fmul float %189, %215 %217 = fmul float %190, %215 %218 = fmul float %191, %215 %219 = fsub float %165, %216 %220 = fsub float %166, %217 %221 = fsub float %167, %218 %222 = fmul float %125, %219 %223 = fmul float %124, %219 %224 = fmul float %123, %219 %225 = fmul float %122, %219 %226 = fmul float %121, %220 %227 = fadd float %226, %222 %228 = fmul float %120, %220 %229 = fadd float %228, %223 %230 = fmul float %119, %220 %231 = fadd float %230, %224 %232 = fmul float %118, %220 %233 = fadd float %232, %225 %234 = fmul float %117, %221 %235 = fadd float %234, %227 %236 = fmul float %116, %221 %237 = fadd float %236, %229 %238 = fmul float %115, %221 %239 = fadd float %238, %231 %240 = fmul float %114, %221 %241 = fadd float %240, %233 %242 = fadd float %235, %113 %243 = fadd float %237, %112 %244 = fadd float %239, %111 %245 = fadd float %241, %110 br label %ENDIF ELSE: ; preds = %main_body %246 = call float @llvm.SI.load.const(<16 x i8> %12, i32 412) %247 = call float @llvm.SI.load.const(<16 x i8> %12, i32 408) %248 = call float @llvm.SI.load.const(<16 x i8> %12, i32 404) %249 = call float @llvm.SI.load.const(<16 x i8> %12, i32 400) %250 = call float @llvm.SI.load.const(<16 x i8> %12, i32 396) %251 = call float @llvm.SI.load.const(<16 x i8> %12, i32 392) %252 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388) %253 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384) %254 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) %255 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) %256 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) %257 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) %258 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) %259 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) %260 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) %261 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) %262 = fmul float %261, %102 %263 = fmul float %260, %102 %264 = fmul float %259, %102 %265 = fmul float %258, %102 %266 = fmul float %257, %103 %267 = fadd float %266, %262 %268 = fmul float %256, %103 %269 = fadd float %268, %263 %270 = fmul float %255, %103 %271 = fadd float %270, %264 %272 = fmul float %254, %103 %273 = fadd float %272, %265 %274 = fmul float %253, %104 %275 = fadd float %274, %267 %276 = fmul float %252, %104 %277 = fadd float %276, %269 %278 = fmul float %251, %104 %279 = fadd float %278, %271 %280 = fmul float %250, %104 %281 = fadd float %280, %273 %282 = fadd float %275, %249 %283 = fadd float %277, %248 %284 = fadd float %279, %247 %285 = fadd float %281, %246 br label %ENDIF ENDIF: ; preds = %ELSE, %IF %temp8.0 = phi float [ %242, %IF ], [ %282, %ELSE ] %temp9.0 = phi float [ %243, %IF ], [ %283, %ELSE ] %temp10.0 = phi float [ %244, %IF ], [ %284, %ELSE ] %temp11.0 = phi float [ %245, %IF ], [ %285, %ELSE ] %286 = fdiv float 1.000000e+00, %temp11.0 %287 = fmul float %14, %286 %288 = call float @llvm.AMDIL.clamp.(float %287, float 0.000000e+00, float 1.000000e+00) %289 = fadd float %temp10.0, %288 %290 = fsub float -0.000000e+00, %temp11.0 %291 = call float @llvm.maxnum.f32(float %289, float %290) %292 = call float @llvm.AMDGPU.lrp(float %15, float %291, float %289) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %106, float %108, float %101, float %80) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %temp8.0, float %temp9.0, float %292, float %temp11.0) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 buffer_load_format_xyzw v[11:14], v0, s[12:15], 0 idxen ; E00C2000 80030B00 buffer_load_format_xyzw v[7:10], v0, s[16:19], 0 idxen ; E00C2000 80040700 s_buffer_load_dword s5, s[0:3], 0x38 ; C2028138 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 buffer_load_format_xyzw v[14:17], v0, s[20:23], 0 idxen ; E00C2000 80050E00 buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000 s_buffer_load_dword s6, s[0:3], 0x39 ; C2030139 s_buffer_load_dword s7, s[0:3], 0x3a ; C203813A s_buffer_load_dword s8, s[0:3], 0x3c ; C204013C s_buffer_load_dword s9, s[0:3], 0x3d ; C204813D s_buffer_load_dword s10, s[0:3], 0x3e ; C205013E s_buffer_load_dword s11, s[0:3], 0x40 ; C2058140 s_buffer_load_dword s12, s[0:3], 0x41 ; C2060141 s_buffer_load_dword s15, s[0:3], 0x42 ; C2078142 s_buffer_load_dword s16, s[0:3], 0x44 ; C2080144 s_buffer_load_dword s17, s[0:3], 0x45 ; C2088145 s_buffer_load_dword s18, s[0:3], 0x46 ; C2090146 s_buffer_load_dword s19, s[0:3], 0x4c ; C209814C s_buffer_load_dword s20, s[0:3], 0x4d ; C20A014D s_buffer_load_dword s21, s[0:3], 0x4e ; C20A814E s_buffer_load_dword s22, s[0:3], 0x4f ; C20B014F s_buffer_load_dword s23, s[0:3], 0x50 ; C20B8150 s_buffer_load_dword s13, s[0:3], 0x56 ; C2068156 s_buffer_load_dword s14, s[0:3], 0x57 ; C2070157 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_sub_f32_e64 v5, 1.0, s23 ; D2080005 00002EF2 v_mul_f32_e32 v3, s5, v11 ; 10061605 v_mul_f32_e32 v2, s6, v12 ; 10041806 v_mul_f32_e32 v4, s7, v13 ; 10081A07 v_mul_f32_e32 v6, v17, v17 ; 100C2311 v_mad_f32 v10, -v17, v17, 1.0 ; D282000A 23CA2311 v_mul_f32_e32 v11, s8, v3 ; 10160608 v_mac_f32_e32 v11, s11, v2 ; 3E16040B v_mul_f32_e32 v12, s9, v3 ; 10180609 v_mac_f32_e32 v12, s12, v2 ; 3E18040C v_mul_f32_e32 v13, s10, v3 ; 101A060A v_mac_f32_e32 v13, s15, v2 ; 3E1A040F v_mac_f32_e32 v11, s16, v4 ; 3E160810 v_mac_f32_e32 v12, s17, v4 ; 3E180811 v_mac_f32_e32 v13, s18, v4 ; 3E1A0812 v_mul_f32_e32 v14, v4, v10 ; 101C1504 v_mul_f32_e32 v10, v3, v10 ; 10141503 v_mac_f32_e32 v10, v11, v6 ; 3E140D0B v_mac_f32_e32 v14, v13, v6 ; 3E1C0D0D v_mad_f32 v6, 0, v12, v2 ; D2820006 040A1880 v_mul_f32_e32 v4, s19, v10 ; 10081413 v_mac_f32_e32 v4, s20, v6 ; 3E080C14 v_mac_f32_e32 v4, s21, v14 ; 3E081C15 v_add_f32_e32 v4, s22, v4 ; 06080816 v_mad_f32 v11, -v4, s19, v10 ; D282000B 24282704 v_mad_f32 v13, -v4, s20, v6 ; D282000D 24182904 v_mad_f32 v4, -v4, s21, v14 ; D2820004 24382B04 v_mul_f32_e32 v12, v11, v5 ; 10180B0B v_mac_f32_e32 v12, s23, v10 ; 3E181417 v_mul_f32_e32 v11, v13, v5 ; 10160B0D v_mac_f32_e32 v11, s23, v6 ; 3E160C17 v_mul_f32_e32 v10, v4, v5 ; 10140B04 v_mac_f32_e32 v10, s23, v14 ; 3E141C17 v_cmp_eq_f32_e64 s[6:7], 0, s4 ; D0040006 00000880 s_and_saveexec_b64 s[6:7], s[6:7] ; BE862406 s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E s_cbranch_execz BB0_1 ; BF880000 s_buffer_load_dword s5, s[0:3], 0x67 ; C2028167 s_buffer_load_dword s8, s[0:3], 0x62 ; C2040162 s_buffer_load_dword s9, s[0:3], 0x63 ; C2048163 s_buffer_load_dword s10, s[0:3], 0x64 ; C2050164 s_buffer_load_dword s11, s[0:3], 0x65 ; C2058165 s_buffer_load_dword s12, s[0:3], 0x66 ; C2060166 s_buffer_load_dword s15, s[0:3], 0x5d ; C207815D s_buffer_load_dword s16, s[0:3], 0x5e ; C208015E s_buffer_load_dword s17, s[0:3], 0x5f ; C208815F s_buffer_load_dword s18, s[0:3], 0x60 ; C2090160 s_buffer_load_dword s19, s[0:3], 0x61 ; C2098161 s_buffer_load_dword s20, s[0:3], 0x58 ; C20A0158 s_buffer_load_dword s21, s[0:3], 0x59 ; C20A8159 s_buffer_load_dword s22, s[0:3], 0x5a ; C20B015A s_buffer_load_dword s23, s[0:3], 0x5b ; C20B815B s_buffer_load_dword s24, s[0:3], 0x5c ; C20C015C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s20, v12 ; 100A1814 v_mul_f32_e32 v6, s21, v12 ; 100C1815 v_mul_f32_e32 v15, s22, v12 ; 101E1816 v_mul_f32_e32 v17, s23, v12 ; 10221817 v_mac_f32_e32 v5, s24, v11 ; 3E0A1618 v_mac_f32_e32 v6, s15, v11 ; 3E0C160F v_mac_f32_e32 v15, s16, v11 ; 3E1E1610 v_mac_f32_e32 v17, s17, v11 ; 3E221611 v_mac_f32_e32 v5, s18, v10 ; 3E0A1412 v_mac_f32_e32 v6, s19, v10 ; 3E0C1413 v_mac_f32_e32 v15, s8, v10 ; 3E1E1408 v_mac_f32_e32 v17, s9, v10 ; 3E221409 v_add_f32_e32 v13, s10, v5 ; 061A0A0A v_add_f32_e32 v14, s11, v6 ; 061C0C0B v_add_f32_e32 v16, s12, v15 ; 06201E0C v_add_f32_e32 v15, s5, v17 ; 061E2205 s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s12, s[0:3], 0x3f ; C206013F s_buffer_load_dword s9, s[0:3], 0x43 ; C2048143 s_buffer_load_dword s10, s[0:3], 0x54 ; C2050154 s_buffer_load_dword s11, s[0:3], 0x55 ; C2058155 v_mov_b32_e32 v5, s13 ; 7E0A020D v_mov_b32_e32 v6, s14 ; 7E0C020E s_waitcnt lgkmcnt(0) ; BF8C007F s_xor_b64 exec, exec, s[6:7] ; 89FE067E s_cbranch_execz BB0_4 ; BF880000 s_buffer_load_dword s13, s[0:3], 0x37 ; C2068137 s_buffer_load_dword s14, s[0:3], 0x32 ; C2070132 s_buffer_load_dword s15, s[0:3], 0x33 ; C2078133 s_buffer_load_dword s16, s[0:3], 0x34 ; C2080134 s_buffer_load_dword s17, s[0:3], 0x35 ; C2088135 s_buffer_load_dword s18, s[0:3], 0x36 ; C2090136 s_buffer_load_dword s19, s[0:3], 0x2d ; C209812D s_buffer_load_dword s20, s[0:3], 0x2e ; C20A012E s_buffer_load_dword s21, s[0:3], 0x2f ; C20A812F s_buffer_load_dword s22, s[0:3], 0x30 ; C20B0130 s_buffer_load_dword s23, s[0:3], 0x31 ; C20B8131 s_buffer_load_dword s24, s[0:3], 0x28 ; C20C0128 s_buffer_load_dword s25, s[0:3], 0x29 ; C20C8129 s_buffer_load_dword s26, s[0:3], 0x2a ; C20D012A s_buffer_load_dword s27, s[0:3], 0x2b ; C20D812B s_buffer_load_dword s28, s[0:3], 0x2c ; C20E012C s_buffer_load_dword s29, s[0:3], 0x1d ; C20E811D s_buffer_load_dword s30, s[0:3], 0x1e ; C20F011E s_buffer_load_dword s31, s[0:3], 0x20 ; C20F8120 s_buffer_load_dword s32, s[0:3], 0x21 ; C2100121 s_buffer_load_dword s33, s[0:3], 0x22 ; C2108122 s_buffer_load_dword s34, s[0:3], 0x16 ; C2110116 s_buffer_load_dword s35, s[0:3], 0x18 ; C2118118 s_buffer_load_dword s36, s[0:3], 0x19 ; C2120119 s_buffer_load_dword s37, s[0:3], 0x1a ; C212811A s_buffer_load_dword s38, s[0:3], 0x1c ; C213011C s_buffer_load_dword s39, s[0:3], 0x10 ; C2138110 s_buffer_load_dword s40, s[0:3], 0x11 ; C2140111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v13, s31, v7 ; 101A0E1F v_mac_f32_e32 v13, s32, v8 ; 3E1A1020 v_mac_f32_e32 v13, s33, v9 ; 3E1A1221 s_buffer_load_dword s31, s[0:3], 0x12 ; C20F8112 v_mul_f32_e32 v14, s35, v7 ; 101C0E23 v_mac_f32_e32 v14, s36, v8 ; 3E1C1024 v_mac_f32_e32 v14, s37, v9 ; 3E1C1225 v_mul_f32_e32 v7, s38, v7 ; 100E0E26 v_mac_f32_e32 v7, s29, v8 ; 3E0E101D v_mac_f32_e32 v7, s30, v9 ; 3E0E121E s_buffer_load_dword s29, s[0:3], 0x14 ; C20E8114 s_buffer_load_dword s30, s[0:3], 0x15 ; C20F0115 s_buffer_load_dword s32, s[0:3], 0x9 ; C2100109 s_buffer_load_dword s33, s[0:3], 0xa ; C210810A s_buffer_load_dword s35, s[0:3], 0xc ; C211810C s_buffer_load_dword s36, s[0:3], 0xd ; C212010D s_buffer_load_dword s37, s[0:3], 0xe ; C212810E s_buffer_load_dword s38, s[0:3], 0x0 ; C2130100 v_mul_f32_e32 v8, v14, v14 ; 10101D0E v_mac_f32_e32 v8, v7, v7 ; 3E100F07 v_mac_f32_e32 v8, v13, v13 ; 3E101B0D v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v9, s32, v12 ; 10121820 v_mul_f32_e32 v15, s33, v12 ; 101E1821 s_buffer_load_dword s32, s[0:3], 0x1 ; C2100101 v_mac_f32_e32 v9, s36, v11 ; 3E121624 v_mac_f32_e32 v15, s37, v11 ; 3E1E1625 v_mac_f32_e32 v9, s40, v10 ; 3E121428 v_mac_f32_e32 v15, s31, v10 ; 3E1E141F v_add_f32_e32 v9, s30, v9 ; 0612121E v_add_f32_e32 v15, s34, v15 ; 061E1E22 s_buffer_load_dword s30, s[0:3], 0x2 ; C20F0102 s_buffer_load_dword s31, s[0:3], 0x3 ; C20F8103 s_buffer_load_dword s33, s[0:3], 0x8 ; C2108108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v16, s31 ; 7E20021F v_mad_f32 v17, -v9, v16, s32 ; D2820011 20822109 v_mul_f32_e32 v12, s33, v12 ; 10181821 v_mac_f32_e32 v12, s35, v11 ; 3E181623 v_mac_f32_e32 v12, s39, v10 ; 3E181427 v_add_f32_e32 v10, s29, v12 ; 0614181D v_mad_f32 v11, -v10, v16, s38 ; D282000B 209A210A v_mad_f32 v12, -v15, v16, s30 ; D282000C 207A210F v_mul_f32_e32 v16, v11, v11 ; 1020170B v_mac_f32_e32 v16, v17, v17 ; 3E202311 v_mac_f32_e32 v16, v12, v12 ; 3E20190C v_rsq_clamp_f32_e32 v16, v16 ; 7E205910 v_mul_f32_e32 v14, v8, v14 ; 101C1D08 v_mul_f32_e32 v7, v8, v7 ; 100E0F08 v_mul_f32_e32 v8, v8, v13 ; 10101B08 v_mul_f32_e32 v11, v16, v11 ; 10161710 v_mul_f32_e32 v13, v16, v17 ; 101A2310 v_mul_f32_e32 v12, v16, v12 ; 10181910 v_mul_f32_e32 v11, v11, v14 ; 10161D0B v_mac_f32_e32 v11, v13, v7 ; 3E160F0D v_mac_f32_e32 v11, v12, v8 ; 3E16110C v_mad_f32 v11, -v11, v11, 1.0 ; D282000B 23CA170B v_sqrt_f32_e32 v11, v11 ; 7E16670B v_mul_f32_e32 v11, s4, v11 ; 10161604 v_mad_f32 v10, -v14, v11, v10 ; D282000A 242A170E v_mad_f32 v7, -v7, v11, v9 ; D2820007 24261707 v_mad_f32 v8, -v8, v11, v15 ; D2820008 243E1708 v_mul_f32_e32 v9, s24, v10 ; 10121418 v_mul_f32_e32 v11, s25, v10 ; 10161419 v_mul_f32_e32 v12, s26, v10 ; 1018141A v_mul_f32_e32 v10, s27, v10 ; 1014141B v_mac_f32_e32 v9, s28, v7 ; 3E120E1C v_mac_f32_e32 v11, s19, v7 ; 3E160E13 v_mac_f32_e32 v12, s20, v7 ; 3E180E14 v_mac_f32_e32 v10, s21, v7 ; 3E140E15 v_mac_f32_e32 v9, s22, v8 ; 3E121016 v_mac_f32_e32 v11, s23, v8 ; 3E161017 v_mac_f32_e32 v12, s14, v8 ; 3E18100E v_mac_f32_e32 v10, s15, v8 ; 3E14100F v_add_f32_e32 v13, s16, v9 ; 061A1210 v_add_f32_e32 v14, s17, v11 ; 061C1611 v_add_f32_e32 v16, s18, v12 ; 06201812 v_add_f32_e32 v15, s13, v10 ; 061E140D s_or_b64 exec, exec, s[6:7] ; 88FE067E v_mul_f32_e32 v3, s12, v3 ; 1006060C v_rcp_f32_e32 v7, v15 ; 7E0E550F v_mac_f32_e32 v3, s9, v2 ; 3E060409 v_mac_f32_e32 v5, s10, v0 ; 3E0A000A v_mac_f32_e32 v6, s11, v1 ; 3E0C020B v_mul_f32_e32 v0, s8, v7 ; 10000E08 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_add_f32_e32 v0, v0, v16 ; 06002100 v_max_f32_e64 v1, v0, -v15 ; D2200001 40021F00 v_sub_f32_e64 v2, 1.0, s5 ; D2080002 00000AF2 v_mul_f32_e32 v0, v0, v2 ; 10000500 v_mac_f32_e32 v0, s5, v1 ; 3E000205 exp 15, 32, 0, 0, 0, v5, v6, v4, v3 ; F800020F 03040605 exp 15, 12, 0, 1, 0, v13, v14, v0, v15 ; F80008CF 0F000E0D s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 20 Code Size: 1048 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1] DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].w, TEMP[0], SAMP[0], 2D 2: FSLT TEMP[0].x, TEMP[0].wwww, CONST[1].xxxx 3: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx 4: KILL_IF -TEMP[0].xxxx 5: MOV OUT[0], IMM[0].yyyy 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %26 = load <32 x i8>, <32 x i8> addrspace(2)* %25, align 32, !tbaa !0 %27 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %28 = load <16 x i8>, <16 x i8> addrspace(2)* %27, align 16, !tbaa !0 %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %31 = bitcast float %29 to i32 %32 = bitcast float %30 to i32 %33 = insertelement <2 x i32> undef, i32 %31, i32 0 %34 = insertelement <2 x i32> %33, i32 %32, i32 1 %35 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %34, <32 x i8> %26, <16 x i8> %28, i32 2) %36 = extractelement <4 x float> %35, i32 3 %37 = fcmp olt float %36, %24 %38 = select i1 %37, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %38) %39 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[4:7] ; F0800800 00230002 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_cmp_gt_f32_e32 vcc, s0, v0 ; 7C080000 v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080 v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080 exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 92 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..24] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.5000, 0.0000} 0: MUL TEMP[0].xyz, IN[0].xyzz, CONST[13].xyzz 1: MUL TEMP[1].x, IN[2].wwww, IN[2].wwww 2: MOV TEMP[2].y, IMM[0].xxxx 3: MOV TEMP[2].x, TEMP[1].xxxx 4: MOV TEMP[2].z, TEMP[1].xxxx 5: MUL TEMP[1], CONST[14], TEMP[0].xxxx 6: MAD TEMP[1], CONST[15], TEMP[0].yyyy, TEMP[1] 7: MAD TEMP[1].xyz, CONST[16], TEMP[0].zzzz, TEMP[1] 8: LRP TEMP[0].xyz, TEMP[2].xyzz, TEMP[1].xyzz, TEMP[0].xyzz 9: DP3 TEMP[1].x, CONST[18].xyzz, TEMP[0].xyzz 10: ADD TEMP[1].x, TEMP[1].xxxx, CONST[18].wwww 11: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[18].xyzz 12: ADD TEMP[1].xyz, TEMP[0].xyzz, -TEMP[1].xyzz 13: LRP TEMP[0].xyz, CONST[19].xxxx, TEMP[0].xyzz, TEMP[1].xyzz 14: MUL TEMP[1], CONST[21], TEMP[0].xxxx 15: MAD TEMP[1], CONST[22], TEMP[0].yyyy, TEMP[1] 16: MAD TEMP[0], CONST[23], TEMP[0].zzzz, TEMP[1] 17: ADD TEMP[0], TEMP[0], CONST[24] 18: MUL TEMP[1].xyw, TEMP[0], IMM[0].zzzz 19: MOV TEMP[2].x, TEMP[1].xxxx 20: MUL TEMP[3].x, TEMP[1].yyyy, CONST[0].xxxx 21: MOV TEMP[2].y, TEMP[3].xxxx 22: ADD TEMP[1].xy, TEMP[2].xyyy, TEMP[1].wwww 23: MOV TEMP[1].zw, TEMP[0].wwzw 24: MOV TEMP[2].x, CONST[8].xxxx 25: MOV TEMP[2].y, CONST[9].xxxx 26: MOV TEMP[2].z, CONST[10].xxxx 27: MOV TEMP[3].x, CONST[8].yyyy 28: MOV TEMP[3].y, CONST[9].yyyy 29: MOV TEMP[3].z, CONST[10].yyyy 30: MOV TEMP[4].x, CONST[8].zzzz 31: MOV TEMP[4].y, CONST[9].zzzz 32: MOV TEMP[4].z, CONST[10].zzzz 33: MUL TEMP[2].xyz, TEMP[2].xyzz, IN[1].xxxx 34: MAD TEMP[2].xyz, TEMP[3].xyzz, IN[1].yyyy, TEMP[2].xyzz 35: MAD TEMP[2].xyz, TEMP[4].xyzz, IN[1].zzzz, TEMP[2].xyzz 36: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 37: RSQ TEMP[3].x, TEMP[3].xxxx 38: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 39: MOV TEMP[3].w, IMM[0].yyyy 40: MOV TEMP[3].xyz, TEMP[2].xyzx 41: DP4 TEMP[4].x, CONST[1], TEMP[3] 42: DP4 TEMP[5].x, CONST[2], TEMP[3] 43: MOV TEMP[4].y, TEMP[5].xxxx 44: DP4 TEMP[3].x, CONST[3], TEMP[3] 45: MOV TEMP[4].z, TEMP[3].xxxx 46: MUL TEMP[3], TEMP[2].xyzz, TEMP[2].yzzx 47: DP4 TEMP[5].x, CONST[4], TEMP[3] 48: DP4 TEMP[6].x, CONST[5], TEMP[3] 49: MOV TEMP[5].y, TEMP[6].xxxx 50: DP4 TEMP[3].x, CONST[6], TEMP[3] 51: MOV TEMP[5].z, TEMP[3].xxxx 52: MUL TEMP[3].x, TEMP[2].yyyy, TEMP[2].yyyy 53: MAD TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx, -TEMP[3].xxxx 54: MAD TEMP[2].xyz, CONST[7].xyzz, TEMP[2].xxxx, TEMP[5].xyzz 55: ADD TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xyzz 56: MAD TEMP[3].xy, IN[3].xyyy, CONST[20].xyyy, CONST[20].zwww 57: MOV TEMP[3].w, TEMP[2].xxxx 58: MOV TEMP[2].xy, TEMP[2].yzyy 59: MAD TEMP[4].x, TEMP[0].zzzz, CONST[12].zzzz, CONST[12].wwww 60: MOV TEMP[3].z, TEMP[4].xxxx 61: MOV OUT[1], TEMP[1] 62: MOV OUT[3], TEMP[2] 63: MOV OUT[0], TEMP[0] 64: MOV OUT[2], TEMP[3] 65: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 348) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) %81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) %83 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) %84 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384) %86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388) %87 = call float @llvm.SI.load.const(<16 x i8> %12, i32 392) %88 = call float @llvm.SI.load.const(<16 x i8> %12, i32 396) %89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0 %91 = add i32 %5, %7 %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91) %93 = extractelement <4 x float> %92, i32 0 %94 = extractelement <4 x float> %92, i32 1 %95 = extractelement <4 x float> %92, i32 2 %96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0 %98 = add i32 %5, %7 %99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = extractelement <4 x float> %99, i32 2 %103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0 %105 = add i32 %5, %7 %106 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %104, i32 0, i32 %105) %107 = extractelement <4 x float> %106, i32 3 %108 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %109 = load <16 x i8>, <16 x i8> addrspace(2)* %108, align 16, !tbaa !0 %110 = add i32 %5, %7 %111 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %109, i32 0, i32 %110) %112 = extractelement <4 x float> %111, i32 0 %113 = extractelement <4 x float> %111, i32 1 %114 = fmul float %93, %52 %115 = fmul float %94, %53 %116 = fmul float %95, %54 %117 = fmul float %107, %107 %118 = fmul float %55, %114 %119 = fmul float %56, %114 %120 = fmul float %57, %114 %121 = fmul float %58, %115 %122 = fadd float %121, %118 %123 = fmul float %59, %115 %124 = fadd float %123, %119 %125 = fmul float %60, %115 %126 = fadd float %125, %120 %127 = fmul float %61, %116 %128 = fadd float %127, %122 %129 = fmul float %62, %116 %130 = fadd float %129, %124 %131 = fmul float %63, %116 %132 = fadd float %131, %126 %133 = call float @llvm.AMDGPU.lrp(float %117, float %128, float %114) %134 = call float @llvm.AMDGPU.lrp(float 0.000000e+00, float %130, float %115) %135 = call float @llvm.AMDGPU.lrp(float %117, float %132, float %116) %136 = fmul float %64, %133 %137 = fmul float %65, %134 %138 = fadd float %137, %136 %139 = fmul float %66, %135 %140 = fadd float %138, %139 %141 = fadd float %140, %67 %142 = fmul float %141, %64 %143 = fmul float %141, %65 %144 = fmul float %141, %66 %145 = fsub float %133, %142 %146 = fsub float %134, %143 %147 = fsub float %135, %144 %148 = call float @llvm.AMDGPU.lrp(float %68, float %133, float %145) %149 = call float @llvm.AMDGPU.lrp(float %68, float %134, float %146) %150 = call float @llvm.AMDGPU.lrp(float %68, float %135, float %147) %151 = fmul float %73, %148 %152 = fmul float %74, %148 %153 = fmul float %75, %148 %154 = fmul float %76, %148 %155 = fmul float %77, %149 %156 = fadd float %155, %151 %157 = fmul float %78, %149 %158 = fadd float %157, %152 %159 = fmul float %79, %149 %160 = fadd float %159, %153 %161 = fmul float %80, %149 %162 = fadd float %161, %154 %163 = fmul float %81, %150 %164 = fadd float %163, %156 %165 = fmul float %82, %150 %166 = fadd float %165, %158 %167 = fmul float %83, %150 %168 = fadd float %167, %160 %169 = fmul float %84, %150 %170 = fadd float %169, %162 %171 = fadd float %164, %85 %172 = fadd float %166, %86 %173 = fadd float %168, %87 %174 = fadd float %170, %88 %175 = fmul float %171, 5.000000e-01 %176 = fmul float %172, 5.000000e-01 %177 = fmul float %174, 5.000000e-01 %178 = fmul float %176, %13 %179 = fadd float %175, %177 %180 = fadd float %178, %177 %181 = fmul float %41, %100 %182 = fmul float %44, %100 %183 = fmul float %47, %100 %184 = fmul float %42, %101 %185 = fadd float %184, %181 %186 = fmul float %45, %101 %187 = fadd float %186, %182 %188 = fmul float %48, %101 %189 = fadd float %188, %183 %190 = fmul float %43, %102 %191 = fadd float %190, %185 %192 = fmul float %46, %102 %193 = fadd float %192, %187 %194 = fmul float %49, %102 %195 = fadd float %194, %189 %196 = fmul float %191, %191 %197 = fmul float %193, %193 %198 = fadd float %197, %196 %199 = fmul float %195, %195 %200 = fadd float %198, %199 %201 = call float @llvm.AMDGPU.rsq.clamped.f32(float %200) %202 = fmul float %191, %201 %203 = fmul float %193, %201 %204 = fmul float %195, %201 %205 = fmul float %14, %202 %206 = fmul float %15, %203 %207 = fadd float %205, %206 %208 = fmul float %16, %204 %209 = fadd float %207, %208 %210 = fadd float %209, %17 %211 = fmul float %18, %202 %212 = fmul float %19, %203 %213 = fadd float %211, %212 %214 = fmul float %20, %204 %215 = fadd float %213, %214 %216 = fadd float %215, %21 %217 = fmul float %22, %202 %218 = fmul float %23, %203 %219 = fadd float %217, %218 %220 = fmul float %24, %204 %221 = fadd float %219, %220 %222 = fadd float %221, %25 %223 = fmul float %202, %203 %224 = fmul float %203, %204 %225 = fmul float %204, %204 %226 = fmul float %204, %202 %227 = fmul float %26, %223 %228 = fmul float %27, %224 %229 = fadd float %227, %228 %230 = fmul float %28, %225 %231 = fadd float %229, %230 %232 = fmul float %29, %226 %233 = fadd float %231, %232 %234 = fmul float %30, %223 %235 = fmul float %31, %224 %236 = fadd float %234, %235 %237 = fmul float %32, %225 %238 = fadd float %236, %237 %239 = fmul float %33, %226 %240 = fadd float %238, %239 %241 = fmul float %34, %223 %242 = fmul float %35, %224 %243 = fadd float %241, %242 %244 = fmul float %36, %225 %245 = fadd float %243, %244 %246 = fmul float %37, %226 %247 = fadd float %245, %246 %248 = fmul float %203, %203 %249 = fmul float %202, %202 %250 = fsub float %249, %248 %251 = fmul float %38, %250 %252 = fadd float %251, %233 %253 = fmul float %39, %250 %254 = fadd float %253, %240 %255 = fmul float %40, %250 %256 = fadd float %255, %247 %257 = fadd float %252, %210 %258 = fadd float %254, %216 %259 = fadd float %256, %222 %260 = fmul float %112, %69 %261 = fadd float %260, %71 %262 = fmul float %113, %70 %263 = fadd float %262, %72 %264 = fmul float %173, %50 %265 = fadd float %264, %51 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %179, float %180, float %173, float %174) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %261, float %263, float %265, float %257) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %258, float %259, float %259, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %171, float %172, float %173, float %174) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[68:71], s[2:3], 0x0 ; C0A20300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s35, s[68:71], 0x29 ; C211C529 buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen ; E00C2000 80000100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700 buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00 s_buffer_load_dword s25, s[68:71], 0x2a ; C20CC52A s_buffer_load_dword s0, s[68:71], 0x32 ; C2004532 s_buffer_load_dword s4, s[68:71], 0x33 ; C2024533 s_buffer_load_dword s67, s[68:71], 0x34 ; C221C534 s_buffer_load_dword s2, s[68:71], 0x1c ; C201451C s_buffer_load_dword s3, s[68:71], 0x1d ; C201C51D s_buffer_load_dword s1, s[68:71], 0x1e ; C200C51E s_buffer_load_dword s72, s[68:71], 0x20 ; C2244520 s_buffer_load_dword s45, s[68:71], 0x21 ; C216C521 s_buffer_load_dword s73, s[68:71], 0x35 ; C224C535 s_buffer_load_dword s74, s[68:71], 0x36 ; C2254536 s_buffer_load_dword s51, s[68:71], 0x38 ; C219C538 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s4 ; 7E000204 s_buffer_load_dword s56, s[68:71], 0x39 ; C21C4539 s_buffer_load_dword s53, s[68:71], 0x3a ; C21AC53A s_buffer_load_dword s44, s[68:71], 0x22 ; C2164522 s_buffer_load_dword s75, s[68:71], 0x24 ; C225C524 s_buffer_load_dword s55, s[68:71], 0x25 ; C21BC525 s_buffer_load_dword s46, s[68:71], 0x26 ; C2174526 s_buffer_load_dword s76, s[68:71], 0x28 ; C2264528 s_buffer_load_dword s57, s[68:71], 0x3c ; C21CC53C s_buffer_load_dword s58, s[68:71], 0x3d ; C21D453D s_buffer_load_dword s54, s[68:71], 0x3e ; C21B453E s_buffer_load_dword s50, s[68:71], 0x40 ; C2194540 s_buffer_load_dword s47, s[68:71], 0x41 ; C217C541 s_buffer_load_dword s48, s[68:71], 0x42 ; C2184542 s_buffer_load_dword s38, s[68:71], 0x48 ; C2134548 s_buffer_load_dword s39, s[68:71], 0x49 ; C213C549 s_buffer_load_dword s36, s[68:71], 0x4a ; C212454A s_buffer_load_dword s41, s[68:71], 0x4b ; C214C54B s_buffer_load_dword s24, s[68:71], 0x4c ; C20C454C s_buffer_load_dword s60, s[68:71], 0x50 ; C21E4550 s_buffer_load_dword s59, s[68:71], 0x51 ; C21DC551 s_buffer_load_dword s5, s[68:71], 0x52 ; C202C552 s_buffer_load_dword s6, s[68:71], 0x53 ; C2034553 s_buffer_load_dword s33, s[68:71], 0x54 ; C210C554 s_buffer_load_dword s28, s[68:71], 0x55 ; C20E4555 s_buffer_load_dword s29, s[68:71], 0x56 ; C20EC556 s_buffer_load_dword s26, s[68:71], 0x57 ; C20D4557 s_buffer_load_dword s27, s[68:71], 0x58 ; C20DC558 s_buffer_load_dword s9, s[68:71], 0xd ; C204C50D s_buffer_load_dword s8, s[68:71], 0xe ; C204450E s_buffer_load_dword s4, s[68:71], 0xf ; C202450F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s5 ; 7E0E0205 s_buffer_load_dword s17, s[68:71], 0x10 ; C208C510 v_mov_b32_e32 v8, s6 ; 7E100206 s_buffer_load_dword s22, s[68:71], 0x11 ; C20B4511 s_buffer_load_dword s34, s[68:71], 0x59 ; C2114559 s_buffer_load_dword s30, s[68:71], 0x5a ; C20F455A s_buffer_load_dword s31, s[68:71], 0x5b ; C20FC55B s_buffer_load_dword s19, s[68:71], 0x5c ; C209C55C s_buffer_load_dword s20, s[68:71], 0x5d ; C20A455D s_buffer_load_dword s6, s[68:71], 0x0 ; C2034500 s_buffer_load_dword s11, s[68:71], 0x4 ; C205C504 s_buffer_load_dword s13, s[68:71], 0x5 ; C206C505 s_buffer_load_dword s10, s[68:71], 0x6 ; C2054506 s_buffer_load_dword s7, s[68:71], 0x7 ; C203C507 s_buffer_load_dword s14, s[68:71], 0x8 ; C2074508 s_buffer_load_dword s15, s[68:71], 0x9 ; C207C509 s_buffer_load_dword s12, s[68:71], 0xa ; C206450A s_buffer_load_dword s5, s[68:71], 0xb ; C202C50B s_buffer_load_dword s16, s[68:71], 0xc ; C208450C s_buffer_load_dword s32, s[68:71], 0x12 ; C2104512 s_buffer_load_dword s18, s[68:71], 0x13 ; C2094513 s_buffer_load_dword s42, s[68:71], 0x14 ; C2154514 s_buffer_load_dword s49, s[68:71], 0x15 ; C218C515 s_buffer_load_dword s37, s[68:71], 0x16 ; C212C516 s_buffer_load_dword s21, s[68:71], 0x17 ; C20AC517 s_buffer_load_dword s43, s[68:71], 0x18 ; C215C518 s_buffer_load_dword s52, s[68:71], 0x19 ; C21A4519 s_buffer_load_dword s40, s[68:71], 0x1a ; C214451A s_buffer_load_dword s23, s[68:71], 0x1b ; C20BC51B s_buffer_load_dword s65, s[68:71], 0x5e ; C220C55E s_buffer_load_dword s66, s[68:71], 0x5f ; C221455F s_buffer_load_dword s62, s[68:71], 0x60 ; C21F4560 s_buffer_load_dword s63, s[68:71], 0x61 ; C21FC561 s_buffer_load_dword s61, s[68:71], 0x62 ; C21EC562 s_buffer_load_dword s64, s[68:71], 0x63 ; C2204563 v_mul_f32_e32 v1, s67, v1 ; 10020243 v_mul_f32_e32 v2, s73, v2 ; 10040449 v_mul_f32_e32 v3, s74, v3 ; 1006064A v_mul_f32_e32 v9, s72, v4 ; 10120848 v_mul_f32_e32 v13, s75, v4 ; 101A084B v_mul_f32_e32 v4, s76, v4 ; 1008084C v_mac_f32_e32 v7, s60, v11 ; 3E0E163C v_mac_f32_e32 v8, s59, v12 ; 3E10183B v_mul_f32_e32 v11, s51, v1 ; 10160233 v_mul_f32_e32 v12, s56, v1 ; 10180238 v_mul_f32_e32 v14, s53, v1 ; 101C0235 v_mac_f32_e32 v9, s45, v5 ; 3E120A2D v_mac_f32_e32 v13, s55, v5 ; 3E1A0A37 v_mac_f32_e32 v4, s35, v5 ; 3E080A23 v_mac_f32_e32 v11, s57, v2 ; 3E160439 v_mac_f32_e32 v12, s58, v2 ; 3E18043A v_mac_f32_e32 v14, s54, v2 ; 3E1C0436 v_mac_f32_e32 v9, s44, v6 ; 3E120C2C v_mac_f32_e32 v13, s46, v6 ; 3E1A0C2E v_mac_f32_e32 v4, s25, v6 ; 3E080C19 v_mac_f32_e32 v11, s50, v3 ; 3E160632 v_mac_f32_e32 v12, s47, v3 ; 3E18062F v_mac_f32_e32 v14, s48, v3 ; 3E1C0630 v_mul_f32_e32 v5, v10, v10 ; 100A150A v_mad_f32 v6, -v10, v10, 1.0 ; D2820006 23CA150A v_mul_f32_e32 v1, v1, v6 ; 10020D01 v_mul_f32_e32 v3, v3, v6 ; 10060D03 v_mac_f32_e32 v1, v11, v5 ; 3E020B0B v_mac_f32_e32 v3, v14, v5 ; 3E060B0E v_mac_f32_e32 v2, 0, v12 ; 3E041880 v_mul_f32_e32 v5, s38, v1 ; 100A0226 v_mac_f32_e32 v5, s39, v2 ; 3E0A0427 v_mac_f32_e32 v5, s36, v3 ; 3E0A0624 v_add_f32_e32 v5, s41, v5 ; 060A0A29 v_mad_f32 v6, -v5, s38, v1 ; D2820006 24044D05 v_mad_f32 v10, -v5, s39, v2 ; D282000A 24084F05 v_mad_f32 v5, -v5, s36, v3 ; D2820005 240C4905 v_sub_f32_e64 v11, 1.0, s24 ; D208000B 000030F2 v_mul_f32_e32 v6, v6, v11 ; 100C1706 v_mul_f32_e32 v10, v10, v11 ; 1014170A v_mul_f32_e32 v5, v5, v11 ; 100A1705 v_mac_f32_e32 v6, s24, v1 ; 3E0C0218 v_mac_f32_e32 v10, s24, v2 ; 3E140418 v_mac_f32_e32 v5, s24, v3 ; 3E0A0618 v_mul_f32_e32 v1, s33, v6 ; 10020C21 v_mul_f32_e32 v2, s28, v6 ; 10040C1C v_mul_f32_e32 v3, s29, v6 ; 10060C1D v_mul_f32_e32 v6, s26, v6 ; 100C0C1A v_mac_f32_e32 v1, s27, v10 ; 3E02141B v_mul_f32_e32 v11, v9, v9 ; 10161309 v_mac_f32_e32 v11, v13, v13 ; 3E161B0D v_mac_f32_e32 v11, v4, v4 ; 3E160904 v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v2, s34, v10 ; 3E041422 v_mac_f32_e32 v3, s30, v10 ; 3E06141E v_mac_f32_e32 v6, s31, v10 ; 3E0C141F v_mul_f32_e32 v10, v11, v13 ; 10141B0B v_mul_f32_e32 v4, v11, v4 ; 1008090B v_mul_f32_e32 v12, v4, v10 ; 10181504 v_mul_f32_e32 v13, s22, v12 ; 101A1816 v_mul_f32_e32 v14, s49, v12 ; 101C1831 v_mul_f32_e32 v12, s52, v12 ; 10181834 v_mul_f32_e32 v9, v11, v9 ; 1012130B v_mul_f32_e32 v11, v10, v9 ; 1016130A v_mac_f32_e32 v13, s17, v11 ; 3E1A1611 v_mac_f32_e32 v14, s42, v11 ; 3E1C162A v_mac_f32_e32 v12, s43, v11 ; 3E18162B v_mac_f32_e32 v1, s19, v5 ; 3E020A13 v_mac_f32_e32 v2, s20, v5 ; 3E040A14 v_mac_f32_e32 v3, s65, v5 ; 3E060A41 v_mac_f32_e32 v6, s66, v5 ; 3E0C0A42 v_mul_f32_e32 v5, v4, v4 ; 100A0904 v_mac_f32_e32 v13, s32, v5 ; 3E1A0A20 v_mac_f32_e32 v14, s37, v5 ; 3E1C0A25 v_mac_f32_e32 v12, s40, v5 ; 3E180A28 v_mul_f32_e32 v5, s13, v10 ; 100A140D v_mac_f32_e32 v5, s11, v9 ; 3E0A120B v_mul_f32_e32 v11, s15, v10 ; 1016140F v_mac_f32_e32 v11, s14, v9 ; 3E16120E v_mul_f32_e32 v15, s9, v10 ; 101E1409 v_mac_f32_e32 v15, s16, v9 ; 3E1E1210 v_mac_f32_e32 v5, s10, v4 ; 3E0A080A v_mac_f32_e32 v11, s12, v4 ; 3E16080C v_mac_f32_e32 v15, s8, v4 ; 3E1E0808 v_mul_f32_e32 v4, v9, v4 ; 10080909 v_mac_f32_e32 v13, s18, v4 ; 3E1A0812 v_mac_f32_e32 v14, s21, v4 ; 3E1C0815 v_mac_f32_e32 v12, s23, v4 ; 3E180817 v_mul_f32_e32 v4, v10, v10 ; 1008150A v_mad_f32 v4, v9, v9, -v4 ; D2820004 84121309 v_mac_f32_e32 v13, s2, v4 ; 3E1A0802 v_mac_f32_e32 v14, s3, v4 ; 3E1C0803 v_mac_f32_e32 v12, s1, v4 ; 3E180801 v_add_f32_e32 v1, s62, v1 ; 0602023E v_add_f32_e32 v2, s63, v2 ; 0604043F v_add_f32_e32 v4, s64, v6 ; 06080C40 v_mul_f32_e32 v6, 0.5, v2 ; 100C04F0 v_mul_f32_e32 v9, 0.5, v4 ; 101208F0 v_mad_f32 v10, 0.5, v1, v9 ; D282000A 042602F0 v_mac_f32_e32 v9, s6, v6 ; 3E120C06 v_add_f32_e32 v5, s7, v5 ; 060A0A07 v_add_f32_e32 v3, s61, v3 ; 0606063D v_mac_f32_e32 v0, s0, v3 ; 3E000600 v_add_f32_e32 v5, v5, v13 ; 060A1B05 exp 15, 32, 0, 0, 0, v10, v9, v3, v4 ; F800020F 0403090A exp 15, 33, 0, 0, 0, v7, v8, v0, v5 ; F800021F 05000807 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v0, s5, v11 ; 06001605 v_add_f32_e32 v5, s4, v15 ; 060A1E04 v_add_f32_e32 v0, v0, v14 ; 06001D00 v_add_f32_e32 v5, v5, v12 ; 060A1905 v_mov_b32_e32 v6, 0 ; 7E0C0280 exp 15, 34, 0, 0, 0, v0, v5, v5, v6 ; F800022F 06050500 exp 15, 12, 0, 1, 0, v1, v2, v3, v4 ; F80008CF 04030201 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 16 Code Size: 900 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0..2] DCL CONST[4] DCL CONST[6] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].x, IN[1].wwww 1: MOV TEMP[0].yz, IN[2].yxyy 2: MOV TEMP[1].xy, IN[1].xyyy 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D 4: FSLT TEMP[2].x, TEMP[1].wwww, CONST[6].xxxx 5: AND TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx 6: KILL_IF -TEMP[2].xxxx 7: MOV TEMP[2].xy, IN[0].xyyy 8: MOV TEMP[2].w, IN[0].wwww 9: TXP TEMP[2], TEMP[2], SAMP[1], 2D 10: LG2 TEMP[3].x, TEMP[2].xxxx 11: LG2 TEMP[3].y, TEMP[2].yyyy 12: LG2 TEMP[3].z, TEMP[2].zzzz 13: LG2 TEMP[3].w, TEMP[2].wwww 14: MOV TEMP[2], -TEMP[3] 15: ADD TEMP[0].xyz, TEMP[2].xyzz, TEMP[0].xyzz 16: MUL TEMP[2].xyz, TEMP[2].wwww, CONST[2] 17: MUL TEMP[3].xyz, TEMP[1].xyzz, CONST[4].xyzz 18: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[0].xyzz 19: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xyzz, TEMP[3].xyzz 20: MAD TEMP[1].x, TEMP[2].xyzz, CONST[1].wwww, TEMP[1].wwww 21: MOV TEMP[1].w, TEMP[1].xxxx 22: MOV_SAT TEMP[2].x, IN[1].zzzz 23: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[0].xyzz, CONST[0].xyzz 24: MOV OUT[0], TEMP[1] 25: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %35 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %36 = load <32 x i8>, <32 x i8> addrspace(2)* %35, align 32, !tbaa !0 %37 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %40 = bitcast <8 x i32> addrspace(2)* %39 to <32 x i8> addrspace(2)* %41 = load <32 x i8>, <32 x i8> addrspace(2)* %40, align 32, !tbaa !0 %42 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %43 = bitcast <4 x i32> addrspace(2)* %42 to <16 x i8> addrspace(2)* %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %54 = bitcast float %48 to i32 %55 = bitcast float %49 to i32 %56 = insertelement <2 x i32> undef, i32 %54, i32 0 %57 = insertelement <2 x i32> %56, i32 %55, i32 1 %58 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %57, <32 x i8> %36, <16 x i8> %38, i32 2) %59 = extractelement <4 x float> %58, i32 0 %60 = extractelement <4 x float> %58, i32 1 %61 = extractelement <4 x float> %58, i32 2 %62 = extractelement <4 x float> %58, i32 3 %63 = fcmp olt float %62, %34 %64 = select i1 %63, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %64) %65 = fdiv float %45, %47 %66 = fdiv float %46, %47 %67 = bitcast float %65 to i32 %68 = bitcast float %66 to i32 %69 = insertelement <2 x i32> undef, i32 %67, i32 0 %70 = insertelement <2 x i32> %69, i32 %68, i32 1 %71 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %70, <32 x i8> %41, <16 x i8> %44, i32 2) %72 = extractelement <4 x float> %71, i32 0 %73 = extractelement <4 x float> %71, i32 1 %74 = extractelement <4 x float> %71, i32 2 %75 = extractelement <4 x float> %71, i32 3 %76 = call float @llvm.log2.f32(float %72) %77 = call float @llvm.log2.f32(float %73) %78 = call float @llvm.log2.f32(float %74) %79 = call float @llvm.log2.f32(float %75) %80 = fsub float -0.000000e+00, %79 %81 = fsub float %51, %76 %82 = fsub float %52, %77 %83 = fsub float %53, %78 %84 = fmul float %28, %80 %85 = fmul float %29, %80 %86 = fmul float %30, %80 %87 = fmul float %59, %31 %88 = fmul float %60, %32 %89 = fmul float %61, %33 %90 = fmul float %87, %81 %91 = fmul float %88, %82 %92 = fmul float %89, %83 %93 = fmul float %81, %84 %94 = fadd float %93, %90 %95 = fmul float %82, %85 %96 = fadd float %95, %91 %97 = fmul float %83, %86 %98 = fadd float %97, %92 %99 = fmul float %84, %27 %100 = fadd float %99, %62 %101 = call float @llvm.AMDIL.clamp.(float %50, float 0.000000e+00, float 1.000000e+00) %102 = call float @llvm.AMDGPU.lrp(float %101, float %94, float %24) %103 = call float @llvm.AMDGPU.lrp(float %101, float %96, float %25) %104 = call float @llvm.AMDGPU.lrp(float %101, float %98, float %26) %105 = call i32 @llvm.SI.packf16(float %102, float %103) %106 = bitcast i32 %105 to float %107 = call i32 @llvm.SI.packf16(float %104, float %100) %108 = bitcast i32 %107 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %106, float %108, float %106, float %108) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600 v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s20, s[0:3], 0x18 ; C20A0118 v_interp_p1_f32 v8, v0, 3, 1, [m0] ; C8200700 v_interp_p2_f32 v8, [v8], v1, 3, 1, [m0] ; C8210701 v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800 v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801 v_interp_p1_f32 v0, v0, 1, 2, [m0] ; C8000900 v_interp_p2_f32 v0, [v0], v1, 1, 2, [m0] ; C8010901 s_load_dwordx4 s[24:27], s[4:5], 0x4 ; C08C0504 s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[12:19], s[8:11] ; F0800F00 00430A05 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_cmp_gt_f32_e32 vcc, s20, v13 ; 7C081A14 v_cndmask_b32_e64 v1, 0, -1.0, vcc ; D2000001 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v1 ; 7C260280 v_mov_b32_e32 v1, 0x6f800000 ; 7E0202FF 6F800000 v_cmp_gt_f32_e64 vcc, |v4|, v1 ; D008016A 00020304 v_mov_b32_e32 v1, 0x2f800000 ; 7E0202FF 2F800000 v_cndmask_b32_e32 v1, 1.0, v1 ; 000202F2 v_mul_f32_e32 v4, v1, v4 ; 10080901 v_rcp_f32_e32 v4, v4 ; 7E085504 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_mul_f32_e32 v3, v4, v3 ; 10060704 v_mul_f32_e32 v4, v2, v1 ; 10080302 v_mul_f32_e32 v5, v3, v1 ; 100A0303 image_sample v[1:4], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[28:35], s[24:27] ; F0800F00 00C70104 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v3, v3 ; 7E064F03 s_buffer_load_dword s5, s[0:3], 0x12 ; C2028112 v_subrev_f32_e32 v0, v3, v0 ; 0A000103 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 v_log_f32_e32 v3, v4 ; 7E064F04 s_buffer_load_dword s7, s[0:3], 0x10 ; C2038110 s_buffer_load_dword s8, s[0:3], 0x11 ; C2040111 s_buffer_load_dword s9, s[0:3], 0x8 ; C2048108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v3 ; 10080604 v_mul_f32_e32 v5, s5, v12 ; 100A1805 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 v_mul_f32_e32 v4, v4, v0 ; 10080104 v_mad_f32 v0, v5, v0, -v4 ; D2820000 84120105 v_add_f32_e64 v4, 0, v7 clamp ; D2060804 00020E80 v_sub_f32_e32 v5, 1.0, v4 ; 080A08F2 v_mul_f32_e32 v6, s6, v5 ; 100C0A06 v_mac_f32_e32 v6, v0, v4 ; 3E0C0900 v_mul_f32_e64 v0, s9, -v3 ; D2100000 40020609 v_mul_f32_e32 v7, s7, v10 ; 100E1407 v_mul_f32_e32 v10, s8, v11 ; 10141608 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v13, s4, v0 ; 3E1A0004 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 v_cvt_pkrtz_f16_f32_e32 v6, v6, v13 ; 5E0C1B06 v_log_f32_e32 v1, v1 ; 7E024F01 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_buffer_load_dword s0, s[0:3], 0x1 ; C2000101 v_log_f32_e32 v2, v2 ; 7E044F02 v_subrev_f32_e32 v1, v1, v8 ; 0A021101 v_mul_f32_e32 v7, v1, v7 ; 100E0F01 v_mac_f32_e32 v7, v0, v1 ; 3E0E0300 v_subrev_f32_e32 v0, v2, v9 ; 0A001302 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v3 ; 10020604 v_mul_f32_e32 v1, v1, v0 ; 10020101 v_mad_f32 v0, v10, v0, -v1 ; D2820000 8406010A v_mul_f32_e32 v1, s5, v5 ; 10020A05 v_mac_f32_e32 v1, v7, v4 ; 3E020907 v_mul_f32_e32 v2, s0, v5 ; 10040A00 v_mac_f32_e32 v2, v0, v4 ; 3E040900 v_cvt_pkrtz_f16_f32_e32 v0, v1, v2 ; 5E000501 exp 15, 0, 1, 1, 1, v0, v6, v0, v6 ; F8001C0F 06000600 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 400 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..24] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.5000, 0.0000} 0: MUL TEMP[0].xyz, IN[0].xyzz, CONST[13].xyzz 1: MUL TEMP[1].x, IN[2].wwww, IN[2].wwww 2: MOV TEMP[2].y, IMM[0].xxxx 3: MOV TEMP[2].x, TEMP[1].xxxx 4: MOV TEMP[2].z, TEMP[1].xxxx 5: MUL TEMP[1], CONST[14], TEMP[0].xxxx 6: MAD TEMP[1], CONST[15], TEMP[0].yyyy, TEMP[1] 7: MAD TEMP[1].xyz, CONST[16], TEMP[0].zzzz, TEMP[1] 8: LRP TEMP[0].xyz, TEMP[2].xyzz, TEMP[1].xyzz, TEMP[0].xyzz 9: DP3 TEMP[1].x, CONST[18].xyzz, TEMP[0].xyzz 10: ADD TEMP[1].x, TEMP[1].xxxx, CONST[18].wwww 11: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[18].xyzz 12: ADD TEMP[1].xyz, TEMP[0].xyzz, -TEMP[1].xyzz 13: LRP TEMP[0].xyz, CONST[19].xxxx, TEMP[0].xyzz, TEMP[1].xyzz 14: MUL TEMP[1], CONST[21], TEMP[0].xxxx 15: MAD TEMP[1], CONST[22], TEMP[0].yyyy, TEMP[1] 16: MAD TEMP[0], CONST[23], TEMP[0].zzzz, TEMP[1] 17: ADD TEMP[0], TEMP[0], CONST[24] 18: MUL TEMP[1].xyw, TEMP[0], IMM[0].zzzz 19: MOV TEMP[2].x, TEMP[1].xxxx 20: MUL TEMP[3].x, TEMP[1].yyyy, CONST[0].xxxx 21: MOV TEMP[2].y, TEMP[3].xxxx 22: ADD TEMP[1].xy, TEMP[2].xyyy, TEMP[1].wwww 23: MOV TEMP[1].zw, TEMP[0].wwzw 24: MOV TEMP[2].x, CONST[8].xxxx 25: MOV TEMP[2].y, CONST[9].xxxx 26: MOV TEMP[2].z, CONST[10].xxxx 27: MOV TEMP[3].x, CONST[8].yyyy 28: MOV TEMP[3].y, CONST[9].yyyy 29: MOV TEMP[3].z, CONST[10].yyyy 30: MOV TEMP[4].x, CONST[8].zzzz 31: MOV TEMP[4].y, CONST[9].zzzz 32: MOV TEMP[4].z, CONST[10].zzzz 33: MUL TEMP[2].xyz, TEMP[2].xyzz, IN[1].xxxx 34: MAD TEMP[2].xyz, TEMP[3].xyzz, IN[1].yyyy, TEMP[2].xyzz 35: MAD TEMP[2].xyz, TEMP[4].xyzz, IN[1].zzzz, TEMP[2].xyzz 36: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 37: RSQ TEMP[3].x, TEMP[3].xxxx 38: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 39: MOV TEMP[3].w, IMM[0].yyyy 40: MOV TEMP[3].xyz, TEMP[2].xyzx 41: DP4 TEMP[4].x, CONST[1], TEMP[3] 42: DP4 TEMP[5].x, CONST[2], TEMP[3] 43: MOV TEMP[4].y, TEMP[5].xxxx 44: DP4 TEMP[3].x, CONST[3], TEMP[3] 45: MOV TEMP[4].z, TEMP[3].xxxx 46: MUL TEMP[3], TEMP[2].xyzz, TEMP[2].yzzx 47: DP4 TEMP[5].x, CONST[4], TEMP[3] 48: DP4 TEMP[6].x, CONST[5], TEMP[3] 49: MOV TEMP[5].y, TEMP[6].xxxx 50: DP4 TEMP[3].x, CONST[6], TEMP[3] 51: MOV TEMP[5].z, TEMP[3].xxxx 52: MUL TEMP[3].x, TEMP[2].yyyy, TEMP[2].yyyy 53: MAD TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx, -TEMP[3].xxxx 54: MAD TEMP[2].xyz, CONST[7].xyzz, TEMP[2].xxxx, TEMP[5].xyzz 55: ADD TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xyzz 56: MAD TEMP[3].xy, IN[3].xyyy, CONST[20].xyyy, CONST[20].zwww 57: MOV TEMP[3].w, TEMP[2].xxxx 58: MOV TEMP[2].xy, TEMP[2].yzyy 59: MAD TEMP[4].x, TEMP[0].zzzz, CONST[12].zzzz, CONST[12].wwww 60: MOV TEMP[3].z, TEMP[4].xxxx 61: MOV OUT[1], TEMP[1] 62: MOV OUT[3], TEMP[2] 63: MOV OUT[0], TEMP[0] 64: MOV OUT[2], TEMP[3] 65: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 348) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) %81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) %83 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) %84 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384) %86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388) %87 = call float @llvm.SI.load.const(<16 x i8> %12, i32 392) %88 = call float @llvm.SI.load.const(<16 x i8> %12, i32 396) %89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0 %91 = add i32 %5, %7 %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91) %93 = extractelement <4 x float> %92, i32 0 %94 = extractelement <4 x float> %92, i32 1 %95 = extractelement <4 x float> %92, i32 2 %96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0 %98 = add i32 %5, %7 %99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = extractelement <4 x float> %99, i32 2 %103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0 %105 = add i32 %5, %7 %106 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %104, i32 0, i32 %105) %107 = extractelement <4 x float> %106, i32 3 %108 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %109 = load <16 x i8>, <16 x i8> addrspace(2)* %108, align 16, !tbaa !0 %110 = add i32 %5, %7 %111 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %109, i32 0, i32 %110) %112 = extractelement <4 x float> %111, i32 0 %113 = extractelement <4 x float> %111, i32 1 %114 = fmul float %93, %52 %115 = fmul float %94, %53 %116 = fmul float %95, %54 %117 = fmul float %107, %107 %118 = fmul float %55, %114 %119 = fmul float %56, %114 %120 = fmul float %57, %114 %121 = fmul float %58, %115 %122 = fadd float %121, %118 %123 = fmul float %59, %115 %124 = fadd float %123, %119 %125 = fmul float %60, %115 %126 = fadd float %125, %120 %127 = fmul float %61, %116 %128 = fadd float %127, %122 %129 = fmul float %62, %116 %130 = fadd float %129, %124 %131 = fmul float %63, %116 %132 = fadd float %131, %126 %133 = call float @llvm.AMDGPU.lrp(float %117, float %128, float %114) %134 = call float @llvm.AMDGPU.lrp(float 0.000000e+00, float %130, float %115) %135 = call float @llvm.AMDGPU.lrp(float %117, float %132, float %116) %136 = fmul float %64, %133 %137 = fmul float %65, %134 %138 = fadd float %137, %136 %139 = fmul float %66, %135 %140 = fadd float %138, %139 %141 = fadd float %140, %67 %142 = fmul float %141, %64 %143 = fmul float %141, %65 %144 = fmul float %141, %66 %145 = fsub float %133, %142 %146 = fsub float %134, %143 %147 = fsub float %135, %144 %148 = call float @llvm.AMDGPU.lrp(float %68, float %133, float %145) %149 = call float @llvm.AMDGPU.lrp(float %68, float %134, float %146) %150 = call float @llvm.AMDGPU.lrp(float %68, float %135, float %147) %151 = fmul float %73, %148 %152 = fmul float %74, %148 %153 = fmul float %75, %148 %154 = fmul float %76, %148 %155 = fmul float %77, %149 %156 = fadd float %155, %151 %157 = fmul float %78, %149 %158 = fadd float %157, %152 %159 = fmul float %79, %149 %160 = fadd float %159, %153 %161 = fmul float %80, %149 %162 = fadd float %161, %154 %163 = fmul float %81, %150 %164 = fadd float %163, %156 %165 = fmul float %82, %150 %166 = fadd float %165, %158 %167 = fmul float %83, %150 %168 = fadd float %167, %160 %169 = fmul float %84, %150 %170 = fadd float %169, %162 %171 = fadd float %164, %85 %172 = fadd float %166, %86 %173 = fadd float %168, %87 %174 = fadd float %170, %88 %175 = fmul float %171, 5.000000e-01 %176 = fmul float %172, 5.000000e-01 %177 = fmul float %174, 5.000000e-01 %178 = fmul float %176, %13 %179 = fadd float %175, %177 %180 = fadd float %178, %177 %181 = fmul float %41, %100 %182 = fmul float %44, %100 %183 = fmul float %47, %100 %184 = fmul float %42, %101 %185 = fadd float %184, %181 %186 = fmul float %45, %101 %187 = fadd float %186, %182 %188 = fmul float %48, %101 %189 = fadd float %188, %183 %190 = fmul float %43, %102 %191 = fadd float %190, %185 %192 = fmul float %46, %102 %193 = fadd float %192, %187 %194 = fmul float %49, %102 %195 = fadd float %194, %189 %196 = fmul float %191, %191 %197 = fmul float %193, %193 %198 = fadd float %197, %196 %199 = fmul float %195, %195 %200 = fadd float %198, %199 %201 = call float @llvm.AMDGPU.rsq.clamped.f32(float %200) %202 = fmul float %191, %201 %203 = fmul float %193, %201 %204 = fmul float %195, %201 %205 = fmul float %14, %202 %206 = fmul float %15, %203 %207 = fadd float %205, %206 %208 = fmul float %16, %204 %209 = fadd float %207, %208 %210 = fadd float %209, %17 %211 = fmul float %18, %202 %212 = fmul float %19, %203 %213 = fadd float %211, %212 %214 = fmul float %20, %204 %215 = fadd float %213, %214 %216 = fadd float %215, %21 %217 = fmul float %22, %202 %218 = fmul float %23, %203 %219 = fadd float %217, %218 %220 = fmul float %24, %204 %221 = fadd float %219, %220 %222 = fadd float %221, %25 %223 = fmul float %202, %203 %224 = fmul float %203, %204 %225 = fmul float %204, %204 %226 = fmul float %204, %202 %227 = fmul float %26, %223 %228 = fmul float %27, %224 %229 = fadd float %227, %228 %230 = fmul float %28, %225 %231 = fadd float %229, %230 %232 = fmul float %29, %226 %233 = fadd float %231, %232 %234 = fmul float %30, %223 %235 = fmul float %31, %224 %236 = fadd float %234, %235 %237 = fmul float %32, %225 %238 = fadd float %236, %237 %239 = fmul float %33, %226 %240 = fadd float %238, %239 %241 = fmul float %34, %223 %242 = fmul float %35, %224 %243 = fadd float %241, %242 %244 = fmul float %36, %225 %245 = fadd float %243, %244 %246 = fmul float %37, %226 %247 = fadd float %245, %246 %248 = fmul float %203, %203 %249 = fmul float %202, %202 %250 = fsub float %249, %248 %251 = fmul float %38, %250 %252 = fadd float %251, %233 %253 = fmul float %39, %250 %254 = fadd float %253, %240 %255 = fmul float %40, %250 %256 = fadd float %255, %247 %257 = fadd float %252, %210 %258 = fadd float %254, %216 %259 = fadd float %256, %222 %260 = fmul float %112, %69 %261 = fadd float %260, %71 %262 = fmul float %113, %70 %263 = fadd float %262, %72 %264 = fmul float %173, %50 %265 = fadd float %264, %51 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %179, float %180, float %173, float %174) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %261, float %263, float %265, float %257) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %258, float %259, float %259, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %171, float %172, float %173, float %174) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[68:71], s[2:3], 0x0 ; C0A20300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s35, s[68:71], 0x29 ; C211C529 buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen ; E00C2000 80000100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700 buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00 s_buffer_load_dword s25, s[68:71], 0x2a ; C20CC52A s_buffer_load_dword s0, s[68:71], 0x32 ; C2004532 s_buffer_load_dword s4, s[68:71], 0x33 ; C2024533 s_buffer_load_dword s67, s[68:71], 0x34 ; C221C534 s_buffer_load_dword s2, s[68:71], 0x1c ; C201451C s_buffer_load_dword s3, s[68:71], 0x1d ; C201C51D s_buffer_load_dword s1, s[68:71], 0x1e ; C200C51E s_buffer_load_dword s72, s[68:71], 0x20 ; C2244520 s_buffer_load_dword s45, s[68:71], 0x21 ; C216C521 s_buffer_load_dword s73, s[68:71], 0x35 ; C224C535 s_buffer_load_dword s74, s[68:71], 0x36 ; C2254536 s_buffer_load_dword s51, s[68:71], 0x38 ; C219C538 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s4 ; 7E000204 s_buffer_load_dword s56, s[68:71], 0x39 ; C21C4539 s_buffer_load_dword s53, s[68:71], 0x3a ; C21AC53A s_buffer_load_dword s44, s[68:71], 0x22 ; C2164522 s_buffer_load_dword s75, s[68:71], 0x24 ; C225C524 s_buffer_load_dword s55, s[68:71], 0x25 ; C21BC525 s_buffer_load_dword s46, s[68:71], 0x26 ; C2174526 s_buffer_load_dword s76, s[68:71], 0x28 ; C2264528 s_buffer_load_dword s57, s[68:71], 0x3c ; C21CC53C s_buffer_load_dword s58, s[68:71], 0x3d ; C21D453D s_buffer_load_dword s54, s[68:71], 0x3e ; C21B453E s_buffer_load_dword s50, s[68:71], 0x40 ; C2194540 s_buffer_load_dword s47, s[68:71], 0x41 ; C217C541 s_buffer_load_dword s48, s[68:71], 0x42 ; C2184542 s_buffer_load_dword s38, s[68:71], 0x48 ; C2134548 s_buffer_load_dword s39, s[68:71], 0x49 ; C213C549 s_buffer_load_dword s36, s[68:71], 0x4a ; C212454A s_buffer_load_dword s41, s[68:71], 0x4b ; C214C54B s_buffer_load_dword s24, s[68:71], 0x4c ; C20C454C s_buffer_load_dword s60, s[68:71], 0x50 ; C21E4550 s_buffer_load_dword s59, s[68:71], 0x51 ; C21DC551 s_buffer_load_dword s5, s[68:71], 0x52 ; C202C552 s_buffer_load_dword s6, s[68:71], 0x53 ; C2034553 s_buffer_load_dword s33, s[68:71], 0x54 ; C210C554 s_buffer_load_dword s28, s[68:71], 0x55 ; C20E4555 s_buffer_load_dword s29, s[68:71], 0x56 ; C20EC556 s_buffer_load_dword s26, s[68:71], 0x57 ; C20D4557 s_buffer_load_dword s27, s[68:71], 0x58 ; C20DC558 s_buffer_load_dword s9, s[68:71], 0xd ; C204C50D s_buffer_load_dword s8, s[68:71], 0xe ; C204450E s_buffer_load_dword s4, s[68:71], 0xf ; C202450F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s5 ; 7E0E0205 s_buffer_load_dword s17, s[68:71], 0x10 ; C208C510 v_mov_b32_e32 v8, s6 ; 7E100206 s_buffer_load_dword s22, s[68:71], 0x11 ; C20B4511 s_buffer_load_dword s34, s[68:71], 0x59 ; C2114559 s_buffer_load_dword s30, s[68:71], 0x5a ; C20F455A s_buffer_load_dword s31, s[68:71], 0x5b ; C20FC55B s_buffer_load_dword s19, s[68:71], 0x5c ; C209C55C s_buffer_load_dword s20, s[68:71], 0x5d ; C20A455D s_buffer_load_dword s6, s[68:71], 0x0 ; C2034500 s_buffer_load_dword s11, s[68:71], 0x4 ; C205C504 s_buffer_load_dword s13, s[68:71], 0x5 ; C206C505 s_buffer_load_dword s10, s[68:71], 0x6 ; C2054506 s_buffer_load_dword s7, s[68:71], 0x7 ; C203C507 s_buffer_load_dword s14, s[68:71], 0x8 ; C2074508 s_buffer_load_dword s15, s[68:71], 0x9 ; C207C509 s_buffer_load_dword s12, s[68:71], 0xa ; C206450A s_buffer_load_dword s5, s[68:71], 0xb ; C202C50B s_buffer_load_dword s16, s[68:71], 0xc ; C208450C s_buffer_load_dword s32, s[68:71], 0x12 ; C2104512 s_buffer_load_dword s18, s[68:71], 0x13 ; C2094513 s_buffer_load_dword s42, s[68:71], 0x14 ; C2154514 s_buffer_load_dword s49, s[68:71], 0x15 ; C218C515 s_buffer_load_dword s37, s[68:71], 0x16 ; C212C516 s_buffer_load_dword s21, s[68:71], 0x17 ; C20AC517 s_buffer_load_dword s43, s[68:71], 0x18 ; C215C518 s_buffer_load_dword s52, s[68:71], 0x19 ; C21A4519 s_buffer_load_dword s40, s[68:71], 0x1a ; C214451A s_buffer_load_dword s23, s[68:71], 0x1b ; C20BC51B s_buffer_load_dword s65, s[68:71], 0x5e ; C220C55E s_buffer_load_dword s66, s[68:71], 0x5f ; C221455F s_buffer_load_dword s62, s[68:71], 0x60 ; C21F4560 s_buffer_load_dword s63, s[68:71], 0x61 ; C21FC561 s_buffer_load_dword s61, s[68:71], 0x62 ; C21EC562 s_buffer_load_dword s64, s[68:71], 0x63 ; C2204563 v_mul_f32_e32 v1, s67, v1 ; 10020243 v_mul_f32_e32 v2, s73, v2 ; 10040449 v_mul_f32_e32 v3, s74, v3 ; 1006064A v_mul_f32_e32 v9, s72, v4 ; 10120848 v_mul_f32_e32 v13, s75, v4 ; 101A084B v_mul_f32_e32 v4, s76, v4 ; 1008084C v_mac_f32_e32 v7, s60, v11 ; 3E0E163C v_mac_f32_e32 v8, s59, v12 ; 3E10183B v_mul_f32_e32 v11, s51, v1 ; 10160233 v_mul_f32_e32 v12, s56, v1 ; 10180238 v_mul_f32_e32 v14, s53, v1 ; 101C0235 v_mac_f32_e32 v9, s45, v5 ; 3E120A2D v_mac_f32_e32 v13, s55, v5 ; 3E1A0A37 v_mac_f32_e32 v4, s35, v5 ; 3E080A23 v_mac_f32_e32 v11, s57, v2 ; 3E160439 v_mac_f32_e32 v12, s58, v2 ; 3E18043A v_mac_f32_e32 v14, s54, v2 ; 3E1C0436 v_mac_f32_e32 v9, s44, v6 ; 3E120C2C v_mac_f32_e32 v13, s46, v6 ; 3E1A0C2E v_mac_f32_e32 v4, s25, v6 ; 3E080C19 v_mac_f32_e32 v11, s50, v3 ; 3E160632 v_mac_f32_e32 v12, s47, v3 ; 3E18062F v_mac_f32_e32 v14, s48, v3 ; 3E1C0630 v_mul_f32_e32 v5, v10, v10 ; 100A150A v_mad_f32 v6, -v10, v10, 1.0 ; D2820006 23CA150A v_mul_f32_e32 v1, v1, v6 ; 10020D01 v_mul_f32_e32 v3, v3, v6 ; 10060D03 v_mac_f32_e32 v1, v11, v5 ; 3E020B0B v_mac_f32_e32 v3, v14, v5 ; 3E060B0E v_mac_f32_e32 v2, 0, v12 ; 3E041880 v_mul_f32_e32 v5, s38, v1 ; 100A0226 v_mac_f32_e32 v5, s39, v2 ; 3E0A0427 v_mac_f32_e32 v5, s36, v3 ; 3E0A0624 v_add_f32_e32 v5, s41, v5 ; 060A0A29 v_mad_f32 v6, -v5, s38, v1 ; D2820006 24044D05 v_mad_f32 v10, -v5, s39, v2 ; D282000A 24084F05 v_mad_f32 v5, -v5, s36, v3 ; D2820005 240C4905 v_sub_f32_e64 v11, 1.0, s24 ; D208000B 000030F2 v_mul_f32_e32 v6, v6, v11 ; 100C1706 v_mul_f32_e32 v10, v10, v11 ; 1014170A v_mul_f32_e32 v5, v5, v11 ; 100A1705 v_mac_f32_e32 v6, s24, v1 ; 3E0C0218 v_mac_f32_e32 v10, s24, v2 ; 3E140418 v_mac_f32_e32 v5, s24, v3 ; 3E0A0618 v_mul_f32_e32 v1, s33, v6 ; 10020C21 v_mul_f32_e32 v2, s28, v6 ; 10040C1C v_mul_f32_e32 v3, s29, v6 ; 10060C1D v_mul_f32_e32 v6, s26, v6 ; 100C0C1A v_mac_f32_e32 v1, s27, v10 ; 3E02141B v_mul_f32_e32 v11, v9, v9 ; 10161309 v_mac_f32_e32 v11, v13, v13 ; 3E161B0D v_mac_f32_e32 v11, v4, v4 ; 3E160904 v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v2, s34, v10 ; 3E041422 v_mac_f32_e32 v3, s30, v10 ; 3E06141E v_mac_f32_e32 v6, s31, v10 ; 3E0C141F v_mul_f32_e32 v10, v11, v13 ; 10141B0B v_mul_f32_e32 v4, v11, v4 ; 1008090B v_mul_f32_e32 v12, v4, v10 ; 10181504 v_mul_f32_e32 v13, s22, v12 ; 101A1816 v_mul_f32_e32 v14, s49, v12 ; 101C1831 v_mul_f32_e32 v12, s52, v12 ; 10181834 v_mul_f32_e32 v9, v11, v9 ; 1012130B v_mul_f32_e32 v11, v10, v9 ; 1016130A v_mac_f32_e32 v13, s17, v11 ; 3E1A1611 v_mac_f32_e32 v14, s42, v11 ; 3E1C162A v_mac_f32_e32 v12, s43, v11 ; 3E18162B v_mac_f32_e32 v1, s19, v5 ; 3E020A13 v_mac_f32_e32 v2, s20, v5 ; 3E040A14 v_mac_f32_e32 v3, s65, v5 ; 3E060A41 v_mac_f32_e32 v6, s66, v5 ; 3E0C0A42 v_mul_f32_e32 v5, v4, v4 ; 100A0904 v_mac_f32_e32 v13, s32, v5 ; 3E1A0A20 v_mac_f32_e32 v14, s37, v5 ; 3E1C0A25 v_mac_f32_e32 v12, s40, v5 ; 3E180A28 v_mul_f32_e32 v5, s13, v10 ; 100A140D v_mac_f32_e32 v5, s11, v9 ; 3E0A120B v_mul_f32_e32 v11, s15, v10 ; 1016140F v_mac_f32_e32 v11, s14, v9 ; 3E16120E v_mul_f32_e32 v15, s9, v10 ; 101E1409 v_mac_f32_e32 v15, s16, v9 ; 3E1E1210 v_mac_f32_e32 v5, s10, v4 ; 3E0A080A v_mac_f32_e32 v11, s12, v4 ; 3E16080C v_mac_f32_e32 v15, s8, v4 ; 3E1E0808 v_mul_f32_e32 v4, v9, v4 ; 10080909 v_mac_f32_e32 v13, s18, v4 ; 3E1A0812 v_mac_f32_e32 v14, s21, v4 ; 3E1C0815 v_mac_f32_e32 v12, s23, v4 ; 3E180817 v_mul_f32_e32 v4, v10, v10 ; 1008150A v_mad_f32 v4, v9, v9, -v4 ; D2820004 84121309 v_mac_f32_e32 v13, s2, v4 ; 3E1A0802 v_mac_f32_e32 v14, s3, v4 ; 3E1C0803 v_mac_f32_e32 v12, s1, v4 ; 3E180801 v_add_f32_e32 v1, s62, v1 ; 0602023E v_add_f32_e32 v2, s63, v2 ; 0604043F v_add_f32_e32 v4, s64, v6 ; 06080C40 v_mul_f32_e32 v6, 0.5, v2 ; 100C04F0 v_mul_f32_e32 v9, 0.5, v4 ; 101208F0 v_mad_f32 v10, 0.5, v1, v9 ; D282000A 042602F0 v_mac_f32_e32 v9, s6, v6 ; 3E120C06 v_add_f32_e32 v5, s7, v5 ; 060A0A07 v_add_f32_e32 v3, s61, v3 ; 0606063D v_mac_f32_e32 v0, s0, v3 ; 3E000600 v_add_f32_e32 v5, v5, v13 ; 060A1B05 exp 15, 32, 0, 0, 0, v10, v9, v3, v4 ; F800020F 0403090A exp 15, 33, 0, 0, 0, v7, v8, v0, v5 ; F800021F 05000807 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v0, s5, v11 ; 06001605 v_add_f32_e32 v5, s4, v15 ; 060A1E04 v_add_f32_e32 v0, v0, v14 ; 06001D00 v_add_f32_e32 v5, v5, v12 ; 060A1905 v_mov_b32_e32 v6, 0 ; 7E0C0280 exp 15, 34, 0, 0, 0, v0, v5, v5, v6 ; F800022F 06050500 exp 15, 12, 0, 1, 0, v1, v2, v3, v4 ; F80008CF 04030201 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 16 Code Size: 900 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL CONST[0..3] DCL CONST[7] DCL CONST[9] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { 0.0000, 0.7000, 32.0000, 0.5000} IMM[1] FLT32 { 2.3000, 0.1000, 1.0000, 140.0000} IMM[2] FLT32 { 0.3500, 3.0000, 0.0000, 0.0000} 0: MOV TEMP[0].x, IN[1].wwww 1: MOV TEMP[0].yz, IN[2].yxyy 2: MOV TEMP[1].xy, IN[1].xyyy 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D 4: MUL TEMP[2].xyz, TEMP[1].xyzz, CONST[7].xyzz 5: MOV TEMP[3].x, IMM[0].xxxx 6: MUL TEMP[4].x, CONST[0].xxxx, IMM[0].yyyy 7: MOV TEMP[3].y, TEMP[4].xxxx 8: MOV TEMP[5].x, IMM[0].xxxx 9: MOV TEMP[5].y, TEMP[4].xxxx 10: MOV TEMP[4].xy, IN[1].xyyy 11: TEX TEMP[4].x, TEMP[4], SAMP[1], 2D 12: MAD TEMP[3].xy, IN[1].xyyy, IMM[0].zzzz, TEMP[3].xyyy 13: MOV TEMP[3].xy, TEMP[3].xyyy 14: TEX TEMP[3].x, TEMP[3], SAMP[2], 2D 15: MAD TEMP[5].xy, -IN[1].xyyy, IMM[0].zzzz, TEMP[5].xyyy 16: MOV TEMP[5].xy, TEMP[5].xyyy 17: TEX TEMP[5].x, TEMP[5], SAMP[2], 2D 18: ADD TEMP[3].x, TEMP[3].xxxx, TEMP[5].xxxx 19: MUL TEMP[3].x, TEMP[3].xxxx, IMM[0].wwww 20: POW TEMP[3].x, TEMP[3].xxxx, IMM[1].xxxx 21: MUL TEMP[3].x, TEMP[4].xxxx, TEMP[3].xxxx 22: MUL TEMP[3].x, TEMP[3].xxxx, IMM[1].yyyy 23: FSLT TEMP[4].x, TEMP[1].wwww, CONST[9].xxxx 24: AND TEMP[4].x, TEMP[4].xxxx, IMM[1].zzzz 25: KILL_IF -TEMP[4].xxxx 26: MOV TEMP[4].xy, IN[0].xyyy 27: MOV TEMP[4].w, IN[0].wwww 28: TXP TEMP[4], TEMP[4], SAMP[3], 2D 29: LG2 TEMP[5].x, TEMP[4].xxxx 30: LG2 TEMP[5].y, TEMP[4].yyyy 31: LG2 TEMP[5].z, TEMP[4].zzzz 32: LG2 TEMP[5].w, TEMP[4].wwww 33: MOV TEMP[4], -TEMP[5] 34: ADD TEMP[0].xyz, TEMP[4].xyzz, TEMP[0].xyzz 35: MUL TEMP[4].xyz, TEMP[4].wwww, CONST[3] 36: MUL TEMP[5].xyz, TEMP[2].xyzz, TEMP[0].xyzz 37: MAD TEMP[5].xyz, TEMP[0].xyzz, TEMP[4].xyzz, TEMP[5].xyzz 38: MAD TEMP[1].x, TEMP[4].xyzz, CONST[2].wwww, TEMP[1].wwww 39: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 40: ADD TEMP[0].xyz, IMM[2].xxxx, -TEMP[0].xyzz 41: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[2].yyyy 42: MOV_SAT TEMP[0].xyz, TEMP[0].xyzz 43: MUL TEMP[0].xyz, IMM[1].wwww, TEMP[0].xyzz 44: MAD TEMP[5].xyz, TEMP[2].xyzz, TEMP[0].xyzz, TEMP[5].xyzz 45: MOV TEMP[0].w, TEMP[1].xxxx 46: ADD TEMP[0].xyz, TEMP[5].xyzz, TEMP[3].xxxx 47: MOV_SAT TEMP[1].x, IN[1].zzzz 48: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[1].xyzz 49: MOV OUT[0], TEMP[0] 50: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %36 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %37 = load <32 x i8>, <32 x i8> addrspace(2)* %36, align 32, !tbaa !0 %38 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %41 = bitcast <8 x i32> addrspace(2)* %40 to <32 x i8> addrspace(2)* %42 = load <32 x i8>, <32 x i8> addrspace(2)* %41, align 32, !tbaa !0 %43 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %44 = bitcast <4 x i32> addrspace(2)* %43 to <16 x i8> addrspace(2)* %45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0 %46 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %47 = load <8 x i32>, <8 x i32> addrspace(2)* %46, align 32, !tbaa !0 %48 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0 %50 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %51 = bitcast <8 x i32> addrspace(2)* %50 to <32 x i8> addrspace(2)* %52 = load <32 x i8>, <32 x i8> addrspace(2)* %51, align 32, !tbaa !0 %53 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %54 = bitcast <4 x i32> addrspace(2)* %53 to <16 x i8> addrspace(2)* %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %57 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %58 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %59 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %60 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %61 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %62 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %63 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %64 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %65 = bitcast float %59 to i32 %66 = bitcast float %60 to i32 %67 = insertelement <2 x i32> undef, i32 %65, i32 0 %68 = insertelement <2 x i32> %67, i32 %66, i32 1 %69 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %68, <32 x i8> %37, <16 x i8> %39, i32 2) %70 = extractelement <4 x float> %69, i32 0 %71 = extractelement <4 x float> %69, i32 1 %72 = extractelement <4 x float> %69, i32 2 %73 = extractelement <4 x float> %69, i32 3 %74 = fmul float %70, %32 %75 = fmul float %71, %33 %76 = fmul float %72, %34 %77 = fmul float %24, 0x3FE6666660000000 %78 = bitcast float %59 to i32 %79 = bitcast float %60 to i32 %80 = insertelement <2 x i32> undef, i32 %78, i32 0 %81 = insertelement <2 x i32> %80, i32 %79, i32 1 %82 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %81, <32 x i8> %42, <16 x i8> %45, i32 2) %83 = extractelement <4 x float> %82, i32 0 %84 = fmul float %59, 3.200000e+01 %85 = fadd float %84, 0.000000e+00 %86 = fmul float %60, 3.200000e+01 %87 = fadd float %86, %77 %88 = bitcast float %85 to i32 %89 = bitcast float %87 to i32 %90 = insertelement <2 x i32> undef, i32 %88, i32 0 %91 = insertelement <2 x i32> %90, i32 %89, i32 1 %92 = bitcast <8 x i32> %47 to <32 x i8> %93 = bitcast <4 x i32> %49 to <16 x i8> %94 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %91, <32 x i8> %92, <16 x i8> %93, i32 2) %95 = extractelement <4 x float> %94, i32 0 %96 = fmul float %59, -3.200000e+01 %97 = fadd float %96, 0.000000e+00 %98 = fmul float %60, -3.200000e+01 %99 = fadd float %98, %77 %100 = bitcast float %97 to i32 %101 = bitcast float %99 to i32 %102 = insertelement <2 x i32> undef, i32 %100, i32 0 %103 = insertelement <2 x i32> %102, i32 %101, i32 1 %104 = bitcast <8 x i32> %47 to <32 x i8> %105 = bitcast <4 x i32> %49 to <16 x i8> %106 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %103, <32 x i8> %104, <16 x i8> %105, i32 2) %107 = extractelement <4 x float> %106, i32 0 %108 = fadd float %95, %107 %109 = fmul float %108, 5.000000e-01 %110 = call float @llvm.pow.f32(float %109, float 0x4002666660000000) %111 = fmul float %83, %110 %112 = fmul float %111, 0x3FB99999A0000000 %113 = fcmp olt float %73, %35 %114 = select i1 %113, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %114) %115 = fdiv float %56, %58 %116 = fdiv float %57, %58 %117 = bitcast float %115 to i32 %118 = bitcast float %116 to i32 %119 = insertelement <2 x i32> undef, i32 %117, i32 0 %120 = insertelement <2 x i32> %119, i32 %118, i32 1 %121 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %120, <32 x i8> %52, <16 x i8> %55, i32 2) %122 = extractelement <4 x float> %121, i32 0 %123 = extractelement <4 x float> %121, i32 1 %124 = extractelement <4 x float> %121, i32 2 %125 = extractelement <4 x float> %121, i32 3 %126 = call float @llvm.log2.f32(float %122) %127 = call float @llvm.log2.f32(float %123) %128 = call float @llvm.log2.f32(float %124) %129 = call float @llvm.log2.f32(float %125) %130 = fsub float -0.000000e+00, %129 %131 = fsub float %62, %126 %132 = fsub float %63, %127 %133 = fsub float %64, %128 %134 = fmul float %29, %130 %135 = fmul float %30, %130 %136 = fmul float %31, %130 %137 = fmul float %74, %131 %138 = fmul float %75, %132 %139 = fmul float %76, %133 %140 = fmul float %131, %134 %141 = fadd float %140, %137 %142 = fmul float %132, %135 %143 = fadd float %142, %138 %144 = fmul float %133, %136 %145 = fadd float %144, %139 %146 = fmul float %134, %28 %147 = fadd float %146, %73 %148 = fmul float %74, %112 %149 = fmul float %75, %112 %150 = fmul float %76, %112 %151 = fsub float 0x3FD6666660000000, %131 %152 = fsub float 0x3FD6666660000000, %132 %153 = fsub float 0x3FD6666660000000, %133 %154 = fmul float %151, 3.000000e+00 %155 = fmul float %152, 3.000000e+00 %156 = fmul float %153, 3.000000e+00 %157 = call float @llvm.AMDIL.clamp.(float %154, float 0.000000e+00, float 1.000000e+00) %158 = call float @llvm.AMDIL.clamp.(float %155, float 0.000000e+00, float 1.000000e+00) %159 = call float @llvm.AMDIL.clamp.(float %156, float 0.000000e+00, float 1.000000e+00) %160 = fmul float %157, 1.400000e+02 %161 = fmul float %158, 1.400000e+02 %162 = fmul float %159, 1.400000e+02 %163 = fmul float %148, %160 %164 = fadd float %163, %141 %165 = fmul float %149, %161 %166 = fadd float %165, %143 %167 = fmul float %150, %162 %168 = fadd float %167, %145 %169 = fadd float %164, %112 %170 = fadd float %166, %112 %171 = fadd float %168, %112 %172 = call float @llvm.AMDIL.clamp.(float %61, float 0.000000e+00, float 1.000000e+00) %173 = call float @llvm.AMDGPU.lrp(float %172, float %169, float %25) %174 = call float @llvm.AMDGPU.lrp(float %172, float %170, float %26) %175 = call float @llvm.AMDGPU.lrp(float %172, float %171, float %27) %176 = call i32 @llvm.SI.packf16(float %173, float %174) %177 = bitcast i32 %176 to float %178 = call i32 @llvm.SI.packf16(float %175, float %147) %179 = bitcast i32 %178 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %177, float %179, float %177, float %179) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s48, s[0:3], 0x0 ; C2180100 v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600 v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601 v_interp_p1_f32 v8, v0, 3, 1, [m0] ; C8200700 v_interp_p2_f32 v8, [v8], v1, 3, 1, [m0] ; C8210701 v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800 v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801 v_interp_p1_f32 v0, v0, 1, 2, [m0] ; C8000900 v_interp_p2_f32 v0, [v0], v1, 1, 2, [m0] ; C8010901 s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 s_load_dwordx4 s[20:23], s[4:5], 0x8 ; C08A0508 s_load_dwordx8 s[52:59], s[6:7], 0x0 ; C0DA0700 s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 s_load_dwordx4 s[8:11], s[4:5], 0xc ; C084050C s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710 s_load_dwordx8 s[12:19], s[6:7], 0x18 ; C0C60718 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[52:59], s[32:35] ; F0800F00 010D0A05 image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[40:47], s[36:39] ; F0800100 012A0105 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 s_buffer_load_dword s7, s[0:3], 0xb ; C203810B v_mov_b32_e32 v14, 0x3f333333 ; 7E1C02FF 3F333333 v_mul_f32_e32 v15, s48, v14 ; 101E1C30 v_mov_b32_e32 v14, 0x42000000 ; 7E1C02FF 42000000 v_mad_f32 v16, v5, v14, 0 ; D2820010 02021D05 v_mad_f32 v17, v14, v6, v15 ; D2820011 043E0D0E v_mov_b32_e32 v18, 0xc2000000 ; 7E2402FF C2000000 v_mad_f32 v14, v5, v18, 0 ; D282000E 02022505 v_mac_f32_e32 v15, v18, v6 ; 3E1E0D12 s_buffer_load_dword s32, s[0:3], 0x24 ; C2100124 image_sample v5, 1, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[24:31], s[20:23] ; F0800100 00A60510 image_sample v6, 1, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[24:31], s[20:23] ; F0800100 00A6060E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_add_f32_e32 v5, v6, v5 ; 060A0B06 v_mul_f32_e32 v5, 0.5, v5 ; 100A0AF0 s_buffer_load_dword s20, s[0:3], 0x1c ; C20A011C s_buffer_load_dword s21, s[0:3], 0x1d ; C20A811D s_buffer_load_dword s22, s[0:3], 0x1e ; C20B011E v_log_f32_e32 v5, v5 ; 7E0A4F05 v_cmp_gt_f32_e32 vcc, s32, v13 ; 7C081A20 v_cndmask_b32_e64 v6, 0, -1.0, vcc ; D2000006 01A9E680 v_mul_legacy_f32_e32 v5, 0x40133333, v5 ; 0E0A0AFF 40133333 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_mul_f32_e32 v1, v5, v1 ; 10020305 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s20, v10 ; 100A1414 v_mul_f32_e32 v10, s21, v11 ; 10141615 v_mul_f32_e32 v11, s22, v12 ; 10161816 v_mov_b32_e32 v12, 0x3dcccccd ; 7E1802FF 3DCCCCCD v_mul_f32_e32 v14, v12, v1 ; 101C030C v_cmpx_le_f32_e32 vcc, 0, v6 ; 7C260C80 v_mov_b32_e32 v6, 0x6f800000 ; 7E0C02FF 6F800000 v_cmp_gt_f32_e64 vcc, |v4|, v6 ; D008016A 00020D04 v_mov_b32_e32 v6, 0x2f800000 ; 7E0C02FF 2F800000 v_cndmask_b32_e32 v6, 1.0, v6 ; 000C0CF2 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_rcp_f32_e32 v4, v4 ; 7E085504 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_mul_f32_e32 v3, v4, v3 ; 10060704 v_mul_f32_e32 v15, v2, v6 ; 101E0D02 s_buffer_load_dword s20, s[0:3], 0xc ; C20A010C v_mul_f32_e32 v16, v3, v6 ; 10200D03 image_sample v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[12:19], s[8:11] ; F0800F00 00430F0F s_buffer_load_dword s8, s[0:3], 0xd ; C204010D s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_log_f32_e32 v2, v15 ; 7E044F0F v_log_f32_e32 v3, v18 ; 7E064F12 v_log_f32_e32 v4, v16 ; 7E084F10 v_log_f32_e32 v6, v17 ; 7E0C4F11 v_subrev_f32_e32 v2, v2, v8 ; 0A041102 v_mul_f32_e64 v8, s20, -v3 ; D2100008 40020614 v_mac_f32_e32 v13, s7, v8 ; 3E1A1007 v_mul_f32_e32 v15, v2, v5 ; 101E0B02 v_mac_f32_e32 v15, v8, v2 ; 3E1E0508 v_mul_f32_e32 v5, v14, v5 ; 100A0B0E v_mov_b32_e32 v8, 0x3eb33333 ; 7E1002FF 3EB33333 v_subrev_f32_e32 v2, v2, v8 ; 0A041102 v_mov_b32_e32 v16, 0x40400000 ; 7E2002FF 40400000 v_mul_f32_e32 v2, v16, v2 ; 10040510 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mov_b32_e32 v17, 0x430c0000 ; 7E2202FF 430C0000 v_mul_f32_e32 v2, v17, v2 ; 10040511 v_mac_f32_e32 v15, v2, v5 ; 3E1E0B02 v_subrev_f32_e32 v2, v4, v9 ; 0A041304 v_mul_f32_e32 v4, s8, v3 ; 10080608 v_mul_f32_e32 v4, v4, v2 ; 10080504 v_mad_f32 v4, v10, v2, -v4 ; D2820004 8412050A v_mul_f32_e32 v5, v14, v10 ; 100A150E v_subrev_f32_e32 v2, v2, v8 ; 0A041102 v_mul_f32_e32 v2, v16, v2 ; 10040510 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mul_f32_e32 v2, v17, v2 ; 10040511 v_mac_f32_e32 v4, v2, v5 ; 3E080B02 v_subrev_f32_e32 v0, v6, v0 ; 0A000106 v_mul_f32_e32 v2, s0, v3 ; 10040600 v_mul_f32_e32 v2, v2, v0 ; 10040102 v_mad_f32 v2, v11, v0, -v2 ; D2820002 840A010B v_mul_f32_e32 v3, v14, v11 ; 1006170E v_subrev_f32_e32 v0, v0, v8 ; 0A001100 v_mul_f32_e32 v0, v16, v0 ; 10000110 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v0, v17, v0 ; 10000111 v_mac_f32_e32 v2, v0, v3 ; 3E040700 v_mac_f32_e32 v15, v12, v1 ; 3E1E030C v_mac_f32_e32 v4, v12, v1 ; 3E08030C v_mac_f32_e32 v2, v12, v1 ; 3E04030C v_add_f32_e64 v0, 0, v7 clamp ; D2060800 00020E80 v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 v_mul_f32_e32 v3, s4, v1 ; 10060204 v_mac_f32_e32 v3, v15, v0 ; 3E06010F v_mul_f32_e32 v5, s5, v1 ; 100A0205 v_mac_f32_e32 v5, v4, v0 ; 3E0A0104 v_mul_f32_e32 v1, s6, v1 ; 10020206 v_mac_f32_e32 v1, v2, v0 ; 3E020102 v_cvt_pkrtz_f16_f32_e32 v0, v1, v13 ; 5E001B01 v_cvt_pkrtz_f16_f32_e32 v1, v3, v5 ; 5E020B03 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 20 Code Size: 656 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..8] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[3].xyxx 1: MOV TEMP[1].x, IN[2].yyyy 2: ADD TEMP[2].xyz, IN[0].xyzz, -CONST[3].xyzz 3: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 4: FSLT TEMP[3].x, CONST[4].xxxx, TEMP[3].xxxx 5: UIF TEMP[3].xxxx :0 6: MOV TEMP[1].x, IMM[0].xxxx 7: MOV TEMP[0].xy, IMM[0].xxxx 8: ENDIF 9: MAD TEMP[2].xyz, CONST[0].xyzz, TEMP[0].xxxx, IN[0].xyzz 10: LRP TEMP[1].x, CONST[3].wwww, TEMP[1].xxxx, TEMP[0].yyyy 11: MAD TEMP[2].xyz, CONST[1].xyzz, TEMP[1].xxxx, TEMP[2].xyzz 12: ABS TEMP[0].x, TEMP[0].xxxx 13: MUL TEMP[0].xyz, CONST[2].xyzz, TEMP[0].xxxx 14: MAD TEMP[2].xyz, TEMP[0].xyzz, CONST[1].wwww, TEMP[2].xyzz 15: MUL TEMP[0], CONST[5], TEMP[2].xxxx 16: MAD TEMP[0], CONST[6], TEMP[2].yyyy, TEMP[0] 17: MAD TEMP[2], CONST[7], TEMP[2].zzzz, TEMP[0] 18: MAD TEMP[2], CONST[8], IN[0].wwww, TEMP[2] 19: MOV TEMP[0].x, IN[2].xxxx 20: FSLT TEMP[1].x, IMM[0].xxxx, IN[2].yyyy 21: AND TEMP[1].x, TEMP[1].xxxx, IMM[0].yyyy 22: MOV TEMP[0].y, TEMP[1].xxxx 23: MOV TEMP[0].xy, TEMP[0].xyxx 24: MOV TEMP[0].z, TEMP[2].zzzz 25: MOV OUT[2], TEMP[0] 26: MOV OUT[0], TEMP[2] 27: MOV OUT[1], IN[1] 28: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %44 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0 %46 = add i32 %5, %7 %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %45, i32 0, i32 %46) %48 = extractelement <4 x float> %47, i32 0 %49 = extractelement <4 x float> %47, i32 1 %50 = extractelement <4 x float> %47, i32 2 %51 = extractelement <4 x float> %47, i32 3 %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = extractelement <4 x float> %55, i32 3 %60 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0 %62 = add i32 %5, %7 %63 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %62) %64 = extractelement <4 x float> %63, i32 0 %65 = extractelement <4 x float> %63, i32 1 %66 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0 %68 = add i32 %5, %7 %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %68) %70 = extractelement <4 x float> %69, i32 0 %71 = extractelement <4 x float> %69, i32 1 %72 = fsub float %48, %23 %73 = fsub float %49, %24 %74 = fsub float %50, %25 %75 = fmul float %72, %72 %76 = fmul float %73, %73 %77 = fadd float %76, %75 %78 = fmul float %74, %74 %79 = fadd float %77, %78 %80 = fcmp olt float %27, %79 %. = select i1 %80, float 0.000000e+00, float %70 %.16 = select i1 %80, float 0.000000e+00, float %71 %.17 = select i1 %80, float 0.000000e+00, float %65 %81 = fmul float %13, %. %82 = fadd float %81, %48 %83 = fmul float %14, %. %84 = fadd float %83, %49 %85 = fmul float %15, %. %86 = fadd float %85, %50 %87 = call float @llvm.AMDGPU.lrp(float %26, float %.17, float %.16) %88 = fmul float %16, %87 %89 = fadd float %88, %82 %90 = fmul float %17, %87 %91 = fadd float %90, %84 %92 = fmul float %18, %87 %93 = fadd float %92, %86 %94 = call float @llvm.fabs.f32(float %.) %95 = fmul float %20, %94 %96 = fmul float %21, %94 %97 = fmul float %22, %94 %98 = fmul float %95, %19 %99 = fadd float %98, %89 %100 = fmul float %96, %19 %101 = fadd float %100, %91 %102 = fmul float %97, %19 %103 = fadd float %102, %93 %104 = fmul float %28, %99 %105 = fmul float %29, %99 %106 = fmul float %30, %99 %107 = fmul float %31, %99 %108 = fmul float %32, %101 %109 = fadd float %108, %104 %110 = fmul float %33, %101 %111 = fadd float %110, %105 %112 = fmul float %34, %101 %113 = fadd float %112, %106 %114 = fmul float %35, %101 %115 = fadd float %114, %107 %116 = fmul float %36, %103 %117 = fadd float %116, %109 %118 = fmul float %37, %103 %119 = fadd float %118, %111 %120 = fmul float %38, %103 %121 = fadd float %120, %113 %122 = fmul float %39, %103 %123 = fadd float %122, %115 %124 = fmul float %40, %51 %125 = fadd float %124, %117 %126 = fmul float %41, %51 %127 = fadd float %126, %119 %128 = fmul float %42, %51 %129 = fadd float %128, %121 %130 = fmul float %43, %51 %131 = fadd float %130, %123 %132 = fcmp ogt float %65, 0.000000e+00 %133 = select i1 %132, float 1.000000e+00, float 0.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %56, float %57, float %58, float %59) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %64, float %133, float %129, float %115) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %125, float %127, float %129, float %131) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 buffer_load_format_xyzw v[9:12], v0, s[16:19], 0 idxen ; E00C2000 80040900 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[11:14], v0, s[20:23], 0 idxen ; E00C2000 80050B00 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s10, s[0:3], 0x7 ; C2050107 s_buffer_load_dword s11, s[0:3], 0x8 ; C2058108 s_buffer_load_dword s12, s[0:3], 0x9 ; C2060109 s_buffer_load_dword s13, s[0:3], 0xa ; C206810A s_buffer_load_dword s14, s[0:3], 0xc ; C207010C s_buffer_load_dword s15, s[0:3], 0xd ; C207810D s_buffer_load_dword s16, s[0:3], 0xe ; C208010E s_buffer_load_dword s17, s[0:3], 0xf ; C208810F s_buffer_load_dword s18, s[0:3], 0x10 ; C2090110 s_buffer_load_dword s19, s[0:3], 0x14 ; C2098114 s_buffer_load_dword s20, s[0:3], 0x15 ; C20A0115 s_buffer_load_dword s21, s[0:3], 0x16 ; C20A8116 s_buffer_load_dword s22, s[0:3], 0x17 ; C20B0117 s_buffer_load_dword s23, s[0:3], 0x18 ; C20B8118 s_buffer_load_dword s24, s[0:3], 0x19 ; C20C0119 s_buffer_load_dword s25, s[0:3], 0x1a ; C20C811A s_buffer_load_dword s26, s[0:3], 0x1b ; C20D011B s_buffer_load_dword s27, s[0:3], 0x1c ; C20D811C s_buffer_load_dword s28, s[0:3], 0x1d ; C20E011D s_buffer_load_dword s29, s[0:3], 0x1e ; C20E811E s_buffer_load_dword s30, s[0:3], 0x1f ; C20F011F s_buffer_load_dword s31, s[0:3], 0x20 ; C20F8120 s_buffer_load_dword s32, s[0:3], 0x21 ; C2100121 s_buffer_load_dword s33, s[0:3], 0x22 ; C2108122 s_buffer_load_dword s0, s[0:3], 0x23 ; C2000123 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v0, s14, v1 ; 0A00020E v_subrev_f32_e32 v13, s15, v2 ; 0A1A040F v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mac_f32_e32 v0, v13, v13 ; 3E001B0D v_subrev_f32_e32 v13, s16, v3 ; 0A1A0610 v_mac_f32_e32 v0, v13, v13 ; 3E001B0D v_cmp_lt_f32_e32 vcc, s18, v0 ; 7C020012 v_cndmask_b32_e64 v0, v11, 0, vcc ; D2000000 01A9010B v_cndmask_b32_e64 v11, v12, 0, vcc ; D200000B 01A9010C v_cndmask_b32_e64 v12, v10, 0, vcc ; D200000C 01A9010A v_sub_f32_e64 v13, 1.0, s17 ; D208000D 000022F2 v_mul_f32_e32 v11, v11, v13 ; 10161B0B v_mac_f32_e32 v11, s17, v12 ; 3E161811 v_mad_f32 v1, s4, v0, v1 ; D2820001 04060004 v_mac_f32_e32 v1, s7, v11 ; 3E021607 v_mad_f32 v2, s5, v0, v2 ; D2820002 040A0005 v_mac_f32_e32 v2, s8, v11 ; 3E041608 v_mad_f32 v3, s6, v0, v3 ; D2820003 040E0006 v_mac_f32_e32 v3, s9, v11 ; 3E061609 v_mul_f32_e64 v11, s11, |v0| ; D210020B 0002000B v_mul_f32_e64 v12, s12, |v0| ; D210020C 0002000C v_mul_f32_e64 v0, s13, |v0| ; D2100200 0002000D v_mac_f32_e32 v1, s10, v11 ; 3E02160A v_mac_f32_e32 v2, s10, v12 ; 3E04180A v_mac_f32_e32 v3, s10, v0 ; 3E06000A v_mul_f32_e32 v0, s19, v1 ; 10000213 v_mul_f32_e32 v11, s20, v1 ; 10160214 v_mul_f32_e32 v12, s21, v1 ; 10180215 v_mul_f32_e32 v1, s22, v1 ; 10020216 v_mac_f32_e32 v0, s23, v2 ; 3E000417 v_mac_f32_e32 v11, s24, v2 ; 3E160418 v_mac_f32_e32 v12, s25, v2 ; 3E180419 v_mac_f32_e32 v1, s26, v2 ; 3E02041A v_mac_f32_e32 v0, s27, v3 ; 3E00061B v_mac_f32_e32 v11, s28, v3 ; 3E16061C v_mac_f32_e32 v12, s29, v3 ; 3E18061D v_mad_f32 v2, s30, v3, v1 ; D2820002 0406061E v_mac_f32_e32 v0, s31, v4 ; 3E00081F v_mac_f32_e32 v11, s32, v4 ; 3E160820 v_mac_f32_e32 v12, s33, v4 ; 3E180821 v_mac_f32_e32 v2, s0, v4 ; 3E040800 v_cmp_lt_f32_e32 vcc, 0, v10 ; 7C021480 v_cndmask_b32_e64 v3, 0, 1.0, vcc ; D2000003 01A9E480 exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605 exp 15, 33, 0, 0, 0, v9, v3, v12, v1 ; F800021F 010C0309 exp 15, 12, 0, 1, 0, v0, v11, v12, v2 ; F80008CF 020C0B00 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 444 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.7000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].w, TEMP[0].wwww 3: MUL TEMP[1].xyz, TEMP[0].xyzz, IN[0].xyzz 4: FSLT TEMP[0].x, TEMP[0].wwww, IMM[0].xxxx 5: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].yyyy 6: KILL_IF -TEMP[0].xxxx 7: MAD TEMP[0].x, IN[1].zzzz, CONST[1].zzzz, CONST[1].wwww 8: MOV_SAT TEMP[0].x, TEMP[0].xxxx 9: LRP TEMP[1].xyz, TEMP[0].xxxx, TEMP[1].xyzz, CONST[0].xyzz 10: MOV OUT[0], TEMP[1] 11: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %29 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %30 = load <32 x i8>, <32 x i8> addrspace(2)* %29, align 32, !tbaa !0 %31 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 %33 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %39 = bitcast float %36 to i32 %40 = bitcast float %37 to i32 %41 = insertelement <2 x i32> undef, i32 %39, i32 0 %42 = insertelement <2 x i32> %41, i32 %40, i32 1 %43 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %42, <32 x i8> %30, <16 x i8> %32, i32 2) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = extractelement <4 x float> %43, i32 2 %47 = extractelement <4 x float> %43, i32 3 %48 = fmul float %44, %33 %49 = fmul float %45, %34 %50 = fmul float %46, %35 %51 = fcmp olt float %47, 0x3FE6666660000000 %52 = select i1 %51, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %52) %53 = fmul float %38, %27 %54 = fadd float %53, %28 %55 = call float @llvm.AMDIL.clamp.(float %54, float 0.000000e+00, float 1.000000e+00) %56 = call float @llvm.AMDGPU.lrp(float %55, float %48, float %24) %57 = call float @llvm.AMDGPU.lrp(float %55, float %49, float %25) %58 = call float @llvm.AMDGPU.lrp(float %55, float %50, float %26) %59 = call i32 @llvm.SI.packf16(float %56, float %57) %60 = bitcast i32 %59 to float %61 = call i32 @llvm.SI.packf16(float %58, float %47) %62 = bitcast i32 %61 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %60, float %62, float %60, float %62) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600 v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[5:8], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[8:15], s[4:7] ; F0800F00 00220505 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_buffer_load_dword s5, s[0:3], 0x7 ; C2028107 s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101 s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v2, v5 ; 10020B02 v_mul_f32_e32 v2, v3, v6 ; 10040D03 v_mul_f32_e32 v3, v4, v7 ; 10060F04 v_mov_b32_e32 v4, 0x3f333333 ; 7E0802FF 3F333333 v_cmp_gt_f32_e32 vcc, v4, v8 ; 7C081104 v_cndmask_b32_e64 v4, 0, -1.0, vcc ; D2000004 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s5 ; 7E080205 v_mac_f32_e32 v4, s4, v0 ; 3E080004 v_add_f32_e64 v0, 0, v4 clamp ; D2060800 00020880 v_sub_f32_e32 v4, 1.0, v0 ; 080800F2 v_mul_f32_e32 v5, s6, v4 ; 100A0806 v_mac_f32_e32 v5, v1, v0 ; 3E0A0101 v_mul_f32_e32 v1, s7, v4 ; 10020807 v_mac_f32_e32 v1, v2, v0 ; 3E020102 v_mul_f32_e32 v2, s0, v4 ; 10040800 v_mac_f32_e32 v2, v3, v0 ; 3E040103 v_cvt_pkrtz_f16_f32_e32 v0, v2, v8 ; 5E001102 v_cvt_pkrtz_f16_f32_e32 v1, v5, v1 ; 5E020305 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 208 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..13] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[2].xyxx 1: MOV TEMP[1].x, IN[1].yyyy 2: ADD TEMP[2].xyz, IN[0].xyzz, -CONST[4].xyzz 3: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 4: FSLT TEMP[3].x, CONST[5].xxxx, TEMP[3].xxxx 5: UIF TEMP[3].xxxx :0 6: MOV TEMP[1].x, IMM[0].xxxx 7: MOV TEMP[0].xy, IMM[0].xxxx 8: ENDIF 9: MAD TEMP[2].xyz, CONST[1].xyzz, TEMP[0].xxxx, IN[0].xyzz 10: LRP TEMP[1].x, CONST[4].wwww, TEMP[1].xxxx, TEMP[0].yyyy 11: MAD TEMP[2].xyz, CONST[2].xyzz, TEMP[1].xxxx, TEMP[2].xyzz 12: ABS TEMP[0].x, TEMP[0].xxxx 13: MUL TEMP[0].xyz, CONST[3].xyzz, TEMP[0].xxxx 14: MAD TEMP[2].xyz, TEMP[0].xyzz, CONST[2].wwww, TEMP[2].xyzz 15: MOV TEMP[0].x, IN[1].xxxx 16: FSLT TEMP[1].x, IMM[0].xxxx, IN[1].yyyy 17: AND TEMP[1].x, TEMP[1].xxxx, IMM[0].yyyy 18: MOV TEMP[0].y, TEMP[1].xxxx 19: MOV TEMP[1].xyz, IMM[0].xxyx 20: MUL TEMP[3], CONST[10], TEMP[2].xxxx 21: MAD TEMP[3], CONST[11], TEMP[2].yyyy, TEMP[3] 22: MAD TEMP[3], CONST[12], TEMP[2].zzzz, TEMP[3] 23: MAD TEMP[3].z, CONST[13], IN[0].wwww, TEMP[3] 24: MUL TEMP[3].x, TEMP[3].zzzz, CONST[0].wwww 25: MOV TEMP[1].w, -TEMP[3].xxxx 26: MUL TEMP[3], CONST[6], TEMP[2].xxxx 27: MAD TEMP[3], CONST[7], TEMP[2].yyyy, TEMP[3] 28: MAD TEMP[2], CONST[8], TEMP[2].zzzz, TEMP[3] 29: MAD TEMP[2], CONST[9], IN[0].wwww, TEMP[2] 30: MOV TEMP[0].xy, TEMP[0].xyxx 31: MOV OUT[1], TEMP[1] 32: MOV OUT[2], TEMP[0] 33: MOV OUT[0], TEMP[2] 34: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %49 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 %51 = add i32 %5, %7 %52 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %51) %53 = extractelement <4 x float> %52, i32 0 %54 = extractelement <4 x float> %52, i32 1 %55 = extractelement <4 x float> %52, i32 2 %56 = extractelement <4 x float> %52, i32 3 %57 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = add i32 %5, %7 %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59) %61 = extractelement <4 x float> %60, i32 0 %62 = extractelement <4 x float> %60, i32 1 %63 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0 %65 = add i32 %5, %7 %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %64, i32 0, i32 %65) %67 = extractelement <4 x float> %66, i32 0 %68 = extractelement <4 x float> %66, i32 1 %69 = fsub float %53, %24 %70 = fsub float %54, %25 %71 = fsub float %55, %26 %72 = fmul float %69, %69 %73 = fmul float %70, %70 %74 = fadd float %73, %72 %75 = fmul float %71, %71 %76 = fadd float %74, %75 %77 = fcmp olt float %28, %76 %. = select i1 %77, float 0.000000e+00, float %67 %.16 = select i1 %77, float 0.000000e+00, float %68 %.17 = select i1 %77, float 0.000000e+00, float %62 %78 = fmul float %14, %. %79 = fadd float %78, %53 %80 = fmul float %15, %. %81 = fadd float %80, %54 %82 = fmul float %16, %. %83 = fadd float %82, %55 %84 = call float @llvm.AMDGPU.lrp(float %27, float %.17, float %.16) %85 = fmul float %17, %84 %86 = fadd float %85, %79 %87 = fmul float %18, %84 %88 = fadd float %87, %81 %89 = fmul float %19, %84 %90 = fadd float %89, %83 %91 = call float @llvm.fabs.f32(float %.) %92 = fmul float %21, %91 %93 = fmul float %22, %91 %94 = fmul float %23, %91 %95 = fmul float %92, %20 %96 = fadd float %95, %86 %97 = fmul float %93, %20 %98 = fadd float %97, %88 %99 = fmul float %94, %20 %100 = fadd float %99, %90 %101 = fcmp ogt float %62, 0.000000e+00 %102 = select i1 %101, float 1.000000e+00, float 0.000000e+00 %103 = fmul float %45, %96 %104 = fmul float %46, %98 %105 = fadd float %104, %103 %106 = fmul float %47, %100 %107 = fadd float %106, %105 %108 = fmul float %48, %56 %109 = fadd float %108, %107 %110 = fmul float %109, %13 %111 = fsub float -0.000000e+00, %110 %112 = fmul float %29, %96 %113 = fmul float %30, %96 %114 = fmul float %31, %96 %115 = fmul float %32, %96 %116 = fmul float %33, %98 %117 = fadd float %116, %112 %118 = fmul float %34, %98 %119 = fadd float %118, %113 %120 = fmul float %35, %98 %121 = fadd float %120, %114 %122 = fmul float %36, %98 %123 = fadd float %122, %115 %124 = fmul float %37, %100 %125 = fadd float %124, %117 %126 = fmul float %38, %100 %127 = fadd float %126, %119 %128 = fmul float %39, %100 %129 = fadd float %128, %121 %130 = fmul float %40, %100 %131 = fadd float %130, %123 %132 = fmul float %41, %56 %133 = fadd float %132, %125 %134 = fmul float %42, %56 %135 = fadd float %134, %127 %136 = fmul float %43, %56 %137 = fadd float %136, %129 %138 = fmul float %44, %56 %139 = fadd float %138, %131 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float %111) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %61, float %102, float %94, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %133, float %135, float %137, float %139) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0xe ; C204010E buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[7:10], v0, s[16:19], 0 idxen ; E00C2000 80040700 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 s_buffer_load_dword s6, s[0:3], 0x12 ; C2030112 s_buffer_load_dword s7, s[0:3], 0x13 ; C2038113 s_buffer_load_dword s9, s[0:3], 0x14 ; C2048114 s_buffer_load_dword s10, s[0:3], 0x18 ; C2050118 s_buffer_load_dword s11, s[0:3], 0x19 ; C2058119 s_buffer_load_dword s12, s[0:3], 0x1a ; C206011A s_buffer_load_dword s13, s[0:3], 0x1b ; C206811B s_buffer_load_dword s14, s[0:3], 0x9 ; C2070109 s_buffer_load_dword s15, s[0:3], 0xa ; C207810A s_buffer_load_dword s16, s[0:3], 0xb ; C208010B s_buffer_load_dword s17, s[0:3], 0xc ; C208810C s_buffer_load_dword s18, s[0:3], 0xd ; C209010D s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_sub_f32_e64 v0, 1.0, s7 ; D2080000 00000EF2 s_buffer_load_dword s19, s[0:3], 0x4 ; C2098104 s_buffer_load_dword s20, s[0:3], 0x5 ; C20A0105 s_buffer_load_dword s21, s[0:3], 0x6 ; C20A8106 s_buffer_load_dword s22, s[0:3], 0x8 ; C20B0108 v_subrev_f32_e32 v9, s4, v1 ; 0A120204 v_subrev_f32_e32 v10, s5, v2 ; 0A140405 v_mul_f32_e32 v9, v9, v9 ; 10121309 v_mac_f32_e32 v9, v10, v10 ; 3E12150A v_subrev_f32_e32 v10, s6, v3 ; 0A140606 v_mac_f32_e32 v9, v10, v10 ; 3E12150A v_cmp_lt_f32_e32 vcc, s9, v9 ; 7C021209 v_cndmask_b32_e64 v7, v7, 0, vcc ; D2000007 01A90107 v_cndmask_b32_e64 v8, v8, 0, vcc ; D2000008 01A90108 v_cndmask_b32_e64 v9, v6, 0, vcc ; D2000009 01A90106 v_mul_f32_e32 v0, v8, v0 ; 10000108 v_mac_f32_e32 v0, s7, v9 ; 3E001207 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s19, v7, v1 ; D2820001 04060E13 v_mac_f32_e32 v1, s22, v0 ; 3E020016 v_mad_f32 v2, s20, v7, v2 ; D2820002 040A0E14 v_mac_f32_e32 v2, s14, v0 ; 3E04000E v_mad_f32 v3, s21, v7, v3 ; D2820003 040E0E15 v_mac_f32_e32 v3, s15, v0 ; 3E06000F v_mul_f32_e64 v0, s17, |v7| ; D2100200 00020E11 s_buffer_load_dword s4, s[0:3], 0x2a ; C202012A s_buffer_load_dword s5, s[0:3], 0x2e ; C202812E s_buffer_load_dword s6, s[0:3], 0x32 ; C2030132 s_buffer_load_dword s7, s[0:3], 0x36 ; C2038136 s_buffer_load_dword s9, s[0:3], 0x3 ; C2048103 v_mul_f32_e64 v8, s18, |v7| ; D2100208 00020E12 v_mul_f32_e64 v7, s8, |v7| ; D2100207 00020E08 v_mac_f32_e32 v1, s16, v0 ; 3E020010 v_mac_f32_e32 v2, s16, v8 ; 3E041010 v_mac_f32_e32 v3, s16, v7 ; 3E060E10 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v1 ; 10000204 v_mac_f32_e32 v0, s5, v2 ; 3E000405 v_mac_f32_e32 v0, s6, v3 ; 3E000606 v_mac_f32_e32 v0, s7, v4 ; 3E000807 v_mul_f32_e32 v0, s9, v0 ; 10000009 v_xor_b32_e32 v0, 0x80000000, v0 ; 3A0000FF 80000000 v_mov_b32_e32 v8, 0 ; 7E100280 v_mov_b32_e32 v9, 1.0 ; 7E1202F2 exp 15, 32, 0, 0, 0, v8, v8, v9, v0 ; F800020F 00090808 v_cmp_lt_f32_e32 vcc, 0, v6 ; 7C020C80 s_waitcnt expcnt(0) ; BF8C070F v_cndmask_b32_e64 v0, 0, 1.0, vcc ; D2000000 01A9E480 exp 15, 33, 0, 0, 0, v5, v0, v7, v8 ; F800021F 08070005 s_buffer_load_dword s4, s[0:3], 0x26 ; C2020126 s_buffer_load_dword s5, s[0:3], 0x27 ; C2028127 s_buffer_load_dword s6, s[0:3], 0x1c ; C203011C s_buffer_load_dword s7, s[0:3], 0x1d ; C203811D s_buffer_load_dword s8, s[0:3], 0x1e ; C204011E s_buffer_load_dword s9, s[0:3], 0x1f ; C204811F s_buffer_load_dword s14, s[0:3], 0x20 ; C2070120 s_buffer_load_dword s15, s[0:3], 0x21 ; C2078121 s_buffer_load_dword s16, s[0:3], 0x22 ; C2080122 s_buffer_load_dword s17, s[0:3], 0x23 ; C2088123 s_buffer_load_dword s18, s[0:3], 0x24 ; C2090124 s_buffer_load_dword s0, s[0:3], 0x25 ; C2000125 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s10, v1 ; 1000020A v_mul_f32_e32 v5, s11, v1 ; 100A020B v_mul_f32_e32 v6, s12, v1 ; 100C020C v_mul_f32_e32 v1, s13, v1 ; 1002020D s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s6, v2 ; 3E000406 v_mac_f32_e32 v5, s7, v2 ; 3E0A0407 v_mac_f32_e32 v6, s8, v2 ; 3E0C0408 v_mac_f32_e32 v1, s9, v2 ; 3E020409 v_mac_f32_e32 v0, s14, v3 ; 3E00060E v_mac_f32_e32 v5, s15, v3 ; 3E0A060F v_mac_f32_e32 v6, s16, v3 ; 3E0C0610 v_mac_f32_e32 v1, s17, v3 ; 3E020611 v_mac_f32_e32 v0, s18, v4 ; 3E000812 v_mac_f32_e32 v5, s0, v4 ; 3E0A0800 v_mac_f32_e32 v6, s4, v4 ; 3E0C0804 v_mac_f32_e32 v1, s5, v4 ; 3E020805 exp 15, 12, 0, 1, 0, v0, v5, v6, v1 ; F80008CF 01060500 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 12 Code Size: 500 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0010, 1.0000, 0.2813, 0.5000} IMM[1] FLT32 { 1.0000, 255.0000, 0.0039, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0].w, TEMP[0], SAMP[0], 2D 2: FSLT TEMP[0].x, TEMP[0].wwww, IMM[0].xxxx 3: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].yyyy 4: KILL_IF -TEMP[0].xxxx 5: ADD TEMP[0].x, IN[0].zzzz, IMM[0].yyyy 6: RCP TEMP[0].x, TEMP[0].xxxx 7: MUL TEMP[0].xy, IN[0].xyyy, TEMP[0].xxxx 8: MAD TEMP[0].xy, IMM[0].zzzz, TEMP[0].xyyy, IMM[0].wwww 9: MUL TEMP[1].xy, IMM[1].xyyy, IN[0].wwww 10: FRC TEMP[1].xy, TEMP[1].xyyy 11: MOV TEMP[2].y, TEMP[1].yyyy 12: MUL TEMP[3].x, TEMP[1].yyyy, IMM[1].zzzz 13: ADD TEMP[2].x, TEMP[1].xxxx, -TEMP[3].xxxx 14: MOV TEMP[0].zw, TEMP[2].yyxy 15: MOV OUT[0], TEMP[0] 16: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %23, <16 x i8> %25, i32 2) %37 = extractelement <4 x float> %36, i32 3 %38 = fcmp olt float %37, 0x3F50624DE0000000 %39 = select i1 %38, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %39) %40 = fadd float %28, 1.000000e+00 %41 = fdiv float 1.000000e+00, %40 %42 = fmul float %26, %41 %43 = fmul float %27, %41 %44 = fmul float %42, 0x3FD20033A0000000 %45 = fadd float %44, 5.000000e-01 %46 = fmul float %43, 0x3FD20033A0000000 %47 = fadd float %46, 5.000000e-01 %48 = fmul float %29, 2.550000e+02 %49 = call float @llvm.floor.f32(float %29) %50 = fsub float %29, %49 %51 = call float @llvm.floor.f32(float %48) %52 = fsub float %48, %51 %53 = fmul float %52, 0x3F70101020000000 %54 = fsub float %50, %53 %55 = call i32 @llvm.SI.packf16(float %45, float %47) %56 = bitcast i32 %55 to float %57 = call i32 @llvm.SI.packf16(float %54, float %52) %58 = bitcast i32 %57 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %56, float %58, float %56, float %58) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800800 00020006 v_mov_b32_e32 v1, 0x3a83126f ; 7E0202FF 3A83126F s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_gt_f32_e32 vcc, v1, v0 ; 7C080101 v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080 v_add_f32_e32 v0, 1.0, v4 ; 060008F2 v_rcp_f32_e32 v0, v0 ; 7E005500 v_mul_f32_e32 v1, v0, v2 ; 10020500 v_mul_f32_e32 v0, v0, v3 ; 10000700 v_mov_b32_e32 v2, 0x3e90019d ; 7E0402FF 3E90019D v_mad_f32 v1, v1, v2, 0.5 ; D2820001 03C20501 v_mad_f32 v0, v0, v2, 0.5 ; D2820000 03C20500 v_mov_b32_e32 v2, 0x437f0000 ; 7E0402FF 437F0000 v_mul_f32_e32 v3, v2, v5 ; 10060B02 v_floor_f32_e32 v4, v5 ; 7E084905 v_subrev_f32_e32 v4, v4, v5 ; 0A080B04 v_floor_f32_e32 v3, v3 ; 7E064903 v_mad_f32 v2, v5, v2, -v3 ; D2820002 840E0505 v_madmk_f32_e32 v3, v2, v4, 0xbb808081 ; 40060902 BB808081 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_cvt_pkrtz_f16_f32_e32 v1, v3, v2 ; 5E020503 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 204 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..18] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: FSNE TEMP[0].x, CONST[1].zzzz, IMM[0].xxxx 1: UIF TEMP[0].xxxx :0 2: MUL TEMP[0], CONST[2], IN[0].xxxx 3: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0] 4: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0] 5: ADD TEMP[0].xyz, TEMP[0], CONST[5] 6: MOV TEMP[1].x, CONST[6].xxxx 7: MOV TEMP[1].y, CONST[7].xxxx 8: MOV TEMP[1].z, CONST[8].xxxx 9: MOV TEMP[2].x, CONST[6].yyyy 10: MOV TEMP[2].y, CONST[7].yyyy 11: MOV TEMP[2].z, CONST[8].yyyy 12: MOV TEMP[3].x, CONST[6].zzzz 13: MOV TEMP[3].y, CONST[7].zzzz 14: MOV TEMP[3].z, CONST[8].zzzz 15: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[1].xxxx 16: MAD TEMP[1].xyz, TEMP[2].xyzz, IN[1].yyyy, TEMP[1].xyzz 17: MAD TEMP[1].xyz, TEMP[3].xyzz, IN[1].zzzz, TEMP[1].xyzz 18: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 19: RSQ TEMP[2].x, TEMP[2].xxxx 20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 21: MUL TEMP[2].xyz, TEMP[0].xyzz, CONST[0].wwww 22: ADD TEMP[2].xyz, CONST[0].xyzz, -TEMP[2].xyzz 23: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 24: RSQ TEMP[3].x, TEMP[3].xxxx 25: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 26: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[2].xyzz 27: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx 28: ADD TEMP[2].x, IMM[0].yyyy, -TEMP[2].xxxx 29: SQRT TEMP[2].x, TEMP[2].xxxx 30: MUL TEMP[2].x, CONST[1].zzzz, TEMP[2].xxxx 31: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 32: ADD TEMP[0].xyz, TEMP[0].xyzz, -TEMP[1].xyzz 33: MUL TEMP[1], CONST[10], TEMP[0].xxxx 34: MAD TEMP[1], CONST[11], TEMP[0].yyyy, TEMP[1] 35: MAD TEMP[0], CONST[12], TEMP[0].zzzz, TEMP[1] 36: ADD TEMP[0], TEMP[0], CONST[13] 37: ELSE :0 38: MUL TEMP[1], CONST[15], IN[0].xxxx 39: MAD TEMP[1], CONST[16], IN[0].yyyy, TEMP[1] 40: MAD TEMP[1], CONST[17], IN[0].zzzz, TEMP[1] 41: ADD TEMP[0], TEMP[1], CONST[18] 42: ENDIF 43: MOV TEMP[1].xyw, TEMP[0].xyxw 44: RCP TEMP[2].x, TEMP[0].wwww 45: MUL TEMP[2].x, CONST[1].xxxx, TEMP[2].xxxx 46: MOV_SAT TEMP[2].x, TEMP[2].xxxx 47: ADD TEMP[2].x, TEMP[0].zzzz, TEMP[2].xxxx 48: MAX TEMP[0].x, TEMP[2].xxxx, -TEMP[0].wwww 49: LRP TEMP[0].x, CONST[1].yyyy, TEMP[0].xxxx, TEMP[2].xxxx 50: MOV TEMP[1].z, TEMP[0].xxxx 51: MAD TEMP[0].xy, IN[2].xyyy, CONST[14].xyyy, CONST[14].zwww 52: MOV OUT[1], TEMP[0] 53: MOV OUT[0], TEMP[1] 54: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0 %30 = add i32 %5, %7 %31 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %29, i32 0, i32 %30) %32 = extractelement <4 x float> %31, i32 0 %33 = extractelement <4 x float> %31, i32 1 %34 = extractelement <4 x float> %31, i32 2 %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %7 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = fcmp une float %16, 0.000000e+00 br i1 %41, label %IF, label %ELSE IF: ; preds = %main_body %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %82 = fmul float %78, %25 %83 = fmul float %77, %25 %84 = fmul float %76, %25 %85 = fmul float %75, %26 %86 = fadd float %85, %82 %87 = fmul float %74, %26 %88 = fadd float %87, %83 %89 = fmul float %73, %26 %90 = fadd float %89, %84 %91 = fmul float %72, %27 %92 = fadd float %91, %86 %93 = fmul float %71, %27 %94 = fadd float %93, %88 %95 = fmul float %70, %27 %96 = fadd float %95, %90 %97 = fadd float %92, %69 %98 = fadd float %94, %68 %99 = fadd float %96, %67 %100 = fmul float %66, %32 %101 = fmul float %63, %32 %102 = fmul float %60, %32 %103 = fmul float %65, %33 %104 = fadd float %103, %100 %105 = fmul float %62, %33 %106 = fadd float %105, %101 %107 = fmul float %59, %33 %108 = fadd float %107, %102 %109 = fmul float %64, %34 %110 = fadd float %109, %104 %111 = fmul float %61, %34 %112 = fadd float %111, %106 %113 = fmul float %58, %34 %114 = fadd float %113, %108 %115 = fmul float %110, %110 %116 = fmul float %112, %112 %117 = fadd float %116, %115 %118 = fmul float %114, %114 %119 = fadd float %117, %118 %120 = call float @llvm.AMDGPU.rsq.clamped.f32(float %119) %121 = fmul float %110, %120 %122 = fmul float %112, %120 %123 = fmul float %114, %120 %124 = fmul float %97, %13 %125 = fmul float %98, %13 %126 = fmul float %99, %13 %127 = fsub float %81, %124 %128 = fsub float %80, %125 %129 = fsub float %79, %126 %130 = fmul float %127, %127 %131 = fmul float %128, %128 %132 = fadd float %131, %130 %133 = fmul float %129, %129 %134 = fadd float %132, %133 %135 = call float @llvm.AMDGPU.rsq.clamped.f32(float %134) %136 = fmul float %127, %135 %137 = fmul float %128, %135 %138 = fmul float %129, %135 %139 = fmul float %121, %136 %140 = fmul float %122, %137 %141 = fadd float %140, %139 %142 = fmul float %123, %138 %143 = fadd float %141, %142 %144 = fmul float %143, %143 %145 = fsub float 1.000000e+00, %144 %146 = call float @llvm.sqrt.f32(float %145) %147 = fmul float %16, %146 %148 = fmul float %121, %147 %149 = fmul float %122, %147 %150 = fmul float %123, %147 %151 = fsub float %97, %148 %152 = fsub float %98, %149 %153 = fsub float %99, %150 %154 = fmul float %57, %151 %155 = fmul float %56, %151 %156 = fmul float %55, %151 %157 = fmul float %54, %151 %158 = fmul float %53, %152 %159 = fadd float %158, %154 %160 = fmul float %52, %152 %161 = fadd float %160, %155 %162 = fmul float %51, %152 %163 = fadd float %162, %156 %164 = fmul float %50, %152 %165 = fadd float %164, %157 %166 = fmul float %49, %153 %167 = fadd float %166, %159 %168 = fmul float %48, %153 %169 = fadd float %168, %161 %170 = fmul float %47, %153 %171 = fadd float %170, %163 %172 = fmul float %46, %153 %173 = fadd float %172, %165 %174 = fadd float %167, %45 %175 = fadd float %169, %44 %176 = fadd float %171, %43 %177 = fadd float %173, %42 br label %ENDIF ELSE: ; preds = %main_body %178 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %179 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %180 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %181 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %182 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %183 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %184 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %185 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %186 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %187 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %188 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %189 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %190 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %191 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %192 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %193 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %194 = fmul float %193, %25 %195 = fmul float %192, %25 %196 = fmul float %191, %25 %197 = fmul float %190, %25 %198 = fmul float %189, %26 %199 = fadd float %198, %194 %200 = fmul float %188, %26 %201 = fadd float %200, %195 %202 = fmul float %187, %26 %203 = fadd float %202, %196 %204 = fmul float %186, %26 %205 = fadd float %204, %197 %206 = fmul float %185, %27 %207 = fadd float %206, %199 %208 = fmul float %184, %27 %209 = fadd float %208, %201 %210 = fmul float %183, %27 %211 = fadd float %210, %203 %212 = fmul float %182, %27 %213 = fadd float %212, %205 %214 = fadd float %207, %181 %215 = fadd float %209, %180 %216 = fadd float %211, %179 %217 = fadd float %213, %178 br label %ENDIF ENDIF: ; preds = %ELSE, %IF %temp.0 = phi float [ %174, %IF ], [ %214, %ELSE ] %temp1.0 = phi float [ %175, %IF ], [ %215, %ELSE ] %temp2.0 = phi float [ %176, %IF ], [ %216, %ELSE ] %temp3.0 = phi float [ %177, %IF ], [ %217, %ELSE ] %218 = fdiv float 1.000000e+00, %temp3.0 %219 = fmul float %14, %218 %220 = call float @llvm.AMDIL.clamp.(float %219, float 0.000000e+00, float 1.000000e+00) %221 = fadd float %temp2.0, %220 %222 = fsub float -0.000000e+00, %temp3.0 %223 = call float @llvm.maxnum.f32(float %221, float %222) %224 = call float @llvm.AMDGPU.lrp(float %15, float %223, float %221) %225 = fmul float %39, %17 %226 = fadd float %225, %19 %227 = fmul float %40, %18 %228 = fadd float %227, %20 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %226, float %228, float %temp2.0, float %temp3.0) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %temp.0, float %temp1.0, float %224, float %temp3.0) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700 buffer_load_format_xyzw v[0:3], v0, s[16:19], 0 idxen ; E00C2000 80040000 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_buffer_load_dword s12, s[0:3], 0x3a ; C206013A s_buffer_load_dword s11, s[0:3], 0x3b ; C205813B s_waitcnt vmcnt(1) ; BF8C0771 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_cmp_eq_f32_e64 s[6:7], 0, s4 ; D0040006 00000880 s_and_saveexec_b64 s[6:7], s[6:7] ; BE862406 s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E s_cbranch_execz BB0_1 ; BF880000 s_buffer_load_dword s5, s[0:3], 0x4b ; C202814B s_buffer_load_dword s8, s[0:3], 0x46 ; C2040146 s_buffer_load_dword s9, s[0:3], 0x47 ; C2048147 s_buffer_load_dword s10, s[0:3], 0x48 ; C2050148 s_buffer_load_dword s13, s[0:3], 0x49 ; C2068149 s_buffer_load_dword s14, s[0:3], 0x4a ; C207014A s_buffer_load_dword s15, s[0:3], 0x41 ; C2078141 s_buffer_load_dword s16, s[0:3], 0x42 ; C2080142 s_buffer_load_dword s17, s[0:3], 0x43 ; C2088143 s_buffer_load_dword s18, s[0:3], 0x44 ; C2090144 s_buffer_load_dword s19, s[0:3], 0x45 ; C2098145 s_buffer_load_dword s20, s[0:3], 0x3c ; C20A013C s_buffer_load_dword s21, s[0:3], 0x3d ; C20A813D s_buffer_load_dword s22, s[0:3], 0x3e ; C20B013E s_buffer_load_dword s23, s[0:3], 0x3f ; C20B813F s_buffer_load_dword s24, s[0:3], 0x40 ; C20C0140 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s20, v4 ; 10040814 v_mul_f32_e32 v3, s21, v4 ; 10060815 v_mul_f32_e32 v12, s22, v4 ; 10180816 v_mul_f32_e32 v13, s23, v4 ; 101A0817 v_mac_f32_e32 v2, s24, v5 ; 3E040A18 v_mac_f32_e32 v3, s15, v5 ; 3E060A0F v_mac_f32_e32 v12, s16, v5 ; 3E180A10 v_mac_f32_e32 v13, s17, v5 ; 3E1A0A11 v_mac_f32_e32 v2, s18, v6 ; 3E040C12 v_mac_f32_e32 v3, s19, v6 ; 3E060C13 v_mac_f32_e32 v12, s8, v6 ; 3E180C08 v_mac_f32_e32 v13, s9, v6 ; 3E1A0C09 v_add_f32_e32 v10, s10, v2 ; 0614040A v_add_f32_e32 v11, s13, v3 ; 0616060D v_add_f32_e32 v12, s14, v12 ; 0618180E v_add_f32_e32 v13, s5, v13 ; 061A1A05 s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506 s_buffer_load_dword s10, s[0:3], 0x4 ; C2050104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s8, s[0:3], 0x38 ; C2040138 s_buffer_load_dword s5, s[0:3], 0x39 ; C2028139 v_mov_b32_e32 v2, s12 ; 7E04020C v_mov_b32_e32 v3, s11 ; 7E06020B s_waitcnt lgkmcnt(0) ; BF8C007F s_xor_b64 exec, exec, s[6:7] ; 89FE067E s_cbranch_execz BB0_4 ; BF880000 s_buffer_load_dword s11, s[0:3], 0x37 ; C2058137 s_buffer_load_dword s12, s[0:3], 0x32 ; C2060132 s_buffer_load_dword s13, s[0:3], 0x33 ; C2068133 s_buffer_load_dword s14, s[0:3], 0x34 ; C2070134 s_buffer_load_dword s15, s[0:3], 0x35 ; C2078135 s_buffer_load_dword s16, s[0:3], 0x36 ; C2080136 s_buffer_load_dword s17, s[0:3], 0x2d ; C208812D s_buffer_load_dword s18, s[0:3], 0x2e ; C209012E s_buffer_load_dword s19, s[0:3], 0x2f ; C209812F s_buffer_load_dword s20, s[0:3], 0x30 ; C20A0130 s_buffer_load_dword s21, s[0:3], 0x31 ; C20A8131 s_buffer_load_dword s22, s[0:3], 0x28 ; C20B0128 s_buffer_load_dword s23, s[0:3], 0x29 ; C20B8129 s_buffer_load_dword s24, s[0:3], 0x2a ; C20C012A s_buffer_load_dword s25, s[0:3], 0x2b ; C20C812B s_buffer_load_dword s26, s[0:3], 0x2c ; C20D012C s_buffer_load_dword s27, s[0:3], 0x1d ; C20D811D s_buffer_load_dword s28, s[0:3], 0x1e ; C20E011E s_buffer_load_dword s29, s[0:3], 0x20 ; C20E8120 s_buffer_load_dword s30, s[0:3], 0x21 ; C20F0121 s_buffer_load_dword s31, s[0:3], 0x22 ; C20F8122 s_buffer_load_dword s32, s[0:3], 0x16 ; C2100116 s_buffer_load_dword s33, s[0:3], 0x18 ; C2108118 s_buffer_load_dword s34, s[0:3], 0x19 ; C2110119 s_buffer_load_dword s35, s[0:3], 0x1a ; C211811A s_buffer_load_dword s36, s[0:3], 0x1c ; C212011C s_buffer_load_dword s37, s[0:3], 0x10 ; C2128110 s_buffer_load_dword s38, s[0:3], 0x11 ; C2130111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s29, v7 ; 10140E1D v_mac_f32_e32 v10, s30, v8 ; 3E14101E v_mac_f32_e32 v10, s31, v9 ; 3E14121F s_buffer_load_dword s29, s[0:3], 0x12 ; C20E8112 v_mul_f32_e32 v11, s33, v7 ; 10160E21 v_mac_f32_e32 v11, s34, v8 ; 3E161022 v_mac_f32_e32 v11, s35, v9 ; 3E161223 v_mul_f32_e32 v7, s36, v7 ; 100E0E24 v_mac_f32_e32 v7, s27, v8 ; 3E0E101B v_mac_f32_e32 v7, s28, v9 ; 3E0E121C s_buffer_load_dword s27, s[0:3], 0x14 ; C20D8114 s_buffer_load_dword s28, s[0:3], 0x15 ; C20E0115 s_buffer_load_dword s30, s[0:3], 0x9 ; C20F0109 s_buffer_load_dword s31, s[0:3], 0xa ; C20F810A s_buffer_load_dword s33, s[0:3], 0xc ; C210810C s_buffer_load_dword s34, s[0:3], 0xd ; C211010D s_buffer_load_dword s35, s[0:3], 0xe ; C211810E s_buffer_load_dword s36, s[0:3], 0x0 ; C2120100 v_mul_f32_e32 v8, v11, v11 ; 1010170B v_mac_f32_e32 v8, v7, v7 ; 3E100F07 v_mac_f32_e32 v8, v10, v10 ; 3E10150A v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v9, s30, v4 ; 1012081E v_mul_f32_e32 v12, s31, v4 ; 1018081F s_buffer_load_dword s30, s[0:3], 0x1 ; C20F0101 v_mac_f32_e32 v9, s34, v5 ; 3E120A22 v_mac_f32_e32 v12, s35, v5 ; 3E180A23 v_mac_f32_e32 v9, s38, v6 ; 3E120C26 v_mac_f32_e32 v12, s29, v6 ; 3E180C1D v_add_f32_e32 v9, s28, v9 ; 0612121C v_add_f32_e32 v12, s32, v12 ; 06181820 s_buffer_load_dword s28, s[0:3], 0x2 ; C20E0102 s_buffer_load_dword s29, s[0:3], 0x3 ; C20E8103 s_buffer_load_dword s31, s[0:3], 0x8 ; C20F8108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v13, s29 ; 7E1A021D v_mad_f32 v14, -v9, v13, s30 ; D282000E 207A1B09 v_mul_f32_e32 v4, s31, v4 ; 1008081F v_mac_f32_e32 v4, s33, v5 ; 3E080A21 v_mac_f32_e32 v4, s37, v6 ; 3E080C25 v_add_f32_e32 v4, s27, v4 ; 0608081B v_mad_f32 v5, -v4, v13, s36 ; D2820005 20921B04 v_mad_f32 v6, -v12, v13, s28 ; D2820006 20721B0C v_mul_f32_e32 v13, v5, v5 ; 101A0B05 v_mac_f32_e32 v13, v14, v14 ; 3E1A1D0E v_mac_f32_e32 v13, v6, v6 ; 3E1A0D06 v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D v_mul_f32_e32 v11, v8, v11 ; 10161708 v_mul_f32_e32 v7, v8, v7 ; 100E0F08 v_mul_f32_e32 v8, v8, v10 ; 10101508 v_mul_f32_e32 v5, v13, v5 ; 100A0B0D v_mul_f32_e32 v10, v13, v14 ; 10141D0D v_mul_f32_e32 v6, v13, v6 ; 100C0D0D v_mul_f32_e32 v5, v5, v11 ; 100A1705 v_mac_f32_e32 v5, v10, v7 ; 3E0A0F0A v_mac_f32_e32 v5, v6, v8 ; 3E0A1106 v_mad_f32 v5, -v5, v5, 1.0 ; D2820005 23CA0B05 v_sqrt_f32_e32 v5, v5 ; 7E0A6705 v_mul_f32_e32 v5, s4, v5 ; 100A0A04 v_mad_f32 v4, -v11, v5, v4 ; D2820004 24120B0B v_mad_f32 v6, -v7, v5, v9 ; D2820006 24260B07 v_mad_f32 v5, -v8, v5, v12 ; D2820005 24320B08 v_mul_f32_e32 v7, s22, v4 ; 100E0816 v_mul_f32_e32 v8, s23, v4 ; 10100817 v_mul_f32_e32 v9, s24, v4 ; 10120818 v_mul_f32_e32 v4, s25, v4 ; 10080819 v_mac_f32_e32 v7, s26, v6 ; 3E0E0C1A v_mac_f32_e32 v8, s17, v6 ; 3E100C11 v_mac_f32_e32 v9, s18, v6 ; 3E120C12 v_mac_f32_e32 v4, s19, v6 ; 3E080C13 v_mac_f32_e32 v7, s20, v5 ; 3E0E0A14 v_mac_f32_e32 v8, s21, v5 ; 3E100A15 v_mac_f32_e32 v9, s12, v5 ; 3E120A0C v_mac_f32_e32 v4, s13, v5 ; 3E080A0D v_add_f32_e32 v10, s14, v7 ; 06140E0E v_add_f32_e32 v11, s15, v8 ; 0616100F v_add_f32_e32 v12, s16, v9 ; 06181210 v_add_f32_e32 v13, s11, v4 ; 061A080B s_or_b64 exec, exec, s[6:7] ; 88FE067E v_rcp_f32_e32 v4, v13 ; 7E08550D v_sub_f32_e64 v5, 1.0, s9 ; D2080005 000012F2 v_mul_f32_e32 v4, s10, v4 ; 1008080A v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_add_f32_e32 v4, v4, v12 ; 06081904 v_max_f32_e64 v6, v4, -v13 ; D2200006 40021B04 v_mul_f32_e32 v4, v4, v5 ; 10080B04 v_mac_f32_e32 v4, s9, v6 ; 3E080C09 v_mac_f32_e32 v2, s8, v0 ; 3E040008 v_mac_f32_e32 v3, s5, v1 ; 3E060205 exp 15, 32, 0, 0, 0, v2, v3, v12, v13 ; F800020F 0D0C0302 exp 15, 12, 0, 1, 0, v10, v11, v4, v13 ; F80008CF 0D040B0A s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 16 Code Size: 800 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0..1] DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].w, TEMP[0], SAMP[0], 2D 2: MUL TEMP[0].x, TEMP[0].wwww, CONST[0].wwww 3: FSLT TEMP[0].x, TEMP[0].xxxx, CONST[1].xxxx 4: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx 5: KILL_IF -TEMP[0].xxxx 6: MOV OUT[0], IMM[0].yyyy 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %27 = load <32 x i8>, <32 x i8> addrspace(2)* %26, align 32, !tbaa !0 %28 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %27, <16 x i8> %29, i32 2) %37 = extractelement <4 x float> %36, i32 3 %38 = fmul float %37, %24 %39 = fcmp olt float %38, %25 %40 = select i1 %39, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %40) %41 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %42 = bitcast i32 %41 to float %43 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %44 = bitcast i32 %43 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %42, float %44, float %42, float %44) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x3 ; C2040103 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[4:7] ; F0800800 00230002 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s8, v0 ; 10000008 v_cmp_gt_f32_e32 vcc, s0, v0 ; 7C080000 v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080 v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080 exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 100 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL OUT[6], GENERIC[5] DCL CONST[0..20] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[6], IN[0].xxxx 1: MAD TEMP[0], CONST[7], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0].xyz, CONST[9], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[17], IN[0].xxxx 5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1] 8: MAD TEMP[2].xy, IN[2].xyyy, CONST[14].xyyy, CONST[14].zwww 9: FSEQ TEMP[3].x, CONST[16].xxxx, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].xy, IN[2].xyxx 12: ELSE :0 13: MOV TEMP[3].xy, IN[3].xyxx 14: ENDIF 15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[15].xyyy, CONST[15].zwww 16: MOV TEMP[2].zw, TEMP[3].yyxy 17: MOV TEMP[3].x, CONST[10].xxxx 18: MOV TEMP[3].y, CONST[11].xxxx 19: MOV TEMP[3].z, CONST[12].xxxx 20: MOV TEMP[4].x, CONST[10].yyyy 21: MOV TEMP[4].y, CONST[11].yyyy 22: MOV TEMP[4].z, CONST[12].yyyy 23: MOV TEMP[5].x, CONST[10].zzzz 24: MOV TEMP[5].y, CONST[11].zzzz 25: MOV TEMP[5].z, CONST[12].zzzz 26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz 29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 30: RSQ TEMP[4].x, TEMP[4].xxxx 31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 32: MOV TEMP[4].xyz, TEMP[3].xyzx 33: MUL TEMP[5].xyw, TEMP[1], IMM[0].yyyy 34: MOV TEMP[6].x, TEMP[5].xxxx 35: MUL TEMP[7].x, TEMP[5].yyyy, CONST[1].xxxx 36: MOV TEMP[6].y, TEMP[7].xxxx 37: ADD TEMP[5].xy, TEMP[6].xyyy, TEMP[5].wwww 38: MOV TEMP[5].zw, TEMP[1].wwzw 39: MUL TEMP[6], TEMP[3].xyzz, TEMP[3].yzzx 40: DP4 TEMP[7].x, CONST[2], TEMP[6] 41: DP4 TEMP[8].x, CONST[3], TEMP[6] 42: MOV TEMP[7].y, TEMP[8].xxxx 43: DP4 TEMP[6].x, CONST[4], TEMP[6] 44: MOV TEMP[7].z, TEMP[6].xxxx 45: MUL TEMP[6].x, TEMP[3].yyyy, TEMP[3].yyyy 46: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[6].xxxx 47: MAD TEMP[3].xyz, CONST[5].xyzz, TEMP[3].xxxx, TEMP[7].xyzz 48: ADD TEMP[6].xyz, TEMP[0].xyzz, -CONST[0].xyzz 49: MOV TEMP[6].yzw, TEMP[6].yxyz 50: MOV TEMP[6].x, TEMP[1].zzzz 51: MOV TEMP[0].xyz, TEMP[0].xyzx 52: MOV OUT[6], TEMP[0] 53: MOV OUT[1], TEMP[2] 54: MOV OUT[2], TEMP[4] 55: MOV OUT[3], TEMP[3] 56: MOV OUT[4], TEMP[5] 57: MOV OUT[0], TEMP[1] 58: MOV OUT[5], TEMP[6] 59: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332) %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 %83 = add i32 %5, %7 %84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83) %85 = extractelement <4 x float> %84, i32 0 %86 = extractelement <4 x float> %84, i32 1 %87 = extractelement <4 x float> %84, i32 2 %88 = extractelement <4 x float> %84, i32 3 %89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0 %91 = add i32 %5, %7 %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91) %93 = extractelement <4 x float> %92, i32 0 %94 = extractelement <4 x float> %92, i32 1 %95 = extractelement <4 x float> %92, i32 2 %96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0 %98 = add i32 %5, %7 %99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !tbaa !0 %104 = add i32 %5, %7 %105 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %103, i32 0, i32 %104) %106 = extractelement <4 x float> %105, i32 0 %107 = extractelement <4 x float> %105, i32 1 %108 = fmul float %32, %85 %109 = fmul float %33, %85 %110 = fmul float %34, %85 %111 = fmul float %35, %85 %112 = fmul float %36, %86 %113 = fadd float %112, %108 %114 = fmul float %37, %86 %115 = fadd float %114, %109 %116 = fmul float %38, %86 %117 = fadd float %116, %110 %118 = fmul float %39, %86 %119 = fadd float %118, %111 %120 = fmul float %40, %87 %121 = fadd float %120, %113 %122 = fmul float %41, %87 %123 = fadd float %122, %115 %124 = fmul float %42, %87 %125 = fadd float %124, %117 %126 = fmul float %43, %87 %127 = fadd float %126, %119 %128 = fmul float %44, %88 %129 = fadd float %128, %121 %130 = fmul float %45, %88 %131 = fadd float %130, %123 %132 = fmul float %46, %88 %133 = fadd float %132, %125 %134 = fmul float %65, %85 %135 = fmul float %66, %85 %136 = fmul float %67, %85 %137 = fmul float %68, %85 %138 = fmul float %69, %86 %139 = fadd float %138, %134 %140 = fmul float %70, %86 %141 = fadd float %140, %135 %142 = fmul float %71, %86 %143 = fadd float %142, %136 %144 = fmul float %72, %86 %145 = fadd float %144, %137 %146 = fmul float %73, %87 %147 = fadd float %146, %139 %148 = fmul float %74, %87 %149 = fadd float %148, %141 %150 = fmul float %75, %87 %151 = fadd float %150, %143 %152 = fmul float %76, %87 %153 = fadd float %152, %145 %154 = fmul float %77, %88 %155 = fadd float %154, %147 %156 = fmul float %78, %88 %157 = fadd float %156, %149 %158 = fmul float %79, %88 %159 = fadd float %158, %151 %160 = fmul float %80, %88 %161 = fadd float %160, %153 %162 = fmul float %100, %56 %163 = fadd float %162, %58 %164 = fmul float %101, %57 %165 = fadd float %164, %59 %166 = fcmp oeq float %64, 0.000000e+00 %. = select i1 %166, float %100, float %106 %.36 = select i1 %166, float %101, float %107 %167 = fmul float %., %60 %168 = fadd float %167, %62 %169 = fmul float %.36, %61 %170 = fadd float %169, %63 %171 = fmul float %47, %93 %172 = fmul float %50, %93 %173 = fmul float %53, %93 %174 = fmul float %48, %94 %175 = fadd float %174, %171 %176 = fmul float %51, %94 %177 = fadd float %176, %172 %178 = fmul float %54, %94 %179 = fadd float %178, %173 %180 = fmul float %49, %95 %181 = fadd float %180, %175 %182 = fmul float %52, %95 %183 = fadd float %182, %177 %184 = fmul float %55, %95 %185 = fadd float %184, %179 %186 = fmul float %181, %181 %187 = fmul float %183, %183 %188 = fadd float %187, %186 %189 = fmul float %185, %185 %190 = fadd float %188, %189 %191 = call float @llvm.AMDGPU.rsq.clamped.f32(float %190) %192 = fmul float %181, %191 %193 = fmul float %183, %191 %194 = fmul float %185, %191 %195 = fmul float %155, 5.000000e-01 %196 = fmul float %157, 5.000000e-01 %197 = fmul float %161, 5.000000e-01 %198 = fmul float %196, %16 %199 = fadd float %195, %197 %200 = fadd float %198, %197 %201 = fmul float %192, %193 %202 = fmul float %193, %194 %203 = fmul float %194, %194 %204 = fmul float %194, %192 %205 = fmul float %17, %201 %206 = fmul float %18, %202 %207 = fadd float %205, %206 %208 = fmul float %19, %203 %209 = fadd float %207, %208 %210 = fmul float %20, %204 %211 = fadd float %209, %210 %212 = fmul float %21, %201 %213 = fmul float %22, %202 %214 = fadd float %212, %213 %215 = fmul float %23, %203 %216 = fadd float %214, %215 %217 = fmul float %24, %204 %218 = fadd float %216, %217 %219 = fmul float %25, %201 %220 = fmul float %26, %202 %221 = fadd float %219, %220 %222 = fmul float %27, %203 %223 = fadd float %221, %222 %224 = fmul float %28, %204 %225 = fadd float %223, %224 %226 = fmul float %193, %193 %227 = fmul float %192, %192 %228 = fsub float %227, %226 %229 = fmul float %29, %228 %230 = fadd float %229, %211 %231 = fmul float %30, %228 %232 = fadd float %231, %218 %233 = fmul float %31, %228 %234 = fadd float %233, %225 %235 = fsub float %129, %13 %236 = fsub float %131, %14 %237 = fsub float %133, %15 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %163, float %165, float %168, float %170) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %192, float %193, float %194, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %230, float %232, float %234, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %199, float %200, float %159, float %161) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %159, float %235, float %236, float %237) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %129, float %131, float %133, float %127) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %155, float %157, float %159, float %161) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s19, s[20:23], 0x23 ; C2099523 buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00 s_buffer_load_dword s24, s[20:23], 0x24 ; C20C1524 s_buffer_load_dword s25, s[20:23], 0x25 ; C20C9525 s_buffer_load_dword s26, s[20:23], 0x26 ; C20D1526 s_buffer_load_dword s27, s[20:23], 0x28 ; C20D9528 s_buffer_load_dword s28, s[20:23], 0x29 ; C20E1529 s_buffer_load_dword s29, s[20:23], 0x2a ; C20E952A s_buffer_load_dword s30, s[20:23], 0x2c ; C20F152C s_buffer_load_dword s31, s[20:23], 0x2d ; C20F952D s_buffer_load_dword s32, s[20:23], 0x2e ; C210152E s_buffer_load_dword s33, s[20:23], 0x30 ; C2109530 s_buffer_load_dword s34, s[20:23], 0x31 ; C2111531 s_buffer_load_dword s35, s[20:23], 0x32 ; C2119532 s_buffer_load_dword s36, s[20:23], 0x38 ; C2121538 s_buffer_load_dword s37, s[20:23], 0x39 ; C2129539 s_buffer_load_dword s11, s[20:23], 0x9 ; C2059509 s_buffer_load_dword s5, s[20:23], 0xa ; C202950A s_buffer_load_dword s3, s[20:23], 0xb ; C201950B s_buffer_load_dword s9, s[20:23], 0xc ; C204950C s_buffer_load_dword s12, s[20:23], 0xd ; C206150D s_buffer_load_dword s7, s[20:23], 0xe ; C203950E s_buffer_load_dword s4, s[20:23], 0xf ; C202150F s_buffer_load_dword s10, s[20:23], 0x10 ; C2051510 s_buffer_load_dword s13, s[20:23], 0x11 ; C2069511 s_buffer_load_dword s8, s[20:23], 0x12 ; C2041512 s_buffer_load_dword s0, s[20:23], 0x3f ; C200153F s_buffer_load_dword s1, s[20:23], 0x40 ; C2009540 s_buffer_load_dword s38, s[20:23], 0x44 ; C2131544 s_buffer_load_dword s39, s[20:23], 0x45 ; C2139545 s_buffer_load_dword s40, s[20:23], 0x46 ; C2141546 s_buffer_load_dword s41, s[20:23], 0x47 ; C2149547 s_buffer_load_dword s42, s[20:23], 0x48 ; C2151548 s_buffer_load_dword s43, s[20:23], 0x49 ; C2159549 s_buffer_load_dword s44, s[20:23], 0x4a ; C216154A s_buffer_load_dword s45, s[20:23], 0x4b ; C216954B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s0 ; 7E000200 s_buffer_load_dword s0, s[20:23], 0x0 ; C2001500 v_cmp_eq_f32_e64 vcc, 0, s1 ; D004006A 00000280 s_buffer_load_dword s1, s[20:23], 0x1 ; C2009501 s_buffer_load_dword s2, s[20:23], 0x2 ; C2011502 s_buffer_load_dword s6, s[20:23], 0x4 ; C2031504 s_buffer_load_dword s14, s[20:23], 0x8 ; C2071508 s_buffer_load_dword s46, s[20:23], 0x3a ; C217153A s_buffer_load_dword s47, s[20:23], 0x3b ; C217953B s_buffer_load_dword s48, s[20:23], 0x3c ; C218153C s_buffer_load_dword s49, s[20:23], 0x3d ; C218953D s_buffer_load_dword s50, s[20:23], 0x3e ; C219153E s_buffer_load_dword s18, s[20:23], 0x13 ; C2091513 s_buffer_load_dword s15, s[20:23], 0x14 ; C2079514 s_buffer_load_dword s16, s[20:23], 0x15 ; C2081515 s_buffer_load_dword s17, s[20:23], 0x16 ; C2089516 s_buffer_load_dword s51, s[20:23], 0x18 ; C2199518 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v13, s46 ; 7E1A022E s_buffer_load_dword s46, s[20:23], 0x19 ; C2171519 s_buffer_load_dword s52, s[20:23], 0x1a ; C21A151A s_buffer_load_dword s53, s[20:23], 0x1b ; C21A951B s_buffer_load_dword s54, s[20:23], 0x1c ; C21B151C s_buffer_load_dword s55, s[20:23], 0x1d ; C21B951D s_buffer_load_dword s56, s[20:23], 0x1e ; C21C151E s_buffer_load_dword s57, s[20:23], 0x1f ; C21C951F s_buffer_load_dword s58, s[20:23], 0x20 ; C21D1520 s_buffer_load_dword s59, s[20:23], 0x21 ; C21D9521 s_buffer_load_dword s60, s[20:23], 0x22 ; C21E1522 s_buffer_load_dword s61, s[20:23], 0x4c ; C21E954C s_buffer_load_dword s62, s[20:23], 0x4d ; C21F154D s_buffer_load_dword s63, s[20:23], 0x4e ; C21F954E s_buffer_load_dword s64, s[20:23], 0x4f ; C220154F s_buffer_load_dword s65, s[20:23], 0x50 ; C2209550 s_buffer_load_dword s66, s[20:23], 0x51 ; C2211551 s_buffer_load_dword s67, s[20:23], 0x52 ; C2219552 s_buffer_load_dword s20, s[20:23], 0x53 ; C20A1553 v_mac_f32_e32 v13, s36, v9 ; 3E1A1224 v_mov_b32_e32 v14, s47 ; 7E1C022F v_mul_f32_e32 v15, s51, v2 ; 101E0433 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v16, s46, v2 ; 1020042E v_mul_f32_e32 v17, s27, v6 ; 10220C1B v_mul_f32_e32 v18, s30, v6 ; 10240C1E v_mul_f32_e32 v6, s33, v6 ; 100C0C21 v_mac_f32_e32 v17, s28, v7 ; 3E220E1C v_mac_f32_e32 v18, s31, v7 ; 3E240E1F v_mac_f32_e32 v6, s34, v7 ; 3E0C0E22 v_mac_f32_e32 v17, s29, v8 ; 3E22101D v_mac_f32_e32 v18, s32, v8 ; 3E241020 v_mac_f32_e32 v6, s35, v8 ; 3E0C1023 v_mul_f32_e32 v7, s52, v2 ; 100E0434 v_mul_f32_e32 v8, s53, v2 ; 10100435 v_mul_f32_e32 v19, s38, v2 ; 10260426 v_mac_f32_e32 v14, s37, v10 ; 3E1C1425 v_cndmask_b32_e32 v9, v11, v9 ; 0012130B v_cndmask_b32_e32 v10, v12, v10 ; 0014150C v_mac_f32_e32 v15, s54, v3 ; 3E1E0636 v_mac_f32_e32 v16, s55, v3 ; 3E200637 v_mac_f32_e32 v7, s56, v3 ; 3E0E0638 v_mac_f32_e32 v8, s57, v3 ; 3E100639 v_mac_f32_e32 v19, s42, v3 ; 3E26062A v_mul_f32_e32 v11, s39, v2 ; 10160427 v_mac_f32_e32 v11, s43, v3 ; 3E16062B v_mul_f32_e32 v12, s40, v2 ; 10180428 v_mac_f32_e32 v12, s44, v3 ; 3E18062C v_mul_f32_e32 v2, s41, v2 ; 10040429 v_mac_f32_e32 v2, s45, v3 ; 3E04062D v_mac_f32_e32 v15, s58, v4 ; 3E1E083A v_mac_f32_e32 v16, s59, v4 ; 3E20083B v_mac_f32_e32 v7, s60, v4 ; 3E0E083C v_mac_f32_e32 v8, s19, v4 ; 3E100813 v_mac_f32_e32 v19, s61, v4 ; 3E26083D v_mac_f32_e32 v11, s62, v4 ; 3E16083E v_mac_f32_e32 v12, s63, v4 ; 3E18083F v_mac_f32_e32 v2, s64, v4 ; 3E040840 v_mac_f32_e32 v15, s24, v5 ; 3E1E0A18 v_mac_f32_e32 v16, s25, v5 ; 3E200A19 v_mac_f32_e32 v7, s26, v5 ; 3E0E0A1A v_mac_f32_e32 v19, s65, v5 ; 3E260A41 v_mac_f32_e32 v11, s66, v5 ; 3E160A42 v_mac_f32_e32 v12, s67, v5 ; 3E180A43 v_mac_f32_e32 v2, s20, v5 ; 3E040A14 v_mov_b32_e32 v3, s50 ; 7E060232 v_mul_f32_e32 v4, v17, v17 ; 10082311 v_mac_f32_e32 v4, v18, v18 ; 3E082512 v_mac_f32_e32 v4, v6, v6 ; 3E080D06 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 v_mac_f32_e32 v3, s48, v9 ; 3E061230 v_mac_f32_e32 v0, s49, v10 ; 3E001431 exp 15, 32, 0, 0, 0, v13, v14, v3, v0 ; F800020F 00030E0D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v4, v17 ; 10002304 v_mul_f32_e32 v3, v4, v18 ; 10062504 v_mul_f32_e32 v4, v4, v6 ; 10080D04 v_mul_f32_e32 v5, v4, v3 ; 100A0704 v_mul_f32_e32 v6, s11, v5 ; 100C0A0B v_mul_f32_e32 v9, s12, v5 ; 10120A0C v_mul_f32_e32 v5, s13, v5 ; 100A0A0D v_mul_f32_e32 v10, v3, v0 ; 10140103 v_mac_f32_e32 v6, s14, v10 ; 3E0C140E v_mac_f32_e32 v9, s9, v10 ; 3E121409 v_mac_f32_e32 v5, s10, v10 ; 3E0A140A v_mul_f32_e32 v10, v4, v4 ; 10140904 v_mac_f32_e32 v6, s5, v10 ; 3E0C1405 v_mac_f32_e32 v9, s7, v10 ; 3E121407 v_mac_f32_e32 v5, s8, v10 ; 3E0A1408 v_mul_f32_e32 v10, v0, v4 ; 10140900 v_mac_f32_e32 v6, s3, v10 ; 3E0C1403 v_mac_f32_e32 v9, s4, v10 ; 3E121404 v_mac_f32_e32 v5, s18, v10 ; 3E0A1412 exp 15, 33, 0, 0, 0, v0, v3, v4, v1 ; F800021F 01040300 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mad_f32 v0, v0, v0, -v3 ; D2820000 840E0100 v_mac_f32_e32 v6, s15, v0 ; 3E0C000F v_mac_f32_e32 v9, s16, v0 ; 3E120010 v_mac_f32_e32 v5, s17, v0 ; 3E0A0011 v_mul_f32_e32 v0, 0.5, v11 ; 100016F0 v_mul_f32_e32 v3, 0.5, v2 ; 100604F0 exp 15, 34, 0, 0, 0, v6, v9, v5, v1 ; F800022F 01050906 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v1, 0.5, v19, v3 ; D2820001 040E26F0 v_mac_f32_e32 v3, s6, v0 ; 3E060006 exp 15, 35, 0, 0, 0, v1, v3, v12, v2 ; F800023F 020C0301 v_subrev_f32_e32 v0, s0, v15 ; 0A001E00 s_waitcnt expcnt(0) ; BF8C070F v_subrev_f32_e32 v1, s1, v16 ; 0A022001 v_subrev_f32_e32 v3, s2, v7 ; 0A060E02 exp 15, 36, 0, 0, 0, v12, v0, v1, v3 ; F800024F 0301000C exp 15, 37, 0, 0, 0, v15, v16, v7, v8 ; F800025F 0807100F exp 15, 12, 0, 1, 0, v19, v11, v12, v2 ; F80008CF 020C0B13 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 20 Code Size: 788 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SVIEW[0], CUBE, FLOAT DCL SVIEW[1], CUBE, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL CONST[0..5] DCL CONST[8..20] DCL CONST[23..24] DCL CONST[26] DCL TEMP[0..18], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, 0.5000} IMM[1] FLT32 { 0.7500, 7.0000, 1.0000, 10.0000} IMM[2] FLT32 { 0.9680, 0.0300, 0.0001, -1.0000} 0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx 3: DP3 TEMP[1].x, IN[4].yzww, IN[4].yzww 4: RSQ TEMP[1].x, TEMP[1].xxxx 5: MUL TEMP[1].xyz, IN[4].yzww, TEMP[1].xxxx 6: MOV TEMP[2].xy, IN[0].xyyy 7: TEX TEMP[2].x, TEMP[2], SAMP[3], 2D 8: MOV TEMP[3].xyz, IMM[0].xxxx 9: FSLT TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 10: UIF TEMP[2].xxxx :0 11: MUL TEMP[2].xyz, CONST[20].xyzz, CONST[19].xyzz 12: MOV TEMP[4].xy, IN[0].xyyy 13: TEX TEMP[4].xyz, TEMP[4], SAMP[2], 2D 14: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[4].xyzz 15: ELSE :0 16: MOV TEMP[2].xy, IN[0].xyyy 17: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D 18: MUL TEMP[3].xyz, CONST[19].xyzz, TEMP[2].xyzz 19: ENDIF 20: LRP TEMP[2].xyz, CONST[23].xxxx, TEMP[3].xyzz, CONST[16].xyzz 21: MUL TEMP[4].x, CONST[23].xxxx, CONST[16].wwww 22: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx 23: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 24: MOV TEMP[5].xy, IN[0].xyyy 25: TEX TEMP[5].y, TEMP[5], SAMP[4], 2D 26: ADD TEMP[6].x, IMM[0].xxxx, -CONST[26].xxxx 27: MAD TEMP[5].x, TEMP[5].yyyy, CONST[26].xxxx, TEMP[6].xxxx 28: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz 29: MAX TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx 30: MOV TEMP[7].xyz, IMM[0].yyyy 31: MOV TEMP[8].w, IMM[0].xxxx 32: MOV TEMP[8].xyz, TEMP[0].xyzx 33: DP4 TEMP[9].x, CONST[1], TEMP[8] 34: DP4 TEMP[10].x, CONST[2], TEMP[8] 35: MOV TEMP[9].y, TEMP[10].xxxx 36: DP4 TEMP[8].x, CONST[3], TEMP[8] 37: MOV TEMP[9].z, TEMP[8].xxxx 38: ADD TEMP[8].xyz, IN[2].xyzz, TEMP[9].xyzz 39: MOV TEMP[9].xy, IN[3].xyyy 40: MOV TEMP[9].w, IN[3].wwww 41: TXP TEMP[9].x, TEMP[9], SAMP[5], 2D 42: MUL TEMP[9].xyz, CONST[17].xyzz, TEMP[9].xxxx 43: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx 44: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[1].xyzz 45: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[0].xyzz 46: MUL TEMP[10].xyz, IMM[0].zzzz, TEMP[10].xyzz 47: ADD TEMP[10].xyz, TEMP[1].xyzz, -TEMP[10].xyzz 48: MOV TEMP[11].xyz, TEMP[10].xyzx 49: FSLT TEMP[12].x, IMM[0].yyyy, CONST[10].wwww 50: UIF TEMP[12].xxxx :0 51: DP3 TEMP[12].x, TEMP[10].xyzz, TEMP[10].xyzz 52: RSQ TEMP[12].x, TEMP[12].xxxx 53: MUL TEMP[12].xyz, TEMP[10].xyzz, TEMP[12].xxxx 54: MOV TEMP[13].xyz, -IN[5].xyzx 55: ADD TEMP[14].xyz, CONST[8].xyzz, TEMP[13].xyzz 56: RCP TEMP[15].x, TEMP[12].xxxx 57: RCP TEMP[15].y, TEMP[12].yyyy 58: RCP TEMP[15].z, TEMP[12].zzzz 59: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz 60: ADD TEMP[13].xyz, CONST[9].xyzz, TEMP[13].xyzz 61: RCP TEMP[15].x, TEMP[12].xxxx 62: RCP TEMP[15].y, TEMP[12].yyyy 63: RCP TEMP[15].z, TEMP[12].zzzz 64: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz 65: FSLT TEMP[15].xyz, IMM[0].yyyy, TEMP[12].xyzz 66: UIF TEMP[15].xxxx :0 67: MOV TEMP[16].x, TEMP[14].xxxx 68: ELSE :0 69: MOV TEMP[16].x, TEMP[13].xxxx 70: ENDIF 71: UIF TEMP[15].yyyy :0 72: MOV TEMP[17].x, TEMP[14].yyyy 73: ELSE :0 74: MOV TEMP[17].x, TEMP[13].yyyy 75: ENDIF 76: UIF TEMP[15].zzzz :0 77: MOV TEMP[14].x, TEMP[14].zzzz 78: ELSE :0 79: MOV TEMP[14].x, TEMP[13].zzzz 80: ENDIF 81: ADD TEMP[13].xyz, CONST[8].xyzz, CONST[9].xyzz 82: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[0].wwww 83: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx 84: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx 85: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[10].xyzz 86: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[5].xyzz 87: MAD TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xxxx, TEMP[15].xyzz 88: ADD TEMP[11].xyz, TEMP[12].xyzz, -TEMP[13].xyzz 89: ENDIF 90: ADD TEMP[12].x, IMM[0].xxxx, -CONST[24].xxxx 91: POW TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx 92: MUL TEMP[12].x, TEMP[12].xxxx, IMM[1].yyyy 93: MOV TEMP[11].xyz, TEMP[11].xyzz 94: MOV TEMP[11].w, TEMP[12].xxxx 95: TXL TEMP[11], TEMP[11], SAMP[0], CUBE 96: POW TEMP[12].x, TEMP[11].wwww, CONST[11].yyyy 97: MUL TEMP[12].x, CONST[11].xxxx, TEMP[12].xxxx 98: MUL TEMP[11].xyz, TEMP[12].xxxx, TEMP[11].xyzz 99: FSLT TEMP[12].x, CONST[9].wwww, IMM[1].zzzz 100: UIF TEMP[12].xxxx :0 101: MOV TEMP[12].xyz, TEMP[10].xyzx 102: FSLT TEMP[13].x, IMM[0].yyyy, CONST[14].wwww 103: UIF TEMP[13].xxxx :0 104: DP3 TEMP[13].x, TEMP[10].xyzz, TEMP[10].xyzz 105: RSQ TEMP[13].x, TEMP[13].xxxx 106: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[13].xxxx 107: MOV TEMP[13].xyz, -IN[5].xyzx 108: ADD TEMP[14].xyz, CONST[12].xyzz, TEMP[13].xyzz 109: RCP TEMP[15].x, TEMP[10].xxxx 110: RCP TEMP[15].y, TEMP[10].yyyy 111: RCP TEMP[15].z, TEMP[10].zzzz 112: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz 113: ADD TEMP[13].xyz, CONST[13].xyzz, TEMP[13].xyzz 114: RCP TEMP[15].x, TEMP[10].xxxx 115: RCP TEMP[15].y, TEMP[10].yyyy 116: RCP TEMP[15].z, TEMP[10].zzzz 117: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz 118: FSLT TEMP[15].xyz, IMM[0].yyyy, TEMP[10].xyzz 119: UIF TEMP[15].xxxx :0 120: MOV TEMP[16].x, TEMP[14].xxxx 121: ELSE :0 122: MOV TEMP[16].x, TEMP[13].xxxx 123: ENDIF 124: UIF TEMP[15].yyyy :0 125: MOV TEMP[17].x, TEMP[14].yyyy 126: ELSE :0 127: MOV TEMP[17].x, TEMP[13].yyyy 128: ENDIF 129: UIF TEMP[15].zzzz :0 130: MOV TEMP[14].x, TEMP[14].zzzz 131: ELSE :0 132: MOV TEMP[14].x, TEMP[13].zzzz 133: ENDIF 134: ADD TEMP[13].xyz, CONST[12].xyzz, CONST[13].xyzz 135: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[0].wwww 136: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx 137: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx 138: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[14].xyzz 139: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[5].xyzz 140: MAD TEMP[10].xyz, TEMP[10].xyzz, TEMP[14].xxxx, TEMP[15].xyzz 141: ADD TEMP[12].xyz, TEMP[10].xyzz, -TEMP[13].xyzz 142: ENDIF 143: ADD TEMP[10].x, IMM[0].xxxx, -CONST[24].xxxx 144: POW TEMP[10].x, TEMP[10].xxxx, IMM[1].xxxx 145: MUL TEMP[10].x, TEMP[10].xxxx, IMM[1].yyyy 146: MOV TEMP[12].xyz, TEMP[12].xyzz 147: MOV TEMP[12].w, TEMP[10].xxxx 148: TXL TEMP[10], TEMP[12], SAMP[1], CUBE 149: POW TEMP[12].x, TEMP[10].wwww, CONST[15].yyyy 150: MUL TEMP[12].x, CONST[15].xxxx, TEMP[12].xxxx 151: MUL TEMP[10].xyz, TEMP[12].xxxx, TEMP[10].xyzz 152: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[11].xyzz, TEMP[10].xyzz 153: ELSE :0 154: MOV TEMP[7].xyz, TEMP[11].xyzx 155: ENDIF 156: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx 157: MOV TEMP[1].xyz, -TEMP[1].xyzx 158: ADD TEMP[5].x, IMM[0].xxxx, -CONST[24].xxxx 159: ADD TEMP[10].xyz, CONST[0].xyzz, TEMP[1].xyzz 160: DP3 TEMP[11].x, TEMP[10].xyzz, TEMP[10].xyzz 161: RSQ TEMP[11].x, TEMP[11].xxxx 162: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[11].xxxx 163: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz 164: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx 165: DP3 TEMP[11].x, CONST[0].xyzz, TEMP[10].xyzz 166: MAX TEMP[11].x, IMM[0].yyyy, TEMP[11].xxxx 167: MUL TEMP[12].x, TEMP[5].xxxx, TEMP[5].xxxx 168: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].wwww 169: ADD TEMP[13].x, IMM[0].xxxx, -TEMP[5].xxxx 170: MAD TEMP[13].x, TEMP[13].xxxx, IMM[2].xxxx, IMM[2].yyyy 171: LG2 TEMP[13].x, TEMP[13].xxxx 172: RCP TEMP[13].x, TEMP[13].xxxx 173: MUL TEMP[13].x, IMM[1].wwww, TEMP[13].xxxx 174: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[13].xxxx 175: ADD TEMP[14].x, IMM[0].xxxx, -TEMP[6].xxxx 176: ADD TEMP[15].x, IMM[0].xxxx, -TEMP[1].xxxx 177: MUL TEMP[16].x, IMM[0].zzzz, TEMP[11].xxxx 178: MUL TEMP[5].x, TEMP[11].xxxx, TEMP[5].xxxx 179: MAD TEMP[5].x, TEMP[16].xxxx, TEMP[5].xxxx, IMM[0].wwww 180: ADD TEMP[11].x, IMM[0].xxxx, -TEMP[11].xxxx 181: ADD TEMP[16].x, IMM[0].xxxx, -TEMP[1].xxxx 182: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx 183: ADD TEMP[4].x, CONST[24].xxxx, TEMP[4].xxxx 184: MOV_SAT TEMP[4].x, TEMP[4].xxxx 185: MUL TEMP[17].x, TEMP[16].xxxx, TEMP[16].xxxx 186: MUL TEMP[18].x, TEMP[16].xxxx, TEMP[16].xxxx 187: MUL TEMP[16].x, TEMP[18].xxxx, TEMP[16].xxxx 188: MUL TEMP[16].x, TEMP[17].xxxx, TEMP[16].xxxx 189: LRP TEMP[4].xyz, TEMP[16].xxxx, TEMP[4].xxxx, TEMP[2].xyzz 190: LRP TEMP[16].x, TEMP[6].xxxx, IMM[0].xxxx, TEMP[12].xxxx 191: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx, TEMP[12].xxxx 192: MAD TEMP[1].x, TEMP[16].xxxx, TEMP[1].xxxx, IMM[2].zzzz 193: RCP TEMP[1].x, TEMP[1].xxxx 194: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[10].xyzz 195: MAX TEMP[10].x, IMM[0].yyyy, TEMP[10].xxxx 196: POW TEMP[10].x, TEMP[10].xxxx, TEMP[13].xxxx 197: ADD TEMP[12].x, TEMP[13].xxxx, IMM[0].xxxx 198: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].yyyy 199: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[12].xxxx 200: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[10].xxxx 201: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx 202: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx 203: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx 204: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[9].xyzz 205: ADD TEMP[10].xyz, IMM[0].xxxx, -TEMP[2].xyzz 206: MUL TEMP[12].x, TEMP[11].xxxx, TEMP[11].xxxx 207: MUL TEMP[13].x, TEMP[11].xxxx, TEMP[11].xxxx 208: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[11].xxxx 209: MUL TEMP[11].x, TEMP[12].xxxx, TEMP[11].xxxx 210: MAD TEMP[2].xyz, TEMP[10].xyzz, TEMP[11].xxxx, TEMP[2].xyzz 211: ADD TEMP[10].x, TEMP[5].xxxx, IMM[2].wwww 212: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx 213: MUL TEMP[12].x, TEMP[14].xxxx, TEMP[14].xxxx 214: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[14].xxxx 215: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx 216: MAD TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx, IMM[0].xxxx 217: ADD TEMP[5].x, TEMP[5].xxxx, IMM[2].wwww 218: MUL TEMP[11].x, TEMP[15].xxxx, TEMP[15].xxxx 219: MUL TEMP[12].x, TEMP[15].xxxx, TEMP[15].xxxx 220: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[15].xxxx 221: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx 222: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[11].xxxx, IMM[0].xxxx 223: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx 224: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 225: MAD TEMP[5].xyz, TEMP[9].xyzz, TEMP[5].xxxx, TEMP[8].xyzz 226: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[5].xyzz 227: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz, TEMP[3].xyzz 228: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz 229: MOV TEMP[0].xyz, TEMP[0].xyzx 230: MAD TEMP[1].x, IN[4].xxxx, CONST[5].zzzz, CONST[5].wwww 231: MOV_SAT TEMP[1].x, TEMP[1].xxxx 232: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz 233: MOV TEMP[0].xyz, TEMP[0].xyzx 234: MOV TEMP[0].w, IMM[0].xxxx 235: MOV OUT[0], TEMP[0] 236: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292) %78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300) %79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312) %82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368) %83 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384) %84 = call float @llvm.SI.load.const(<16 x i8> %23, i32 416) %85 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %86 = load <32 x i8>, <32 x i8> addrspace(2)* %85, align 32, !tbaa !0 %87 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 %89 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %90 = bitcast <8 x i32> addrspace(2)* %89 to <32 x i8> addrspace(2)* %91 = load <32 x i8>, <32 x i8> addrspace(2)* %90, align 32, !tbaa !0 %92 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %93 = bitcast <4 x i32> addrspace(2)* %92 to <16 x i8> addrspace(2)* %94 = load <16 x i8>, <16 x i8> addrspace(2)* %93, align 16, !tbaa !0 %95 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %96 = load <8 x i32>, <8 x i32> addrspace(2)* %95, align 32, !tbaa !0 %97 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %98 = load <4 x i32>, <4 x i32> addrspace(2)* %97, align 16, !tbaa !0 %99 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %100 = bitcast <8 x i32> addrspace(2)* %99 to <32 x i8> addrspace(2)* %101 = load <32 x i8>, <32 x i8> addrspace(2)* %100, align 32, !tbaa !0 %102 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %103 = bitcast <4 x i32> addrspace(2)* %102 to <16 x i8> addrspace(2)* %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0 %105 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %106 = bitcast <8 x i32> addrspace(2)* %105 to <32 x i8> addrspace(2)* %107 = load <32 x i8>, <32 x i8> addrspace(2)* %106, align 32, !tbaa !0 %108 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %109 = bitcast <4 x i32> addrspace(2)* %108 to <16 x i8> addrspace(2)* %110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0 %111 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %112 = bitcast <8 x i32> addrspace(2)* %111 to <32 x i8> addrspace(2)* %113 = load <32 x i8>, <32 x i8> addrspace(2)* %112, align 32, !tbaa !0 %114 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %115 = bitcast <4 x i32> addrspace(2)* %114 to <16 x i8> addrspace(2)* %116 = load <16 x i8>, <16 x i8> addrspace(2)* %115, align 16, !tbaa !0 %117 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %119 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %123 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %124 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %125 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %126 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %127 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %128 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %129 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %130 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %131 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %132 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %133 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %134 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %135 = fmul float %119, %119 %136 = fmul float %120, %120 %137 = fadd float %136, %135 %138 = fmul float %121, %121 %139 = fadd float %137, %138 %140 = call float @llvm.AMDGPU.rsq.clamped.f32(float %139) %141 = fmul float %119, %140 %142 = fmul float %120, %140 %143 = fmul float %121, %140 %144 = fmul float %129, %129 %145 = fmul float %130, %130 %146 = fadd float %145, %144 %147 = fmul float %131, %131 %148 = fadd float %146, %147 %149 = call float @llvm.AMDGPU.rsq.clamped.f32(float %148) %150 = fmul float %129, %149 %151 = fmul float %130, %149 %152 = fmul float %131, %149 %153 = bitcast float %117 to i32 %154 = bitcast float %118 to i32 %155 = insertelement <2 x i32> undef, i32 %153, i32 0 %156 = insertelement <2 x i32> %155, i32 %154, i32 1 %157 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %156, <32 x i8> %101, <16 x i8> %104, i32 2) %158 = extractelement <4 x float> %157, i32 0 %159 = fcmp ogt float %158, 0.000000e+00 br i1 %159, label %IF, label %ELSE IF: ; preds = %main_body %160 = call float @llvm.SI.load.const(<16 x i8> %23, i32 328) %161 = call float @llvm.SI.load.const(<16 x i8> %23, i32 324) %162 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320) %163 = fmul float %162, %79 %164 = fmul float %161, %80 %165 = fmul float %160, %81 %166 = bitcast float %117 to i32 %167 = bitcast float %118 to i32 %168 = insertelement <2 x i32> undef, i32 %166, i32 0 %169 = insertelement <2 x i32> %168, i32 %167, i32 1 %170 = bitcast <8 x i32> %96 to <32 x i8> %171 = bitcast <4 x i32> %98 to <16 x i8> %172 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %169, <32 x i8> %170, <16 x i8> %171, i32 2) %173 = extractelement <4 x float> %172, i32 0 %174 = extractelement <4 x float> %172, i32 1 %175 = extractelement <4 x float> %172, i32 2 %176 = fmul float %163, %173 %177 = fmul float %164, %174 %178 = fmul float %165, %175 br label %ENDIF ELSE: ; preds = %main_body %179 = bitcast float %117 to i32 %180 = bitcast float %118 to i32 %181 = insertelement <2 x i32> undef, i32 %179, i32 0 %182 = insertelement <2 x i32> %181, i32 %180, i32 1 %183 = bitcast <8 x i32> %96 to <32 x i8> %184 = bitcast <4 x i32> %98 to <16 x i8> %185 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %182, <32 x i8> %183, <16 x i8> %184, i32 2) %186 = extractelement <4 x float> %185, i32 0 %187 = extractelement <4 x float> %185, i32 1 %188 = extractelement <4 x float> %185, i32 2 %189 = fmul float %79, %186 %190 = fmul float %80, %187 %191 = fmul float %81, %188 br label %ENDIF ENDIF: ; preds = %ELSE, %IF %temp14.0 = phi float [ %178, %IF ], [ %191, %ELSE ] %temp13.0 = phi float [ %177, %IF ], [ %190, %ELSE ] %temp12.0 = phi float [ %176, %IF ], [ %189, %ELSE ] %192 = call float @llvm.AMDGPU.lrp(float %82, float %temp12.0, float %69) %193 = call float @llvm.AMDGPU.lrp(float %82, float %temp13.0, float %70) %194 = call float @llvm.AMDGPU.lrp(float %82, float %temp14.0, float %71) %195 = fmul float %82, %72 %196 = fsub float %72, %195 %197 = fmul float %temp12.0, %196 %198 = fmul float %temp13.0, %196 %199 = fmul float %temp14.0, %196 %200 = bitcast float %117 to i32 %201 = bitcast float %118 to i32 %202 = insertelement <2 x i32> undef, i32 %200, i32 0 %203 = insertelement <2 x i32> %202, i32 %201, i32 1 %204 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %203, <32 x i8> %107, <16 x i8> %110, i32 2) %205 = extractelement <4 x float> %204, i32 1 %206 = fsub float 1.000000e+00, %84 %207 = fmul float %205, %84 %208 = fadd float %207, %206 %209 = fmul float %141, %24 %210 = fmul float %142, %25 %211 = fadd float %210, %209 %212 = fmul float %143, %26 %213 = fadd float %211, %212 %214 = call float @llvm.maxnum.f32(float %213, float 0.000000e+00) %215 = fmul float %27, %141 %216 = fmul float %28, %142 %217 = fadd float %215, %216 %218 = fmul float %29, %143 %219 = fadd float %217, %218 %220 = fadd float %219, %30 %221 = fmul float %31, %141 %222 = fmul float %32, %142 %223 = fadd float %221, %222 %224 = fmul float %33, %143 %225 = fadd float %223, %224 %226 = fadd float %225, %34 %227 = fmul float %35, %141 %228 = fmul float %36, %142 %229 = fadd float %227, %228 %230 = fmul float %37, %143 %231 = fadd float %229, %230 %232 = fadd float %231, %38 %233 = fadd float %122, %220 %234 = fadd float %123, %226 %235 = fadd float %124, %232 %236 = fdiv float %125, %127 %237 = fdiv float %126, %127 %238 = bitcast float %236 to i32 %239 = bitcast float %237 to i32 %240 = insertelement <2 x i32> undef, i32 %238, i32 0 %241 = insertelement <2 x i32> %240, i32 %239, i32 1 %242 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %241, <32 x i8> %113, <16 x i8> %116, i32 2) %243 = extractelement <4 x float> %242, i32 0 %244 = fmul float %73, %243 %245 = fmul float %74, %243 %246 = fmul float %75, %243 %247 = fmul float %233, %208 %248 = fmul float %234, %208 %249 = fmul float %235, %208 %250 = fmul float %141, %150 %251 = fmul float %142, %151 %252 = fadd float %251, %250 %253 = fmul float %143, %152 %254 = fadd float %252, %253 %255 = fmul float %254, %141 %256 = fmul float %254, %142 %257 = fmul float %254, %143 %258 = fmul float %255, 2.000000e+00 %259 = fmul float %256, 2.000000e+00 %260 = fmul float %257, 2.000000e+00 %261 = fsub float %150, %258 %262 = fsub float %151, %259 %263 = fsub float %152, %260 %264 = fcmp ogt float %54, 0.000000e+00 br i1 %264, label %IF77, label %ENDIF76 IF77: ; preds = %ENDIF %265 = fmul float %261, %261 %266 = fmul float %262, %262 %267 = fadd float %266, %265 %268 = fmul float %263, %263 %269 = fadd float %267, %268 %270 = call float @llvm.AMDGPU.rsq.clamped.f32(float %269) %271 = fmul float %261, %270 %272 = fmul float %262, %270 %273 = fmul float %263, %270 %274 = fsub float %44, %132 %275 = fsub float %45, %133 %276 = fsub float %46, %134 %277 = fdiv float 1.000000e+00, %271 %278 = fdiv float 1.000000e+00, %272 %279 = fdiv float 1.000000e+00, %273 %280 = fmul float %274, %277 %281 = fmul float %275, %278 %282 = fmul float %276, %279 %283 = fsub float %47, %132 %284 = fsub float %48, %133 %285 = fsub float %49, %134 %286 = fdiv float 1.000000e+00, %271 %287 = fdiv float 1.000000e+00, %272 %288 = fdiv float 1.000000e+00, %273 %289 = fmul float %283, %286 %290 = fmul float %284, %287 %291 = fmul float %285, %288 %292 = fcmp ogt float %271, 0.000000e+00 %293 = fcmp ogt float %272, 0.000000e+00 %294 = fcmp ogt float %273, 0.000000e+00 %. = select i1 %292, float %280, float %289 %temp68.0 = select i1 %293, float %281, float %290 %.103 = select i1 %294, float %282, float %291 %295 = fadd float %44, %47 %296 = fadd float %45, %48 %297 = fadd float %46, %49 %298 = fmul float %295, 5.000000e-01 %299 = fmul float %296, 5.000000e-01 %300 = fmul float %297, 5.000000e-01 %301 = call float @llvm.minnum.f32(float %., float %temp68.0) %302 = call float @llvm.minnum.f32(float %301, float %.103) %303 = fsub float %298, %51 %304 = fsub float %299, %52 %305 = fsub float %300, %53 %306 = fadd float %303, %132 %307 = fadd float %304, %133 %308 = fadd float %305, %134 %309 = fmul float %271, %302 %310 = fadd float %309, %306 %311 = fmul float %272, %302 %312 = fadd float %311, %307 %313 = fmul float %273, %302 %314 = fadd float %313, %308 %315 = fsub float %310, %298 %316 = fsub float %312, %299 %317 = fsub float %314, %300 br label %ENDIF76 ENDIF76: ; preds = %ENDIF, %IF77 %temp44.0 = phi float [ %315, %IF77 ], [ %261, %ENDIF ] %temp45.0 = phi float [ %316, %IF77 ], [ %262, %ENDIF ] %temp46.0 = phi float [ %317, %IF77 ], [ %263, %ENDIF ] %318 = fsub float 1.000000e+00, %83 %319 = call float @llvm.pow.f32(float %318, float 7.500000e-01) %320 = fmul float %319, 7.000000e+00 %321 = insertelement <4 x float> undef, float %temp44.0, i32 0 %322 = insertelement <4 x float> %321, float %temp45.0, i32 1 %323 = insertelement <4 x float> %322, float %temp46.0, i32 2 %324 = insertelement <4 x float> %323, float %320, i32 3 %325 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %324) %326 = extractelement <4 x float> %325, i32 0 %327 = extractelement <4 x float> %325, i32 1 %328 = extractelement <4 x float> %325, i32 2 %329 = extractelement <4 x float> %325, i32 3 %330 = call float @llvm.fabs.f32(float %328) %331 = fdiv float 1.000000e+00, %330 %332 = fmul float %326, %331 %333 = fadd float %332, 1.500000e+00 %334 = fmul float %327, %331 %335 = fadd float %334, 1.500000e+00 %336 = bitcast float %335 to i32 %337 = bitcast float %333 to i32 %338 = bitcast float %329 to i32 %339 = bitcast float %320 to i32 %340 = insertelement <4 x i32> undef, i32 %336, i32 0 %341 = insertelement <4 x i32> %340, i32 %337, i32 1 %342 = insertelement <4 x i32> %341, i32 %338, i32 2 %343 = insertelement <4 x i32> %342, i32 %339, i32 3 %344 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %343, <32 x i8> %86, <16 x i8> %88, i32 4) %345 = extractelement <4 x float> %344, i32 0 %346 = extractelement <4 x float> %344, i32 1 %347 = extractelement <4 x float> %344, i32 2 %348 = extractelement <4 x float> %344, i32 3 %349 = call float @llvm.pow.f32(float %348, float %56) %350 = fmul float %55, %349 %351 = fmul float %350, %345 %352 = fmul float %350, %346 %353 = fmul float %350, %347 %354 = fcmp olt float %50, 0x3FEFFFEB00000000 br i1 %354, label %IF89, label %ENDIF88 IF89: ; preds = %ENDIF76 %355 = fcmp ogt float %66, 0.000000e+00 br i1 %355, label %IF92, label %ENDIF91 ENDIF88: ; preds = %ENDIF76, %ENDIF91 %temp28.0 = phi float [ %583, %ENDIF91 ], [ %351, %ENDIF76 ] %temp29.0 = phi float [ %584, %ENDIF91 ], [ %352, %ENDIF76 ] %temp30.0 = phi float [ %585, %ENDIF91 ], [ %353, %ENDIF76 ] %356 = fmul float %temp28.0, %208 %357 = fmul float %temp29.0, %208 %358 = fmul float %temp30.0, %208 %359 = fsub float 1.000000e+00, %83 %360 = fsub float %24, %150 %361 = fsub float %25, %151 %362 = fsub float %26, %152 %363 = fmul float %360, %360 %364 = fmul float %361, %361 %365 = fadd float %364, %363 %366 = fmul float %362, %362 %367 = fadd float %365, %366 %368 = call float @llvm.AMDGPU.rsq.clamped.f32(float %367) %369 = fmul float %360, %368 %370 = fmul float %361, %368 %371 = fmul float %362, %368 %372 = fmul float %150, %141 %373 = fsub float -0.000000e+00, %372 %374 = fmul float %151, %142 %375 = fsub float %373, %374 %376 = fmul float %152, %143 %377 = fsub float %375, %376 %378 = call float @llvm.maxnum.f32(float %377, float 0.000000e+00) %379 = fmul float %24, %369 %380 = fmul float %25, %370 %381 = fadd float %380, %379 %382 = fmul float %26, %371 %383 = fadd float %381, %382 %384 = call float @llvm.maxnum.f32(float %383, float 0.000000e+00) %385 = fmul float %359, %359 %386 = fmul float %385, %78 %387 = fsub float 1.000000e+00, %359 %388 = fmul float %387, 0x3FEEF9DB20000000 %389 = fadd float %388, 0x3F9EB851E0000000 %390 = call float @llvm.log2.f32(float %389) %391 = fdiv float 1.000000e+00, %390 %392 = fmul float %391, 1.000000e+01 %393 = fmul float %392, %392 %394 = fsub float 1.000000e+00, %214 %395 = fsub float 1.000000e+00, %378 %396 = fmul float %384, 2.000000e+00 %397 = fmul float %384, %359 %398 = fmul float %396, %397 %399 = fadd float %398, 5.000000e-01 %400 = fsub float 1.000000e+00, %384 %401 = fsub float 1.000000e+00, %378 %402 = fsub float 1.000000e+00, %196 %403 = fadd float %83, %402 %404 = call float @llvm.AMDIL.clamp.(float %403, float 0.000000e+00, float 1.000000e+00) %405 = fmul float %401, %401 %406 = fmul float %401, %401 %407 = fmul float %406, %401 %408 = fmul float %405, %407 %409 = call float @llvm.AMDGPU.lrp(float %408, float %404, float %192) %410 = call float @llvm.AMDGPU.lrp(float %408, float %404, float %193) %411 = call float @llvm.AMDGPU.lrp(float %408, float %404, float %194) %412 = call float @llvm.AMDGPU.lrp(float %214, float 1.000000e+00, float %386) %413 = call float @llvm.AMDGPU.lrp(float %378, float 1.000000e+00, float %386) %414 = fmul float %412, %413 %415 = fadd float %414, 0x3F1A36E2E0000000 %416 = fdiv float 1.000000e+00, %415 %417 = fmul float %141, %369 %418 = fmul float %142, %370 %419 = fadd float %418, %417 %420 = fmul float %143, %371 %421 = fadd float %419, %420 %422 = call float @llvm.maxnum.f32(float %421, float 0.000000e+00) %423 = call float @llvm.pow.f32(float %422, float %393) %424 = fadd float %393, 1.000000e+00 %425 = fmul float %424, %77 %426 = fmul float %423, %425 %427 = fmul float %416, %426 %428 = fmul float %427, %214 %429 = fmul float %428, %76 %430 = call float @llvm.maxnum.f32(float %429, float 0.000000e+00) %431 = fmul float %430, %244 %432 = fmul float %430, %245 %433 = fmul float %430, %246 %434 = fsub float 1.000000e+00, %192 %435 = fsub float 1.000000e+00, %193 %436 = fsub float 1.000000e+00, %194 %437 = fmul float %400, %400 %438 = fmul float %400, %400 %439 = fmul float %438, %400 %440 = fmul float %437, %439 %441 = fmul float %434, %440 %442 = fadd float %441, %192 %443 = fmul float %435, %440 %444 = fadd float %443, %193 %445 = fmul float %436, %440 %446 = fadd float %445, %194 %447 = fadd float %399, -1.000000e+00 %448 = fmul float %394, %394 %449 = fmul float %394, %394 %450 = fmul float %449, %394 %451 = fmul float %448, %450 %452 = fmul float %447, %451 %453 = fadd float %452, 1.000000e+00 %454 = fadd float %399, -1.000000e+00 %455 = fmul float %395, %395 %456 = fmul float %395, %395 %457 = fmul float %456, %395 %458 = fmul float %455, %457 %459 = fmul float %454, %458 %460 = fadd float %459, 1.000000e+00 %461 = fmul float %453, %460 %462 = fmul float %461, %214 %463 = fmul float %244, %462 %464 = fadd float %463, %247 %465 = fmul float %245, %462 %466 = fadd float %465, %248 %467 = fmul float %246, %462 %468 = fadd float %467, %249 %469 = fmul float %197, %464 %470 = fmul float %198, %466 %471 = fmul float %199, %468 %472 = fmul float %431, %442 %473 = fadd float %472, %469 %474 = fmul float %432, %444 %475 = fadd float %474, %470 %476 = fmul float %433, %446 %477 = fadd float %476, %471 %478 = fmul float %356, %409 %479 = fadd float %478, %473 %480 = fmul float %357, %410 %481 = fadd float %480, %475 %482 = fmul float %358, %411 %483 = fadd float %482, %477 %484 = fmul float %128, %42 %485 = fadd float %484, %43 %486 = call float @llvm.AMDIL.clamp.(float %485, float 0.000000e+00, float 1.000000e+00) %487 = call float @llvm.AMDGPU.lrp(float %486, float %479, float %39) %488 = call float @llvm.AMDGPU.lrp(float %486, float %481, float %40) %489 = call float @llvm.AMDGPU.lrp(float %486, float %483, float %41) %490 = call i32 @llvm.SI.packf16(float %487, float %488) %491 = bitcast i32 %490 to float %492 = call i32 @llvm.SI.packf16(float %489, float 1.000000e+00) %493 = bitcast i32 %492 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %491, float %493, float %491, float %493) ret void IF92: ; preds = %IF89 %494 = fmul float %261, %261 %495 = fmul float %262, %262 %496 = fadd float %495, %494 %497 = fmul float %263, %263 %498 = fadd float %496, %497 %499 = call float @llvm.AMDGPU.rsq.clamped.f32(float %498) %500 = fmul float %261, %499 %501 = fmul float %262, %499 %502 = fmul float %263, %499 %503 = fsub float %57, %132 %504 = fsub float %58, %133 %505 = fsub float %59, %134 %506 = fdiv float 1.000000e+00, %500 %507 = fdiv float 1.000000e+00, %501 %508 = fdiv float 1.000000e+00, %502 %509 = fmul float %503, %506 %510 = fmul float %504, %507 %511 = fmul float %505, %508 %512 = fsub float %60, %132 %513 = fsub float %61, %133 %514 = fsub float %62, %134 %515 = fdiv float 1.000000e+00, %500 %516 = fdiv float 1.000000e+00, %501 %517 = fdiv float 1.000000e+00, %502 %518 = fmul float %512, %515 %519 = fmul float %513, %516 %520 = fmul float %514, %517 %521 = fcmp ogt float %500, 0.000000e+00 %522 = fcmp ogt float %501, 0.000000e+00 %523 = fcmp ogt float %502, 0.000000e+00 %.104 = select i1 %521, float %509, float %518 %temp68.1 = select i1 %522, float %510, float %519 %.105 = select i1 %523, float %511, float %520 %524 = fadd float %57, %60 %525 = fadd float %58, %61 %526 = fadd float %59, %62 %527 = fmul float %524, 5.000000e-01 %528 = fmul float %525, 5.000000e-01 %529 = fmul float %526, 5.000000e-01 %530 = call float @llvm.minnum.f32(float %.104, float %temp68.1) %531 = call float @llvm.minnum.f32(float %530, float %.105) %532 = fsub float %527, %63 %533 = fsub float %528, %64 %534 = fsub float %529, %65 %535 = fadd float %532, %132 %536 = fadd float %533, %133 %537 = fadd float %534, %134 %538 = fmul float %500, %531 %539 = fadd float %538, %535 %540 = fmul float %501, %531 %541 = fadd float %540, %536 %542 = fmul float %502, %531 %543 = fadd float %542, %537 %544 = fsub float %539, %527 %545 = fsub float %541, %528 %546 = fsub float %543, %529 br label %ENDIF91 ENDIF91: ; preds = %IF89, %IF92 %temp48.0 = phi float [ %544, %IF92 ], [ %261, %IF89 ] %temp49.0 = phi float [ %545, %IF92 ], [ %262, %IF89 ] %temp50.0 = phi float [ %546, %IF92 ], [ %263, %IF89 ] %547 = fsub float 1.000000e+00, %83 %548 = call float @llvm.pow.f32(float %547, float 7.500000e-01) %549 = fmul float %548, 7.000000e+00 %550 = insertelement <4 x float> undef, float %temp48.0, i32 0 %551 = insertelement <4 x float> %550, float %temp49.0, i32 1 %552 = insertelement <4 x float> %551, float %temp50.0, i32 2 %553 = insertelement <4 x float> %552, float %549, i32 3 %554 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %553) %555 = extractelement <4 x float> %554, i32 0 %556 = extractelement <4 x float> %554, i32 1 %557 = extractelement <4 x float> %554, i32 2 %558 = extractelement <4 x float> %554, i32 3 %559 = call float @llvm.fabs.f32(float %557) %560 = fdiv float 1.000000e+00, %559 %561 = fmul float %555, %560 %562 = fadd float %561, 1.500000e+00 %563 = fmul float %556, %560 %564 = fadd float %563, 1.500000e+00 %565 = bitcast float %564 to i32 %566 = bitcast float %562 to i32 %567 = bitcast float %558 to i32 %568 = bitcast float %549 to i32 %569 = insertelement <4 x i32> undef, i32 %565, i32 0 %570 = insertelement <4 x i32> %569, i32 %566, i32 1 %571 = insertelement <4 x i32> %570, i32 %567, i32 2 %572 = insertelement <4 x i32> %571, i32 %568, i32 3 %573 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %572, <32 x i8> %91, <16 x i8> %94, i32 4) %574 = extractelement <4 x float> %573, i32 0 %575 = extractelement <4 x float> %573, i32 1 %576 = extractelement <4 x float> %573, i32 2 %577 = extractelement <4 x float> %573, i32 3 %578 = call float @llvm.pow.f32(float %577, float %68) %579 = fmul float %67, %578 %580 = fmul float %579, %574 %581 = fmul float %579, %575 %582 = fmul float %579, %576 %583 = call float @llvm.AMDGPU.lrp(float %50, float %351, float %580) %584 = call float @llvm.AMDGPU.lrp(float %50, float %352, float %581) %585 = call float @llvm.AMDGPU.lrp(float %50, float %353, float %582) br label %ENDIF88 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v17, v0, 0, 0, [m0] ; C8440000 v_interp_p2_f32 v17, [v17], v1, 0, 0, [m0] ; C8450001 v_interp_p1_f32 v18, v0, 1, 0, [m0] ; C8480100 v_interp_p2_f32 v18, [v18], v1, 1, 0, [m0] ; C8490101 v_interp_p1_f32 v7, v0, 0, 1, [m0] ; C81C0400 v_interp_p2_f32 v7, [v7], v1, 0, 1, [m0] ; C81D0401 v_interp_p1_f32 v10, v0, 1, 1, [m0] ; C8280500 v_interp_p2_f32 v10, [v10], v1, 1, 1, [m0] ; C8290501 v_interp_p1_f32 v11, v0, 2, 1, [m0] ; C82C0600 v_interp_p2_f32 v11, [v11], v1, 2, 1, [m0] ; C82D0601 v_interp_p1_f32 v2, v0, 0, 2, [m0] ; C8080800 v_interp_p2_f32 v2, [v2], v1, 0, 2, [m0] ; C8090801 v_interp_p1_f32 v3, v0, 1, 2, [m0] ; C80C0900 v_interp_p2_f32 v3, [v3], v1, 1, 2, [m0] ; C80D0901 v_interp_p1_f32 v4, v0, 2, 2, [m0] ; C8100A00 v_interp_p2_f32 v4, [v4], v1, 2, 2, [m0] ; C8110A01 v_interp_p1_f32 v6, v0, 0, 3, [m0] ; C8180C00 v_interp_p2_f32 v6, [v6], v1, 0, 3, [m0] ; C8190C01 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 v_interp_p1_f32 v19, v0, 3, 3, [m0] ; C84C0F00 v_interp_p2_f32 v19, [v19], v1, 3, 3, [m0] ; C84D0F01 v_interp_p1_f32 v5, v0, 0, 4, [m0] ; C8141000 v_interp_p2_f32 v5, [v5], v1, 0, 4, [m0] ; C8151001 v_interp_p1_f32 v20, v0, 1, 4, [m0] ; C8501100 v_interp_p2_f32 v20, [v20], v1, 1, 4, [m0] ; C8511101 v_interp_p1_f32 v23, v0, 2, 4, [m0] ; C85C1200 v_interp_p2_f32 v23, [v23], v1, 2, 4, [m0] ; C85D1201 v_interp_p1_f32 v24, v0, 3, 4, [m0] ; C8601300 v_interp_p2_f32 v24, [v24], v1, 3, 4, [m0] ; C8611301 v_interp_p1_f32 v21, v0, 0, 5, [m0] ; C8541400 v_interp_p2_f32 v21, [v21], v1, 0, 5, [m0] ; C8551401 v_interp_p1_f32 v16, v0, 1, 5, [m0] ; C8401500 v_interp_p2_f32 v16, [v16], v1, 1, 5, [m0] ; C8411501 v_interp_p1_f32 v22, v0, 2, 5, [m0] ; C8581600 v_mul_f32_e32 v0, v7, v7 ; 10000F07 v_mac_f32_e32 v0, v10, v10 ; 3E00150A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x4c ; C204014C s_buffer_load_dword s9, s[0:3], 0x4d ; C204814D s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C s_load_dwordx8 s[36:43], s[6:7], 0x18 ; C0D20718 s_buffer_load_dword s18, s[0:3], 0x4e ; C209014E s_load_dwordx8 s[28:35], s[6:7], 0x10 ; C0CE0710 v_mac_f32_e32 v0, v11, v11 ; 3E00170B v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_mul_f32_e32 v8, v20, v20 ; 10102914 v_mac_f32_e32 v8, v23, v23 ; 3E102F17 v_mac_f32_e32 v8, v24, v24 ; 3E103118 v_rsq_clamp_f32_e32 v25, v8 ; 7E325908 v_interp_p2_f32 v22, [v22], v1, 2, 5, [m0] ; C8591601 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[36:43], s[20:23] ; F0800100 00A90111 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_nlt_f32_e32 vcc, 0, v1 ; 7C1C0280 s_and_saveexec_b64 s[20:21], vcc ; BE94246A s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[28:35], s[12:15] ; F0800700 00671A11 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v13, s8, v26 ; 101A3408 v_mul_f32_e32 v14, s9, v27 ; 101C3609 v_mul_f32_e32 v15, s18, v28 ; 101E3812 s_or_saveexec_b64 s[20:21], s[20:21] ; BE942514 s_buffer_load_dword s10, s[0:3], 0x2b ; C205012B s_buffer_load_dword s11, s[0:3], 0x40 ; C2058140 s_buffer_load_dword s16, s[0:3], 0x41 ; C2080141 s_buffer_load_dword s17, s[0:3], 0x42 ; C2088142 s_buffer_load_dword s26, s[0:3], 0x5c ; C20D015C s_waitcnt lgkmcnt(0) ; BF8C007F s_xor_b64 exec, exec, s[20:21] ; 89FE147E s_cbranch_execz BB0_4 ; BF880000 v_mov_b32_e32 v1, s8 ; 7E020208 v_mov_b32_e32 v8, s9 ; 7E100209 v_mov_b32_e32 v12, s18 ; 7E180212 s_buffer_load_dword s19, s[0:3], 0x50 ; C2098150 s_buffer_load_dword s22, s[0:3], 0x51 ; C20B0151 s_buffer_load_dword s23, s[0:3], 0x52 ; C20B8152 image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[28:35], s[12:15] ; F0800700 00671A11 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s19, v1 ; 10020213 v_mul_f32_e32 v8, s22, v8 ; 10101016 v_mul_f32_e32 v12, s23, v12 ; 10181817 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v13, v26, v1 ; 101A031A v_mul_f32_e32 v14, v27, v8 ; 101C111B v_mul_f32_e32 v15, v28, v12 ; 101E191C s_or_b64 exec, exec, s[20:21] ; 88FE147E s_buffer_load_dword s9, s[0:3], 0x27 ; C2048127 v_mul_f32_e32 v8, v0, v7 ; 10100F00 v_mul_f32_e32 v7, v0, v10 ; 100E1500 v_mul_f32_e32 v0, v0, v11 ; 10001700 s_buffer_load_dword s31, s[0:3], 0x2c ; C20F812C s_buffer_load_dword s32, s[0:3], 0x2d ; C210012D s_buffer_load_dword s8, s[0:3], 0x60 ; C2040160 v_mul_f32_e32 v12, v25, v20 ; 10182919 v_mul_f32_e32 v11, v25, v23 ; 10162F19 v_mul_f32_e32 v10, v25, v24 ; 10143119 v_sub_f32_e64 v1, 1.0, s26 ; D2080001 000034F2 v_mov_b32_e32 v26, 0x6f800000 ; 7E3402FF 6F800000 v_cmp_gt_f32_e64 vcc, |v19|, v26 ; D008016A 00023513 v_mov_b32_e32 v26, 0x2f800000 ; 7E3402FF 2F800000 v_cndmask_b32_e32 v26, 1.0, v26 ; 003434F2 v_mul_f32_e32 v29, v12, v8 ; 103A110C v_mac_f32_e32 v29, v11, v7 ; 3E3A0F0B v_mac_f32_e32 v29, v10, v0 ; 3E3A010A v_mul_f32_e32 v27, v8, v29 ; 10363B08 v_mac_f32_e32 v27, v8, v29 ; 3E363B08 v_mul_f32_e32 v28, v7, v29 ; 10383B07 v_mac_f32_e32 v28, v7, v29 ; 3E383B07 v_mad_f32 v27, v20, v25, -v27 ; D282001B 846E3314 v_mad_f32 v28, v23, v25, -v28 ; D282001C 84723317 v_mul_f32_e32 v19, v26, v19 ; 1026271A v_rcp_f32_e32 v19, v19 ; 7E265513 v_mul_f32_e32 v20, v0, v29 ; 10283B00 v_mac_f32_e32 v20, v0, v29 ; 3E283B00 v_mad_f32 v29, v24, v25, -v20 ; D282001D 84523318 v_mul_f32_e32 v6, v19, v6 ; 100C0D13 v_mul_f32_e32 v9, v19, v9 ; 10121313 v_mul_f32_e32 v19, v6, v26 ; 10263506 s_load_dwordx4 s[12:15], s[4:5], 0x10 ; C0860510 s_load_dwordx4 s[20:23], s[4:5], 0x14 ; C08A0514 s_load_dwordx8 s[36:43], s[6:7], 0x20 ; C0D20720 s_load_dwordx8 s[44:51], s[6:7], 0x28 ; C0D60728 v_mul_f32_e32 v20, v9, v26 ; 10283509 v_mul_f32_e32 v9, s11, v1 ; 1012020B v_mul_f32_e32 v6, s16, v1 ; 100C0210 v_mul_f32_e32 v1, s17, v1 ; 10020211 v_mac_f32_e32 v9, s26, v13 ; 3E121A1A v_mov_b32_e32 v30, v27 ; 7E3C031B v_mac_f32_e32 v6, s26, v14 ; 3E0C1C1A v_mov_b32_e32 v31, v28 ; 7E3E031C v_mac_f32_e32 v1, s26, v15 ; 3E021E1A v_mov_b32_e32 v32, v29 ; 7E40031D v_cmp_lt_f32_e64 s[10:11], 0, s10 ; D002000A 00001480 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[36:43], s[12:15] ; F0800F00 00691711 image_sample v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[44:51], s[20:23] ; F0800F00 00AB1113 s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[14:15], s[10:11] ; BE8E240A s_xor_b64 s[14:15], exec, s[14:15] ; 898E0E7E s_cbranch_execz BB0_6 ; BF880000 s_buffer_load_dword s10, s[0:3], 0x26 ; C2050126 s_buffer_load_dword s11, s[0:3], 0x28 ; C2058128 s_buffer_load_dword s12, s[0:3], 0x29 ; C2060129 s_buffer_load_dword s13, s[0:3], 0x2a ; C206812A s_buffer_load_dword s16, s[0:3], 0x20 ; C2080120 s_buffer_load_dword s17, s[0:3], 0x21 ; C2088121 s_buffer_load_dword s18, s[0:3], 0x22 ; C2090122 s_buffer_load_dword s19, s[0:3], 0x24 ; C2098124 s_buffer_load_dword s20, s[0:3], 0x25 ; C20A0125 v_mul_f32_e32 v18, v27, v27 ; 1024371B v_mac_f32_e32 v18, v28, v28 ; 3E24391C v_mac_f32_e32 v18, v29, v29 ; 3E243B1D v_rsq_clamp_f32_e32 v18, v18 ; 7E245912 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v19, s10, v22 ; 08262C0A v_mov_b32_e32 v20, s10 ; 7E28020A v_sub_f32_e32 v23, s16, v21 ; 082E2A10 v_sub_f32_e32 v25, s17, v16 ; 08322011 v_add_f32_e32 v20, s18, v20 ; 06282812 v_sub_f32_e32 v26, s18, v22 ; 08342C12 v_mad_f32 v30, 0.5, v20, -s13 ; D282001E 803628F0 v_add_f32_e32 v32, v22, v30 ; 06403D16 v_mul_f32_e32 v30, v18, v27 ; 103C3712 v_mul_f32_e32 v31, v18, v28 ; 103E3912 v_mul_f32_e32 v18, v18, v29 ; 10243B12 v_rcp_f32_e32 v33, v30 ; 7E42551E v_rcp_f32_e32 v34, v31 ; 7E44551F v_rcp_f32_e32 v35, v18 ; 7E465512 v_sub_f32_e32 v36, s19, v21 ; 08482A13 v_mov_b32_e32 v37, s19 ; 7E4A0213 v_add_f32_e32 v37, s16, v37 ; 064A4A10 v_mul_f32_e32 v23, v33, v23 ; 102E2F21 v_mul_f32_e32 v25, v34, v25 ; 10323322 v_mul_f32_e32 v26, v35, v26 ; 10343523 v_mul_f32_e32 v33, v33, v36 ; 10424921 v_sub_f32_e32 v36, s20, v16 ; 08482014 v_mov_b32_e32 v38, s20 ; 7E4C0214 v_mul_f32_e32 v34, v34, v36 ; 10444922 v_mul_f32_e32 v19, v35, v19 ; 10262723 v_cmp_lt_f32_e32 vcc, 0, v30 ; 7C023C80 v_cndmask_b32_e32 v23, v33, v23 ; 002E2F21 v_cmp_lt_f32_e32 vcc, 0, v31 ; 7C023E80 v_cndmask_b32_e32 v25, v34, v25 ; 00323322 v_cmp_lt_f32_e32 vcc, 0, v18 ; 7C022480 v_cndmask_b32_e32 v19, v19, v26 ; 00263513 v_add_f32_e32 v26, s17, v38 ; 06344C11 v_min3_f32 v19, v23, v25, v19 ; D2A20013 044E3317 v_mad_f32 v23, 0.5, v37, -s11 ; D2820017 802E4AF0 v_mad_f32 v25, 0.5, v26, -s12 ; D2820019 803234F0 v_add_f32_e32 v23, v21, v23 ; 062E2F15 v_add_f32_e32 v25, v16, v25 ; 06323310 v_mac_f32_e32 v23, v19, v30 ; 3E2E3D13 v_mac_f32_e32 v25, v19, v31 ; 3E323F13 v_mac_f32_e32 v32, v19, v18 ; 3E402513 v_mad_f32 v30, 0.5, -v37, v23 ; D282001E 445E4AF0 v_mad_f32 v31, 0.5, -v26, v25 ; D282001F 446634F0 v_mad_f32 v32, 0.5, -v20, v32 ; D2820020 448228F0 s_or_b64 exec, exec, s[14:15] ; 88FE0E7E s_buffer_load_dword s28, s[0:3], 0x17 ; C20E0117 s_buffer_load_dword s29, s[0:3], 0x43 ; C20E8143 s_buffer_load_dword s27, s[0:3], 0x44 ; C20D8144 s_buffer_load_dword s19, s[0:3], 0x45 ; C2098145 s_buffer_load_dword s16, s[0:3], 0x46 ; C2080146 s_buffer_load_dword s11, s[0:3], 0x0 ; C2058100 s_buffer_load_dword s12, s[0:3], 0x1 ; C2060101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s13, s[0:3], 0x4 ; C2068104 s_buffer_load_dword s14, s[0:3], 0x5 ; C2070105 s_buffer_load_dword s15, s[0:3], 0x6 ; C2078106 s_buffer_load_dword s17, s[0:3], 0x7 ; C2088107 s_buffer_load_dword s18, s[0:3], 0x8 ; C2090108 s_buffer_load_dword s20, s[0:3], 0x9 ; C20A0109 s_buffer_load_dword s21, s[0:3], 0xa ; C20A810A s_buffer_load_dword s22, s[0:3], 0xb ; C20B010B s_buffer_load_dword s23, s[0:3], 0xc ; C20B810C s_buffer_load_dword s24, s[0:3], 0xd ; C20C010D s_buffer_load_dword s25, s[0:3], 0xe ; C20C810E v_sub_f32_e64 v18, 1.0, s8 ; D2080012 000010F2 v_log_f32_e32 v18, v18 ; 7E244F12 v_mul_legacy_f32_e32 v18, 0x3f400000, v18 ; 0E2424FF 3F400000 v_exp_f32_e32 v18, v18 ; 7E244B12 v_mul_f32_e32 v33, 0x40e00000, v18 ; 104224FF 40E00000 v_cubeid_f32 v37, v30, v31, v32 ; D2880025 04823F1E v_cubema_f32 v36, v30, v31, v32 ; D28E0024 04823F1E s_load_dwordx4 s[36:39], s[4:5], 0x0 ; C0920500 s_load_dwordx8 s[40:47], s[6:7], 0x0 ; C0D40700 v_cubesc_f32 v35, v30, v31, v32 ; D28A0023 04823F1E v_cubetc_f32 v34, v30, v31, v32 ; D28C0022 04823F1E v_rcp_f32_e64 v18, |v36| ; D3540112 00000124 v_mov_b32_e32 v30, 0x3fc00000 ; 7E3C02FF 3FC00000 v_mad_f32 v31, v18, v34, v30 ; D282001F 047A4512 v_mac_f32_e32 v30, v18, v35 ; 3E3C4712 v_mov_b32_e32 v32, v37 ; 7E400325 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[30:33], 15, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[40:47], s[36:39] ; F0900F00 012A1E1E s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v18, v33 ; 7E244F21 s_buffer_load_dword s30, s[0:3], 0xf ; C20F010F s_buffer_load_dword s33, s[0:3], 0x68 ; C2108168 v_mul_legacy_f32_e32 v18, s32, v18 ; 0E242420 v_exp_f32_e32 v18, v18 ; 7E244B12 v_mul_f32_e32 v18, s31, v18 ; 1024241F v_mul_f32_e32 v20, v30, v18 ; 1028251E v_mul_f32_e32 v19, v31, v18 ; 1026251F v_mul_f32_e32 v18, v32, v18 ; 10242520 v_mov_b32_e32 v23, s26 ; 7E2E021A v_mov_b32_e32 v25, 0x3f7fff58 ; 7E3202FF 3F7FFF58 v_cmp_lt_f32_e32 vcc, s9, v25 ; 7C023209 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[34:35], vcc ; BEA2246A s_xor_b64 s[34:35], exec, s[34:35] ; 89A2227E s_cbranch_execz BB0_10 ; BF880000 s_buffer_load_dword s32, s[0:3], 0x3b ; C210013B s_buffer_load_dword s26, s[0:3], 0x3c ; C20D013C s_buffer_load_dword s31, s[0:3], 0x3d ; C20F813D s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[36:37], 0, s32 ; D0020024 00004080 s_and_saveexec_b64 s[36:37], s[36:37] ; BEA42424 s_xor_b64 s[36:37], exec, s[36:37] ; 89A4247E s_cbranch_execz BB0_11 ; BF880000 s_buffer_load_dword s32, s[0:3], 0x36 ; C2100136 s_buffer_load_dword s38, s[0:3], 0x38 ; C2130138 s_buffer_load_dword s39, s[0:3], 0x39 ; C2138139 s_buffer_load_dword s40, s[0:3], 0x3a ; C214013A s_buffer_load_dword s41, s[0:3], 0x30 ; C2148130 s_buffer_load_dword s42, s[0:3], 0x31 ; C2150131 s_buffer_load_dword s43, s[0:3], 0x32 ; C2158132 s_buffer_load_dword s44, s[0:3], 0x34 ; C2160134 s_buffer_load_dword s45, s[0:3], 0x35 ; C2168135 v_mul_f32_e32 v25, v27, v27 ; 1032371B v_mac_f32_e32 v25, v28, v28 ; 3E32391C v_mac_f32_e32 v25, v29, v29 ; 3E323B1D v_rsq_clamp_f32_e32 v25, v25 ; 7E325919 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v26, s32, v22 ; 08342C20 v_mov_b32_e32 v30, s32 ; 7E3C0220 v_sub_f32_e32 v31, s41, v21 ; 083E2A29 v_sub_f32_e32 v32, s42, v16 ; 0840202A v_add_f32_e32 v30, s43, v30 ; 063C3C2B v_sub_f32_e32 v33, s43, v22 ; 08422C2B v_mad_f32 v34, 0.5, v30, -s40 ; D2820022 80A23CF0 v_add_f32_e32 v22, v22, v34 ; 062C4516 v_mul_f32_e32 v27, v25, v27 ; 10363719 v_mul_f32_e32 v28, v25, v28 ; 10383919 v_mul_f32_e32 v25, v25, v29 ; 10323B19 v_rcp_f32_e32 v29, v27 ; 7E3A551B v_rcp_f32_e32 v34, v28 ; 7E44551C v_rcp_f32_e32 v35, v25 ; 7E465519 v_sub_f32_e32 v36, s44, v21 ; 08482A2C v_mov_b32_e32 v37, s44 ; 7E4A022C v_add_f32_e32 v37, s41, v37 ; 064A4A29 v_mul_f32_e32 v31, v29, v31 ; 103E3F1D v_mul_f32_e32 v29, v29, v36 ; 103A491D v_mul_f32_e32 v32, v34, v32 ; 10404122 v_mul_f32_e32 v33, v35, v33 ; 10424323 v_mul_f32_e32 v26, v35, v26 ; 10343523 v_mad_f32 v35, 0.5, v37, -s38 ; D2820023 809A4AF0 v_add_f32_e32 v21, v21, v35 ; 062A4715 v_sub_f32_e32 v35, s45, v16 ; 0846202D v_mov_b32_e32 v36, s45 ; 7E48022D v_mul_f32_e32 v34, v34, v35 ; 10444722 v_add_f32_e32 v35, s42, v36 ; 0646482A v_cmp_lt_f32_e32 vcc, 0, v27 ; 7C023680 v_cndmask_b32_e32 v29, v29, v31 ; 003A3F1D v_cmp_lt_f32_e32 vcc, 0, v28 ; 7C023880 v_cndmask_b32_e32 v31, v34, v32 ; 003E4122 v_cmp_lt_f32_e32 vcc, 0, v25 ; 7C023280 v_cndmask_b32_e32 v26, v26, v33 ; 0034431A v_min3_f32 v26, v29, v31, v26 ; D2A2001A 046A3F1D v_mad_f32 v29, 0.5, v35, -s39 ; D282001D 809E46F0 v_add_f32_e32 v16, v16, v29 ; 06203B10 v_mac_f32_e32 v21, v26, v27 ; 3E2A371A v_mac_f32_e32 v16, v26, v28 ; 3E20391A v_mac_f32_e32 v22, v26, v25 ; 3E2C331A v_mad_f32 v27, 0.5, -v37, v21 ; D282001B 44564AF0 v_mad_f32 v28, 0.5, -v35, v16 ; D282001C 444246F0 v_mad_f32 v29, 0.5, -v30, v22 ; D282001D 445A3CF0 s_or_b64 exec, exec, s[36:37] ; 88FE247E v_sub_f32_e64 v16, 1.0, s8 ; D2080010 000010F2 v_log_f32_e32 v16, v16 ; 7E204F10 s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 v_mul_legacy_f32_e32 v16, 0x3f400000, v16 ; 0E2020FF 3F400000 v_exp_f32_e32 v16, v16 ; 7E204B10 v_mul_f32_e32 v30, 0x40e00000, v16 ; 103C20FF 40E00000 v_cubeid_f32 v34, v27, v28, v29 ; D2880022 0476391B v_cubema_f32 v33, v27, v28, v29 ; D28E0021 0476391B s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 v_cubesc_f32 v32, v27, v28, v29 ; D28A0020 0476391B v_cubetc_f32 v31, v27, v28, v29 ; D28C001F 0476391B v_rcp_f32_e64 v16, |v33| ; D3540110 00000121 v_mov_b32_e32 v27, 0x3fc00000 ; 7E3602FF 3FC00000 v_mad_f32 v28, v16, v31, v27 ; D282001C 046E3F10 v_mac_f32_e32 v27, v16, v32 ; 3E364110 v_mov_b32_e32 v29, v34 ; 7E3A0322 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[25:28], 15, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[40:47], s[36:39] ; F0900F00 012A191B s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v16, v28 ; 7E204F1C v_sub_f32_e64 v21, 1.0, s9 ; D2080015 000012F2 v_mul_legacy_f32_e32 v16, s31, v16 ; 0E20201F v_exp_f32_e32 v16, v16 ; 7E204B10 v_mul_f32_e32 v16, s26, v16 ; 1020201A v_mul_f32_e32 v22, v25, v16 ; 102C2119 v_mul_f32_e32 v25, v26, v16 ; 1032211A v_mul_f32_e32 v16, v27, v16 ; 1020211B v_mul_f32_e32 v22, v22, v21 ; 102C2B16 v_mul_f32_e32 v25, v25, v21 ; 10322B19 v_mul_f32_e32 v16, v16, v21 ; 10202B10 v_mac_f32_e32 v22, s9, v20 ; 3E2C2809 v_mac_f32_e32 v25, s9, v19 ; 3E322609 v_mac_f32_e32 v16, s9, v18 ; 3E202409 v_mov_b32_e32 v18, v16 ; 7E240310 v_mov_b32_e32 v19, v25 ; 7E260319 v_mov_b32_e32 v20, v22 ; 7E280316 s_or_b64 exec, exec, s[34:35] ; 88FE227E v_mad_f32 v25, -v23, s29, s29 ; D2820019 20743B17 v_mov_b32_e32 v16, s28 ; 7E20021C v_mul_f32_e32 v21, v25, v13 ; 102A1B19 v_mul_f32_e32 v14, v25, v14 ; 101C1D19 v_mul_f32_e32 v13, v25, v15 ; 101A1F19 v_mul_f32_e32 v15, s27, v17 ; 101E221B v_sub_f32_e64 v26, 1.0, s33 ; D208001A 000042F2 v_mac_f32_e32 v26, s33, v24 ; 3E343021 v_mul_f32_e32 v22, s19, v17 ; 102C2213 v_mul_f32_e32 v17, s16, v17 ; 10222210 s_buffer_load_dword s6, s[0:3], 0x10 ; C2030110 s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_buffer_load_dword s16, s[0:3], 0x16 ; C2080116 s_buffer_load_dword s7, s[0:3], 0x48 ; C2038148 s_buffer_load_dword s9, s[0:3], 0x49 ; C2048149 s_buffer_load_dword s0, s[0:3], 0x4b ; C200014B v_mul_f32_e32 v23, s14, v7 ; 102E0E0E v_mac_f32_e32 v23, s13, v8 ; 3E2E100D v_mac_f32_e32 v23, s15, v0 ; 3E2E000F v_add_f32_e32 v23, s17, v23 ; 062E2E11 v_mul_f32_e32 v24, s20, v7 ; 10300E14 v_mac_f32_e32 v24, s18, v8 ; 3E301012 v_mac_f32_e32 v24, s21, v0 ; 3E300015 v_add_f32_e32 v24, s22, v24 ; 06303016 v_mul_f32_e32 v27, s24, v7 ; 10360E18 v_mac_f32_e32 v27, s23, v8 ; 3E361017 v_mac_f32_e32 v27, s25, v0 ; 3E360019 v_add_f32_e32 v27, s30, v27 ; 0636361E v_add_f32_e32 v2, v23, v2 ; 06040517 v_add_f32_e32 v3, v24, v3 ; 06060718 v_add_f32_e32 v24, v27, v4 ; 0630091B v_mul_f32_e32 v4, s11, v8 ; 1008100B v_mac_f32_e32 v4, s12, v7 ; 3E080E0C v_mac_f32_e32 v4, s10, v0 ; 3E08000A v_max_f32_e32 v23, 0, v4 ; 202E0880 v_mul_f32_e32 v4, v26, v2 ; 1008051A v_mul_f32_e32 v2, v26, v3 ; 1004071A v_mul_f32_e32 v3, v26, v24 ; 1006311A s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v16, s16, v5 ; 3E200A10 v_mul_f32_e32 v5, v26, v20 ; 100A291A v_mul_f32_e32 v19, v26, v19 ; 1026271A v_mul_f32_e32 v18, v26, v18 ; 1024251A v_sub_f32_e32 v20, 1.0, v25 ; 082832F2 v_add_f32_e32 v20, s8, v20 ; 06282808 v_add_f32_e64 v20, 0, v20 clamp ; D2060814 00022880 v_sub_f32_e32 v24, s11, v12 ; 0830180B v_sub_f32_e32 v25, s12, v11 ; 0832160C v_mul_f32_e32 v26, v24, v24 ; 10343118 v_mac_f32_e32 v26, v25, v25 ; 3E343319 v_sub_f32_e32 v27, s10, v10 ; 0836140A v_mac_f32_e32 v26, v27, v27 ; 3E34371B v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A v_mul_f32_e32 v24, v26, v24 ; 1030311A v_mul_f32_e32 v25, v26, v25 ; 1032331A v_mul_f32_e32 v26, v26, v27 ; 1034371A v_mul_f32_e32 v12, v12, v8 ; 1018110C v_mad_f32 v11, -v11, v7, -v12 ; D282000B A4320F0B v_mad_f32 v10, -v10, v0, v11 ; D282000A 242E010A v_mul_f32_e32 v8, v24, v8 ; 10101118 v_mac_f32_e32 v8, v25, v7 ; 3E100F19 v_mul_f32_e32 v7, s11, v24 ; 100E300B v_mac_f32_e32 v7, s12, v25 ; 3E0E320C v_mac_f32_e32 v8, v26, v0 ; 3E10011A v_mac_f32_e32 v7, s10, v26 ; 3E0E340A v_max_f32_e32 v0, 0, v7 ; 20000E80 v_sub_f32_e32 v7, 1.0, v0 ; 080E00F2 v_mul_f32_e32 v11, v7, v7 ; 10160F07 v_mul_f32_e32 v7, v7, v11 ; 100E1707 v_mul_f32_e32 v7, v7, v11 ; 100E1707 v_max_f32_e32 v10, 0, v10 ; 20141480 v_sub_f32_e32 v11, 1.0, v10 ; 081614F2 v_mul_f32_e32 v12, v11, v11 ; 1018170B v_mul_f32_e32 v24, v11, v12 ; 1030190B v_mad_f32 v25, -v12, v24, 1.0 ; D2820019 23CA310C v_mul_f32_e32 v26, v9, v25 ; 10343309 v_sub_f32_e32 v27, 1.0, v9 ; 083612F2 v_mac_f32_e32 v9, v7, v27 ; 3E123707 v_mul_f32_e32 v27, v6, v25 ; 10363306 v_sub_f32_e32 v28, 1.0, v6 ; 08380CF2 v_mac_f32_e32 v6, v7, v28 ; 3E0C3907 v_mul_f32_e32 v25, v1, v25 ; 10323301 v_sub_f32_e32 v28, 1.0, v1 ; 083802F2 v_mac_f32_e32 v1, v7, v28 ; 3E023907 v_sub_f32_e64 v7, 1.0, s8 ; D2080007 000010F2 v_sub_f32_e32 v28, 1.0, v7 ; 08380EF2 v_mov_b32_e32 v29, 0x3cf5c28f ; 7E3A02FF 3CF5C28F v_madmk_f32_e32 v28, v28, v29, 0x3f77ced9 ; 40383B1C 3F77CED9 v_add_f32_e32 v29, v0, v0 ; 063A0100 v_mul_f32_e32 v0, v7, v0 ; 10000107 v_mad_f32 v0, v29, v0, 0.5 ; D2820000 03C2011D v_mul_f32_e32 v12, v24, v12 ; 10181918 v_mac_f32_e32 v26, v20, v12 ; 3E341914 v_mac_f32_e32 v27, v20, v12 ; 3E361914 v_mac_f32_e32 v25, v20, v12 ; 3E321914 v_mul_f32_e32 v7, v7, v7 ; 100E0F07 v_log_f32_e32 v20, v28 ; 7E284F1C v_mul_f32_e32 v7, s0, v7 ; 100E0E00 v_mul_f32_e32 v11, v7, v11 ; 10161707 v_mac_f32_e32 v11, 1.0, v10 ; 3E1614F2 v_rcp_f32_e32 v10, v20 ; 7E145514 v_sub_f32_e32 v20, 1.0, v23 ; 08282EF2 v_mul_f32_e32 v7, v7, v20 ; 100E2907 v_mac_f32_e32 v7, 1.0, v23 ; 3E0E2EF2 v_max_f32_e32 v8, 0, v8 ; 20101080 v_log_f32_e32 v8, v8 ; 7E104F08 v_madak_f32_e32 v7, v7, v11, 0x38d1b717 ; 420E1707 38D1B717 v_mul_f32_e32 v10, 0x41200000, v10 ; 101414FF 41200000 v_mul_f32_e32 v11, v10, v10 ; 1016150A v_mul_legacy_f32_e32 v8, v11, v8 ; 0E10110B v_rcp_f32_e32 v7, v7 ; 7E0E5507 v_mad_f32 v10, v10, v10, 1.0 ; D282000A 03CA150A v_mul_f32_e32 v10, s9, v10 ; 10141409 v_exp_f32_e32 v8, v8 ; 7E104B08 v_mul_f32_e32 v8, v10, v8 ; 1010110A v_mul_f32_e32 v7, v8, v7 ; 100E0F08 v_mul_f32_e32 v8, v20, v20 ; 10102914 v_mul_f32_e32 v10, v20, v8 ; 10141114 v_mul_f32_e32 v8, v10, v8 ; 1010110A v_add_f32_e32 v0, -1.0, v0 ; 060000F3 v_mad_f32 v8, v0, v8, 1.0 ; D2820008 03CA1100 v_mad_f32 v0, v0, v12, 1.0 ; D2820000 03CA1900 v_mul_f32_e32 v0, v0, v8 ; 10001100 v_mul_f32_e32 v7, v23, v7 ; 100E0F17 v_mul_f32_e32 v7, s7, v7 ; 100E0E07 v_mul_f32_e32 v0, v23, v0 ; 10000117 v_mac_f32_e32 v4, v0, v15 ; 3E081F00 v_mul_f32_e32 v4, v4, v21 ; 10082B04 v_max_f32_e32 v7, 0, v7 ; 200E0E80 v_mul_f32_e32 v8, v15, v7 ; 10100F0F v_mac_f32_e32 v4, v9, v8 ; 3E081109 v_mac_f32_e32 v2, v0, v22 ; 3E042D00 v_mac_f32_e32 v3, v0, v17 ; 3E062300 v_mul_f32_e32 v0, v22, v7 ; 10000F16 v_mul_f32_e32 v7, v17, v7 ; 100E0F11 v_mul_f32_e32 v2, v2, v14 ; 10041D02 v_mul_f32_e32 v3, v3, v13 ; 10061B03 v_mac_f32_e32 v2, v6, v0 ; 3E040106 v_mac_f32_e32 v3, v1, v7 ; 3E060F01 v_mac_f32_e32 v4, v26, v5 ; 3E080B1A v_mac_f32_e32 v2, v27, v19 ; 3E04271B v_mac_f32_e32 v3, v25, v18 ; 3E062519 v_add_f32_e64 v0, 0, v16 clamp ; D2060800 00022080 v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 v_mul_f32_e32 v5, s6, v1 ; 100A0206 v_mac_f32_e32 v5, v4, v0 ; 3E0A0104 v_mul_f32_e32 v4, s5, v1 ; 10080205 v_mac_f32_e32 v4, v2, v0 ; 3E080102 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mac_f32_e32 v1, v3, v0 ; 3E020103 v_cvt_pkrtz_f16_f32_e32 v0, v5, v4 ; 5E000905 v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 40 Code Size: 2340 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL OUT[6], GENERIC[5] DCL OUT[7], GENERIC[6] DCL OUT[8], GENERIC[7] DCL CONST[0..20] DCL TEMP[0..10], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[6], IN[0].xxxx 1: MAD TEMP[0], CONST[7], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0].xyz, CONST[9], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[17], IN[0].xxxx 5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1] 8: MAD TEMP[2].xy, IN[2].xyyy, CONST[14].xyyy, CONST[14].zwww 9: FSEQ TEMP[3].x, CONST[16].xxxx, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].xy, IN[2].xyxx 12: ELSE :0 13: MOV TEMP[3].xy, IN[3].xyxx 14: ENDIF 15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[15].xyyy, CONST[15].zwww 16: MOV TEMP[2].zw, TEMP[3].yyxy 17: MOV TEMP[3].x, CONST[10].xxxx 18: MOV TEMP[3].y, CONST[11].xxxx 19: MOV TEMP[3].z, CONST[12].xxxx 20: MOV TEMP[4].x, CONST[10].yyyy 21: MOV TEMP[4].y, CONST[11].yyyy 22: MOV TEMP[4].z, CONST[12].yyyy 23: MOV TEMP[5].x, CONST[10].zzzz 24: MOV TEMP[5].y, CONST[11].zzzz 25: MOV TEMP[5].z, CONST[12].zzzz 26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz 29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 30: RSQ TEMP[4].x, TEMP[4].xxxx 31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 32: MUL TEMP[4].xyz, CONST[6].xyzz, IN[4].xxxx 33: MAD TEMP[4].xyz, CONST[7].xyzz, IN[4].yyyy, TEMP[4].xyzz 34: MAD TEMP[4].xyz, CONST[8].xyzz, IN[4].zzzz, TEMP[4].xyzz 35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 36: RSQ TEMP[5].x, TEMP[5].xxxx 37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx 39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz 40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww 41: MOV TEMP[4].xyz, TEMP[4].xyzx 42: MOV TEMP[5].xyz, TEMP[5].xyzx 43: MOV TEMP[6].xyz, TEMP[3].xyzx 44: MUL TEMP[7].xyw, TEMP[1], IMM[0].yyyy 45: MOV TEMP[8].x, TEMP[7].xxxx 46: MUL TEMP[9].x, TEMP[7].yyyy, CONST[1].xxxx 47: MOV TEMP[8].y, TEMP[9].xxxx 48: ADD TEMP[7].xy, TEMP[8].xyyy, TEMP[7].wwww 49: MOV TEMP[7].zw, TEMP[1].wwzw 50: MUL TEMP[8], TEMP[3].xyzz, TEMP[3].yzzx 51: DP4 TEMP[9].x, CONST[2], TEMP[8] 52: DP4 TEMP[10].x, CONST[3], TEMP[8] 53: MOV TEMP[9].y, TEMP[10].xxxx 54: DP4 TEMP[8].x, CONST[4], TEMP[8] 55: MOV TEMP[9].z, TEMP[8].xxxx 56: MUL TEMP[8].x, TEMP[3].yyyy, TEMP[3].yyyy 57: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[8].xxxx 58: MAD TEMP[3].xyz, CONST[5].xyzz, TEMP[3].xxxx, TEMP[9].xyzz 59: ADD TEMP[8].xyz, TEMP[0].xyzz, -CONST[0].xyzz 60: MOV TEMP[8].yzw, TEMP[8].yxyz 61: MOV TEMP[8].x, TEMP[1].zzzz 62: MOV TEMP[0].xyz, TEMP[0].xyzx 63: MOV OUT[8], TEMP[0] 64: MOV OUT[1], TEMP[2] 65: MOV OUT[3], TEMP[5] 66: MOV OUT[2], TEMP[4] 67: MOV OUT[4], TEMP[6] 68: MOV OUT[5], TEMP[3] 69: MOV OUT[6], TEMP[7] 70: MOV OUT[0], TEMP[1] 71: MOV OUT[7], TEMP[8] 72: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332) %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 %83 = add i32 %5, %7 %84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83) %85 = extractelement <4 x float> %84, i32 0 %86 = extractelement <4 x float> %84, i32 1 %87 = extractelement <4 x float> %84, i32 2 %88 = extractelement <4 x float> %84, i32 3 %89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0 %91 = add i32 %5, %7 %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91) %93 = extractelement <4 x float> %92, i32 0 %94 = extractelement <4 x float> %92, i32 1 %95 = extractelement <4 x float> %92, i32 2 %96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0 %98 = add i32 %5, %7 %99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !tbaa !0 %104 = add i32 %5, %7 %105 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %103, i32 0, i32 %104) %106 = extractelement <4 x float> %105, i32 0 %107 = extractelement <4 x float> %105, i32 1 %108 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %109 = load <16 x i8>, <16 x i8> addrspace(2)* %108, align 16, !tbaa !0 %110 = add i32 %5, %7 %111 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %109, i32 0, i32 %110) %112 = extractelement <4 x float> %111, i32 0 %113 = extractelement <4 x float> %111, i32 1 %114 = extractelement <4 x float> %111, i32 2 %115 = extractelement <4 x float> %111, i32 3 %116 = fmul float %32, %85 %117 = fmul float %33, %85 %118 = fmul float %34, %85 %119 = fmul float %35, %85 %120 = fmul float %36, %86 %121 = fadd float %120, %116 %122 = fmul float %37, %86 %123 = fadd float %122, %117 %124 = fmul float %38, %86 %125 = fadd float %124, %118 %126 = fmul float %39, %86 %127 = fadd float %126, %119 %128 = fmul float %40, %87 %129 = fadd float %128, %121 %130 = fmul float %41, %87 %131 = fadd float %130, %123 %132 = fmul float %42, %87 %133 = fadd float %132, %125 %134 = fmul float %43, %87 %135 = fadd float %134, %127 %136 = fmul float %44, %88 %137 = fadd float %136, %129 %138 = fmul float %45, %88 %139 = fadd float %138, %131 %140 = fmul float %46, %88 %141 = fadd float %140, %133 %142 = fmul float %65, %85 %143 = fmul float %66, %85 %144 = fmul float %67, %85 %145 = fmul float %68, %85 %146 = fmul float %69, %86 %147 = fadd float %146, %142 %148 = fmul float %70, %86 %149 = fadd float %148, %143 %150 = fmul float %71, %86 %151 = fadd float %150, %144 %152 = fmul float %72, %86 %153 = fadd float %152, %145 %154 = fmul float %73, %87 %155 = fadd float %154, %147 %156 = fmul float %74, %87 %157 = fadd float %156, %149 %158 = fmul float %75, %87 %159 = fadd float %158, %151 %160 = fmul float %76, %87 %161 = fadd float %160, %153 %162 = fmul float %77, %88 %163 = fadd float %162, %155 %164 = fmul float %78, %88 %165 = fadd float %164, %157 %166 = fmul float %79, %88 %167 = fadd float %166, %159 %168 = fmul float %80, %88 %169 = fadd float %168, %161 %170 = fmul float %100, %56 %171 = fadd float %170, %58 %172 = fmul float %101, %57 %173 = fadd float %172, %59 %174 = fcmp oeq float %64, 0.000000e+00 %. = select i1 %174, float %100, float %106 %.44 = select i1 %174, float %101, float %107 %175 = fmul float %., %60 %176 = fadd float %175, %62 %177 = fmul float %.44, %61 %178 = fadd float %177, %63 %179 = fmul float %47, %93 %180 = fmul float %50, %93 %181 = fmul float %53, %93 %182 = fmul float %48, %94 %183 = fadd float %182, %179 %184 = fmul float %51, %94 %185 = fadd float %184, %180 %186 = fmul float %54, %94 %187 = fadd float %186, %181 %188 = fmul float %49, %95 %189 = fadd float %188, %183 %190 = fmul float %52, %95 %191 = fadd float %190, %185 %192 = fmul float %55, %95 %193 = fadd float %192, %187 %194 = fmul float %189, %189 %195 = fmul float %191, %191 %196 = fadd float %195, %194 %197 = fmul float %193, %193 %198 = fadd float %196, %197 %199 = call float @llvm.AMDGPU.rsq.clamped.f32(float %198) %200 = fmul float %189, %199 %201 = fmul float %191, %199 %202 = fmul float %193, %199 %203 = fmul float %32, %112 %204 = fmul float %33, %112 %205 = fmul float %34, %112 %206 = fmul float %36, %113 %207 = fadd float %206, %203 %208 = fmul float %37, %113 %209 = fadd float %208, %204 %210 = fmul float %38, %113 %211 = fadd float %210, %205 %212 = fmul float %40, %114 %213 = fadd float %212, %207 %214 = fmul float %41, %114 %215 = fadd float %214, %209 %216 = fmul float %42, %114 %217 = fadd float %216, %211 %218 = fmul float %213, %213 %219 = fmul float %215, %215 %220 = fadd float %219, %218 %221 = fmul float %217, %217 %222 = fadd float %220, %221 %223 = call float @llvm.AMDGPU.rsq.clamped.f32(float %222) %224 = fmul float %213, %223 %225 = fmul float %215, %223 %226 = fmul float %217, %223 %227 = fmul float %202, %225 %228 = fmul float %200, %226 %229 = fmul float %201, %224 %230 = fmul float %201, %226 %231 = fsub float %230, %227 %232 = fmul float %202, %224 %233 = fsub float %232, %228 %234 = fmul float %200, %225 %235 = fsub float %234, %229 %236 = fmul float %231, %115 %237 = fmul float %233, %115 %238 = fmul float %235, %115 %239 = fmul float %163, 5.000000e-01 %240 = fmul float %165, 5.000000e-01 %241 = fmul float %169, 5.000000e-01 %242 = fmul float %240, %16 %243 = fadd float %239, %241 %244 = fadd float %242, %241 %245 = fmul float %200, %201 %246 = fmul float %201, %202 %247 = fmul float %202, %202 %248 = fmul float %202, %200 %249 = fmul float %17, %245 %250 = fmul float %18, %246 %251 = fadd float %249, %250 %252 = fmul float %19, %247 %253 = fadd float %251, %252 %254 = fmul float %20, %248 %255 = fadd float %253, %254 %256 = fmul float %21, %245 %257 = fmul float %22, %246 %258 = fadd float %256, %257 %259 = fmul float %23, %247 %260 = fadd float %258, %259 %261 = fmul float %24, %248 %262 = fadd float %260, %261 %263 = fmul float %25, %245 %264 = fmul float %26, %246 %265 = fadd float %263, %264 %266 = fmul float %27, %247 %267 = fadd float %265, %266 %268 = fmul float %28, %248 %269 = fadd float %267, %268 %270 = fmul float %201, %201 %271 = fmul float %200, %200 %272 = fsub float %271, %270 %273 = fmul float %29, %272 %274 = fadd float %273, %255 %275 = fmul float %30, %272 %276 = fadd float %275, %262 %277 = fmul float %31, %272 %278 = fadd float %277, %269 %279 = fsub float %137, %13 %280 = fsub float %139, %14 %281 = fsub float %141, %15 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %171, float %173, float %176, float %178) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %224, float %225, float %226, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %236, float %237, float %238, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %200, float %201, float %202, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %274, float %276, float %278, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %243, float %244, float %167, float %169) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %167, float %279, float %280, float %281) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %137, float %139, float %141, float %135) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %163, float %165, float %167, float %169) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908 s_load_dwordx4 s[16:19], s[8:9], 0xc ; C088090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s32, s[28:31], 0x23 ; C2101D23 buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[11:14], v0, s[16:19], 0 idxen ; E00C2000 80040B00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[13:16], v0, s[8:11], 0 idxen ; E00C2000 80020D00 s_buffer_load_dword s33, s[28:31], 0x24 ; C2109D24 s_buffer_load_dword s34, s[28:31], 0x25 ; C2111D25 s_buffer_load_dword s35, s[28:31], 0x26 ; C2119D26 s_buffer_load_dword s36, s[28:31], 0x28 ; C2121D28 s_buffer_load_dword s3, s[28:31], 0x13 ; C2019D13 s_buffer_load_dword s2, s[28:31], 0x14 ; C2011D14 s_buffer_load_dword s0, s[28:31], 0x15 ; C2001D15 s_buffer_load_dword s1, s[28:31], 0x16 ; C2009D16 s_buffer_load_dword s18, s[28:31], 0x18 ; C2091D18 s_buffer_load_dword s37, s[28:31], 0x29 ; C2129D29 s_buffer_load_dword s38, s[28:31], 0x2a ; C2131D2A s_buffer_load_dword s39, s[28:31], 0x2c ; C2139D2C s_buffer_load_dword s40, s[28:31], 0x2d ; C2141D2D s_buffer_load_dword s41, s[28:31], 0x2e ; C2149D2E s_buffer_load_dword s20, s[28:31], 0x19 ; C20A1D19 s_buffer_load_dword s19, s[28:31], 0x1a ; C2099D1A s_buffer_load_dword s42, s[28:31], 0x1b ; C2151D1B s_buffer_load_dword s23, s[28:31], 0x1c ; C20B9D1C s_buffer_load_dword s22, s[28:31], 0x1d ; C20B1D1D s_buffer_load_dword s43, s[28:31], 0x30 ; C2159D30 s_buffer_load_dword s44, s[28:31], 0x31 ; C2161D31 s_buffer_load_dword s45, s[28:31], 0x32 ; C2169D32 s_buffer_load_dword s46, s[28:31], 0x38 ; C2171D38 s_buffer_load_dword s47, s[28:31], 0x39 ; C2179D39 s_buffer_load_dword s24, s[28:31], 0x1e ; C20C1D1E s_buffer_load_dword s48, s[28:31], 0x1f ; C2181D1F s_buffer_load_dword s26, s[28:31], 0x20 ; C20D1D20 s_buffer_load_dword s27, s[28:31], 0x21 ; C20D9D21 s_buffer_load_dword s25, s[28:31], 0x22 ; C20C9D22 s_buffer_load_dword s4, s[28:31], 0x3f ; C2021D3F s_buffer_load_dword s5, s[28:31], 0x40 ; C2029D40 s_buffer_load_dword s49, s[28:31], 0x44 ; C2189D44 s_buffer_load_dword s50, s[28:31], 0x45 ; C2191D45 s_buffer_load_dword s51, s[28:31], 0x46 ; C2199D46 s_buffer_load_dword s6, s[28:31], 0x3a ; C2031D3A s_buffer_load_dword s8, s[28:31], 0x3b ; C2041D3B s_buffer_load_dword s52, s[28:31], 0x3c ; C21A1D3C s_buffer_load_dword s53, s[28:31], 0x3d ; C21A9D3D s_buffer_load_dword s14, s[28:31], 0x3e ; C2071D3E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s4 ; 7E000204 s_buffer_load_dword s13, s[28:31], 0x9 ; C2069D09 v_cmp_eq_f32_e64 vcc, 0, s5 ; D004006A 00000A80 s_buffer_load_dword s9, s[28:31], 0xa ; C2049D0A s_buffer_load_dword s7, s[28:31], 0xb ; C2039D0B s_buffer_load_dword s12, s[28:31], 0xc ; C2061D0C v_mov_b32_e32 v17, s6 ; 7E220206 s_buffer_load_dword s16, s[28:31], 0xd ; C2081D0D v_mov_b32_e32 v18, s8 ; 7E240208 s_buffer_load_dword s11, s[28:31], 0xe ; C2059D0E s_buffer_load_dword s10, s[28:31], 0xf ; C2051D0F s_buffer_load_dword s15, s[28:31], 0x10 ; C2079D10 v_mov_b32_e32 v19, s14 ; 7E26020E s_buffer_load_dword s17, s[28:31], 0x11 ; C2089D11 s_buffer_load_dword s14, s[28:31], 0x12 ; C2071D12 s_buffer_load_dword s54, s[28:31], 0x47 ; C21B1D47 s_buffer_load_dword s55, s[28:31], 0x48 ; C21B9D48 s_buffer_load_dword s56, s[28:31], 0x49 ; C21C1D49 s_buffer_load_dword s57, s[28:31], 0x4a ; C21C9D4A s_buffer_load_dword s58, s[28:31], 0x4b ; C21D1D4B s_buffer_load_dword s4, s[28:31], 0x0 ; C2021D00 s_buffer_load_dword s5, s[28:31], 0x1 ; C2029D01 s_buffer_load_dword s6, s[28:31], 0x2 ; C2031D02 s_buffer_load_dword s8, s[28:31], 0x4 ; C2041D04 s_buffer_load_dword s21, s[28:31], 0x8 ; C20A9D08 s_buffer_load_dword s59, s[28:31], 0x4c ; C21D9D4C s_buffer_load_dword s60, s[28:31], 0x4d ; C21E1D4D s_buffer_load_dword s61, s[28:31], 0x4e ; C21E9D4E s_buffer_load_dword s62, s[28:31], 0x4f ; C21F1D4F s_buffer_load_dword s63, s[28:31], 0x50 ; C21F9D50 s_buffer_load_dword s64, s[28:31], 0x51 ; C2201D51 s_buffer_load_dword s65, s[28:31], 0x52 ; C2209D52 s_buffer_load_dword s28, s[28:31], 0x53 ; C20E1D53 v_mul_f32_e32 v20, s42, v2 ; 1028042A v_mac_f32_e32 v20, s48, v3 ; 3E280630 v_mac_f32_e32 v20, s32, v4 ; 3E280820 v_mac_f32_e32 v17, s46, v9 ; 3E22122E v_mac_f32_e32 v18, s47, v10 ; 3E24142F v_mul_f32_e32 v21, s49, v2 ; 102A0431 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v21, s55, v3 ; 3E2A0637 v_mac_f32_e32 v21, s59, v4 ; 3E2A083B v_mac_f32_e32 v21, s63, v5 ; 3E2A0A3F v_mul_f32_e32 v22, s50, v2 ; 102C0432 v_mac_f32_e32 v22, s56, v3 ; 3E2C0638 v_mac_f32_e32 v22, s60, v4 ; 3E2C083C v_mac_f32_e32 v22, s64, v5 ; 3E2C0A40 v_mul_f32_e32 v23, s51, v2 ; 102E0433 v_mac_f32_e32 v23, s57, v3 ; 3E2E0639 v_mac_f32_e32 v23, s61, v4 ; 3E2E083D v_mac_f32_e32 v23, s65, v5 ; 3E2E0A41 v_mul_f32_e32 v24, s54, v2 ; 10300436 v_mac_f32_e32 v24, s58, v3 ; 3E30063A v_mac_f32_e32 v24, s62, v4 ; 3E30083E v_mac_f32_e32 v24, s28, v5 ; 3E300A1C v_cndmask_b32_e32 v9, v11, v9 ; 0012130B v_cndmask_b32_e32 v10, v12, v10 ; 0014150C v_mul_f32_e32 v11, s36, v6 ; 10160C24 v_mac_f32_e32 v11, s37, v7 ; 3E160E25 v_mul_f32_e32 v12, s39, v6 ; 10180C27 v_mac_f32_e32 v12, s40, v7 ; 3E180E28 v_mul_f32_e32 v6, s43, v6 ; 100C0C2B v_mac_f32_e32 v6, s44, v7 ; 3E0C0E2C v_mac_f32_e32 v11, s38, v8 ; 3E161026 v_mac_f32_e32 v12, s41, v8 ; 3E181029 v_mac_f32_e32 v6, s45, v8 ; 3E0C102D v_mul_f32_e32 v7, s18, v2 ; 100E0412 v_mac_f32_e32 v7, s23, v3 ; 3E0E0617 v_mac_f32_e32 v7, s26, v4 ; 3E0E081A v_mac_f32_e32 v7, s33, v5 ; 3E0E0A21 v_mul_f32_e32 v8, s20, v2 ; 10100414 v_mac_f32_e32 v8, s22, v3 ; 3E100616 v_mac_f32_e32 v8, s27, v4 ; 3E10081B v_mac_f32_e32 v8, s34, v5 ; 3E100A22 v_mul_f32_e32 v2, s19, v2 ; 10040413 v_mac_f32_e32 v2, s24, v3 ; 3E040618 v_mac_f32_e32 v2, s25, v4 ; 3E040819 v_mac_f32_e32 v2, s35, v5 ; 3E040A23 v_mac_f32_e32 v19, s52, v9 ; 3E261234 v_mac_f32_e32 v0, s53, v10 ; 3E001435 exp 15, 32, 0, 0, 0, v17, v18, v19, v0 ; F800020F 00131211 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s18, v13 ; 10001A12 v_mac_f32_e32 v0, s23, v14 ; 3E001C17 v_mul_f32_e32 v3, s20, v13 ; 10061A14 v_mac_f32_e32 v3, s22, v14 ; 3E061C16 v_mul_f32_e32 v4, s19, v13 ; 10081A13 v_mac_f32_e32 v4, s24, v14 ; 3E081C18 v_mac_f32_e32 v0, s26, v15 ; 3E001E1A v_mac_f32_e32 v3, s27, v15 ; 3E061E1B v_mac_f32_e32 v4, s25, v15 ; 3E081E19 v_mul_f32_e32 v5, v11, v11 ; 100A170B v_mac_f32_e32 v5, v12, v12 ; 3E0A190C v_mac_f32_e32 v5, v6, v6 ; 3E0A0D06 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mul_f32_e32 v9, v0, v0 ; 10120100 v_mac_f32_e32 v9, v3, v3 ; 3E120703 v_mac_f32_e32 v9, v4, v4 ; 3E120904 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mul_f32_e32 v10, v5, v11 ; 10141705 v_mul_f32_e32 v11, v5, v12 ; 10161905 v_mul_f32_e32 v5, v5, v6 ; 100A0D05 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v3, v9, v3 ; 10060709 v_mul_f32_e32 v4, v9, v4 ; 10080909 v_mul_f32_e32 v6, v3, v5 ; 100C0B03 v_mad_f32 v6, v11, v4, -v6 ; D2820006 841A090B v_mul_f32_e32 v9, v4, v10 ; 10121504 v_mad_f32 v9, v5, v0, -v9 ; D2820009 84260105 v_mul_f32_e32 v12, v0, v11 ; 10181700 v_mad_f32 v12, v10, v3, -v12 ; D282000C 8432070A v_mul_f32_e32 v6, v16, v6 ; 100C0D10 v_mul_f32_e32 v9, v16, v9 ; 10121310 v_mul_f32_e32 v12, v16, v12 ; 10181910 exp 15, 33, 0, 0, 0, v0, v3, v4, v1 ; F800021F 01040300 exp 15, 34, 0, 0, 0, v6, v9, v12, v1 ; F800022F 010C0906 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v5, v11 ; 10001705 v_mul_f32_e32 v3, s13, v0 ; 1006000D v_mul_f32_e32 v4, s16, v0 ; 10080010 v_mul_f32_e32 v0, s17, v0 ; 10000011 v_mul_f32_e32 v6, v11, v10 ; 100C150B v_mac_f32_e32 v3, s21, v6 ; 3E060C15 v_mac_f32_e32 v4, s12, v6 ; 3E080C0C v_mac_f32_e32 v0, s15, v6 ; 3E000C0F v_mul_f32_e32 v6, v5, v5 ; 100C0B05 v_mac_f32_e32 v3, s9, v6 ; 3E060C09 v_mac_f32_e32 v4, s11, v6 ; 3E080C0B v_mac_f32_e32 v0, s14, v6 ; 3E000C0E v_mul_f32_e32 v6, v10, v5 ; 100C0B0A v_mac_f32_e32 v3, s7, v6 ; 3E060C07 v_mac_f32_e32 v4, s10, v6 ; 3E080C0A v_mac_f32_e32 v0, s3, v6 ; 3E000C03 exp 15, 35, 0, 0, 0, v10, v11, v5, v1 ; F800023F 01050B0A s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v5, v11, v11 ; 100A170B v_mad_f32 v5, v10, v10, -v5 ; D2820005 8416150A v_mac_f32_e32 v3, s2, v5 ; 3E060A02 v_mac_f32_e32 v4, s0, v5 ; 3E080A00 v_mac_f32_e32 v0, s1, v5 ; 3E000A01 v_mul_f32_e32 v5, 0.5, v22 ; 100A2CF0 v_mul_f32_e32 v6, 0.5, v24 ; 100C30F0 exp 15, 36, 0, 0, 0, v3, v4, v0, v1 ; F800024F 01000403 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, 0.5, v21, v6 ; D2820000 041A2AF0 v_mac_f32_e32 v6, s8, v5 ; 3E0C0A08 exp 15, 37, 0, 0, 0, v0, v6, v23, v24 ; F800025F 18170600 s_waitcnt expcnt(0) ; BF8C070F v_subrev_f32_e32 v0, s4, v7 ; 0A000E04 v_subrev_f32_e32 v1, s5, v8 ; 0A021005 v_subrev_f32_e32 v3, s6, v2 ; 0A060406 exp 15, 38, 0, 0, 0, v23, v0, v1, v3 ; F800026F 03010017 exp 15, 39, 0, 0, 0, v7, v8, v2, v20 ; F800027F 14020807 exp 15, 12, 0, 1, 0, v21, v22, v23, v24 ; F80008CF 18171615 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 28 Code Size: 932 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL IN[6], GENERIC[6], PERSPECTIVE DCL IN[7], GENERIC[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SVIEW[0], CUBE, FLOAT DCL SVIEW[1], CUBE, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL CONST[0..5] DCL CONST[8..20] DCL CONST[23..25] DCL CONST[27] DCL TEMP[0..18], LOCAL IMM[0] FLT32 { 1.0000, 2.0000, -1.0000, 0.0000} IMM[1] FLT32 { 0.5000, 0.7500, 7.0000, 1.0000} IMM[2] FLT32 { 10.0000, 0.9680, 0.0300, 0.0001} 0: MOV TEMP[0].x, IN[1].xxxx 1: MOV TEMP[0].y, IN[2].xxxx 2: MOV TEMP[0].z, IN[3].xxxx 3: MOV TEMP[1].x, IN[1].yyyy 4: MOV TEMP[1].y, IN[2].yyyy 5: MOV TEMP[1].z, IN[3].yyyy 6: MOV TEMP[2].x, IN[1].zzzz 7: MOV TEMP[2].y, IN[2].zzzz 8: MOV TEMP[2].z, IN[3].zzzz 9: MOV TEMP[3].xy, IN[0].xyyy 10: TEX TEMP[3], TEMP[3], SAMP[2], 2D 11: MUL TEMP[4].x, TEMP[3].wwww, CONST[19].wwww 12: FSLT TEMP[4].x, TEMP[4].xxxx, CONST[20].xxxx 13: AND TEMP[4].x, TEMP[4].xxxx, IMM[0].xxxx 14: KILL_IF -TEMP[4].xxxx 15: MOV TEMP[4].xy, IN[0].xyyy 16: TEX TEMP[4].yw, TEMP[4], SAMP[3], 2D 17: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[0].yyyy, IMM[0].zzzz 18: MUL TEMP[4].xy, TEMP[4].xyyy, CONST[23].xxxx 19: DP2 TEMP[5].x, TEMP[4].xyyy, TEMP[4].xyyy 20: MOV_SAT TEMP[5].x, TEMP[5].xxxx 21: ADD TEMP[5].x, IMM[0].xxxx, -TEMP[5].xxxx 22: SQRT TEMP[5].x, TEMP[5].xxxx 23: MOV TEMP[4].z, TEMP[5].xxxx 24: DP3 TEMP[0].x, TEMP[4].xyzz, TEMP[0].xyzz 25: DP3 TEMP[1].x, TEMP[4].xyzz, TEMP[1].xyzz 26: MOV TEMP[0].y, TEMP[1].xxxx 27: DP3 TEMP[1].x, TEMP[4].xyzz, TEMP[2].xyzz 28: MOV TEMP[0].z, TEMP[1].xxxx 29: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 30: RSQ TEMP[1].x, TEMP[1].xxxx 31: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 32: DP3 TEMP[1].x, IN[6].yzww, IN[6].yzww 33: RSQ TEMP[1].x, TEMP[1].xxxx 34: MUL TEMP[1].xyz, IN[6].yzww, TEMP[1].xxxx 35: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[3].xyzz 36: LRP TEMP[3].xyz, CONST[24].xxxx, TEMP[2].xyzz, CONST[16].xyzz 37: MUL TEMP[4].x, CONST[24].xxxx, CONST[16].wwww 38: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx 39: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx 40: MOV TEMP[5].xy, IN[0].xyyy 41: TEX TEMP[5].y, TEMP[5], SAMP[4], 2D 42: ADD TEMP[6].x, IMM[0].xxxx, -CONST[27].xxxx 43: MAD TEMP[5].x, TEMP[5].yyyy, CONST[27].xxxx, TEMP[6].xxxx 44: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz 45: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx 46: MOV TEMP[7].xyz, IMM[0].wwww 47: MOV TEMP[8].w, IMM[0].xxxx 48: MOV TEMP[8].xyz, TEMP[0].xyzx 49: DP4 TEMP[9].x, CONST[1], TEMP[8] 50: DP4 TEMP[10].x, CONST[2], TEMP[8] 51: MOV TEMP[9].y, TEMP[10].xxxx 52: DP4 TEMP[8].x, CONST[3], TEMP[8] 53: MOV TEMP[9].z, TEMP[8].xxxx 54: ADD TEMP[8].xyz, IN[4].xyzz, TEMP[9].xyzz 55: MOV TEMP[9].xy, IN[5].xyyy 56: MOV TEMP[9].w, IN[5].wwww 57: TXP TEMP[9].x, TEMP[9], SAMP[5], 2D 58: MUL TEMP[9].xyz, CONST[17].xyzz, TEMP[9].xxxx 59: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx 60: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[1].xyzz 61: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[0].xyzz 62: MUL TEMP[10].xyz, IMM[0].yyyy, TEMP[10].xyzz 63: ADD TEMP[10].xyz, TEMP[1].xyzz, -TEMP[10].xyzz 64: MOV TEMP[11].xyz, TEMP[10].xyzx 65: FSLT TEMP[12].x, IMM[0].wwww, CONST[10].wwww 66: UIF TEMP[12].xxxx :0 67: DP3 TEMP[12].x, TEMP[10].xyzz, TEMP[10].xyzz 68: RSQ TEMP[12].x, TEMP[12].xxxx 69: MUL TEMP[12].xyz, TEMP[10].xyzz, TEMP[12].xxxx 70: MOV TEMP[13].xyz, -IN[7].xyzx 71: ADD TEMP[14].xyz, CONST[8].xyzz, TEMP[13].xyzz 72: RCP TEMP[15].x, TEMP[12].xxxx 73: RCP TEMP[15].y, TEMP[12].yyyy 74: RCP TEMP[15].z, TEMP[12].zzzz 75: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz 76: ADD TEMP[13].xyz, CONST[9].xyzz, TEMP[13].xyzz 77: RCP TEMP[15].x, TEMP[12].xxxx 78: RCP TEMP[15].y, TEMP[12].yyyy 79: RCP TEMP[15].z, TEMP[12].zzzz 80: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz 81: FSLT TEMP[15].xyz, IMM[0].wwww, TEMP[12].xyzz 82: UIF TEMP[15].xxxx :0 83: MOV TEMP[16].x, TEMP[14].xxxx 84: ELSE :0 85: MOV TEMP[16].x, TEMP[13].xxxx 86: ENDIF 87: UIF TEMP[15].yyyy :0 88: MOV TEMP[17].x, TEMP[14].yyyy 89: ELSE :0 90: MOV TEMP[17].x, TEMP[13].yyyy 91: ENDIF 92: UIF TEMP[15].zzzz :0 93: MOV TEMP[14].x, TEMP[14].zzzz 94: ELSE :0 95: MOV TEMP[14].x, TEMP[13].zzzz 96: ENDIF 97: ADD TEMP[13].xyz, CONST[8].xyzz, CONST[9].xyzz 98: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[1].xxxx 99: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx 100: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx 101: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[10].xyzz 102: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[7].xyzz 103: MAD TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xxxx, TEMP[15].xyzz 104: ADD TEMP[11].xyz, TEMP[12].xyzz, -TEMP[13].xyzz 105: ENDIF 106: ADD TEMP[12].x, IMM[0].xxxx, -CONST[25].xxxx 107: POW TEMP[12].x, TEMP[12].xxxx, IMM[1].yyyy 108: MUL TEMP[12].x, TEMP[12].xxxx, IMM[1].zzzz 109: MOV TEMP[11].xyz, TEMP[11].xyzz 110: MOV TEMP[11].w, TEMP[12].xxxx 111: TXL TEMP[11], TEMP[11], SAMP[0], CUBE 112: POW TEMP[12].x, TEMP[11].wwww, CONST[11].yyyy 113: MUL TEMP[12].x, CONST[11].xxxx, TEMP[12].xxxx 114: MUL TEMP[11].xyz, TEMP[12].xxxx, TEMP[11].xyzz 115: FSLT TEMP[12].x, CONST[9].wwww, IMM[1].wwww 116: UIF TEMP[12].xxxx :0 117: MOV TEMP[12].xyz, TEMP[10].xyzx 118: FSLT TEMP[13].x, IMM[0].wwww, CONST[14].wwww 119: UIF TEMP[13].xxxx :0 120: DP3 TEMP[13].x, TEMP[10].xyzz, TEMP[10].xyzz 121: RSQ TEMP[13].x, TEMP[13].xxxx 122: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[13].xxxx 123: MOV TEMP[13].xyz, -IN[7].xyzx 124: ADD TEMP[14].xyz, CONST[12].xyzz, TEMP[13].xyzz 125: RCP TEMP[15].x, TEMP[10].xxxx 126: RCP TEMP[15].y, TEMP[10].yyyy 127: RCP TEMP[15].z, TEMP[10].zzzz 128: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz 129: ADD TEMP[13].xyz, CONST[13].xyzz, TEMP[13].xyzz 130: RCP TEMP[15].x, TEMP[10].xxxx 131: RCP TEMP[15].y, TEMP[10].yyyy 132: RCP TEMP[15].z, TEMP[10].zzzz 133: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz 134: FSLT TEMP[15].xyz, IMM[0].wwww, TEMP[10].xyzz 135: UIF TEMP[15].xxxx :0 136: MOV TEMP[16].x, TEMP[14].xxxx 137: ELSE :0 138: MOV TEMP[16].x, TEMP[13].xxxx 139: ENDIF 140: UIF TEMP[15].yyyy :0 141: MOV TEMP[17].x, TEMP[14].yyyy 142: ELSE :0 143: MOV TEMP[17].x, TEMP[13].yyyy 144: ENDIF 145: UIF TEMP[15].zzzz :0 146: MOV TEMP[14].x, TEMP[14].zzzz 147: ELSE :0 148: MOV TEMP[14].x, TEMP[13].zzzz 149: ENDIF 150: ADD TEMP[13].xyz, CONST[12].xyzz, CONST[13].xyzz 151: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[1].xxxx 152: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx 153: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx 154: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[14].xyzz 155: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[7].xyzz 156: MAD TEMP[10].xyz, TEMP[10].xyzz, TEMP[14].xxxx, TEMP[15].xyzz 157: ADD TEMP[12].xyz, TEMP[10].xyzz, -TEMP[13].xyzz 158: ENDIF 159: ADD TEMP[10].x, IMM[0].xxxx, -CONST[25].xxxx 160: POW TEMP[10].x, TEMP[10].xxxx, IMM[1].yyyy 161: MUL TEMP[10].x, TEMP[10].xxxx, IMM[1].zzzz 162: MOV TEMP[12].xyz, TEMP[12].xyzz 163: MOV TEMP[12].w, TEMP[10].xxxx 164: TXL TEMP[10], TEMP[12], SAMP[1], CUBE 165: POW TEMP[12].x, TEMP[10].wwww, CONST[15].yyyy 166: MUL TEMP[12].x, CONST[15].xxxx, TEMP[12].xxxx 167: MUL TEMP[10].xyz, TEMP[12].xxxx, TEMP[10].xyzz 168: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[11].xyzz, TEMP[10].xyzz 169: ELSE :0 170: MOV TEMP[7].xyz, TEMP[11].xyzx 171: ENDIF 172: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx 173: MOV TEMP[1].xyz, -TEMP[1].xyzx 174: ADD TEMP[5].x, IMM[0].xxxx, -CONST[25].xxxx 175: ADD TEMP[10].xyz, CONST[0].xyzz, TEMP[1].xyzz 176: DP3 TEMP[11].x, TEMP[10].xyzz, TEMP[10].xyzz 177: RSQ TEMP[11].x, TEMP[11].xxxx 178: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[11].xxxx 179: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz 180: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx 181: DP3 TEMP[11].x, CONST[0].xyzz, TEMP[10].xyzz 182: MAX TEMP[11].x, IMM[0].wwww, TEMP[11].xxxx 183: MUL TEMP[12].x, TEMP[5].xxxx, TEMP[5].xxxx 184: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].wwww 185: ADD TEMP[13].x, IMM[0].xxxx, -TEMP[5].xxxx 186: MAD TEMP[13].x, TEMP[13].xxxx, IMM[2].yyyy, IMM[2].zzzz 187: LG2 TEMP[13].x, TEMP[13].xxxx 188: RCP TEMP[13].x, TEMP[13].xxxx 189: MUL TEMP[13].x, IMM[2].xxxx, TEMP[13].xxxx 190: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[13].xxxx 191: ADD TEMP[14].x, IMM[0].xxxx, -TEMP[6].xxxx 192: ADD TEMP[15].x, IMM[0].xxxx, -TEMP[1].xxxx 193: MUL TEMP[16].x, IMM[0].yyyy, TEMP[11].xxxx 194: MUL TEMP[5].x, TEMP[11].xxxx, TEMP[5].xxxx 195: MAD TEMP[5].x, TEMP[16].xxxx, TEMP[5].xxxx, IMM[1].xxxx 196: ADD TEMP[11].x, IMM[0].xxxx, -TEMP[11].xxxx 197: ADD TEMP[16].x, IMM[0].xxxx, -TEMP[1].xxxx 198: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx 199: ADD TEMP[4].x, CONST[25].xxxx, TEMP[4].xxxx 200: MOV_SAT TEMP[4].x, TEMP[4].xxxx 201: MUL TEMP[17].x, TEMP[16].xxxx, TEMP[16].xxxx 202: MUL TEMP[18].x, TEMP[16].xxxx, TEMP[16].xxxx 203: MUL TEMP[16].x, TEMP[18].xxxx, TEMP[16].xxxx 204: MUL TEMP[16].x, TEMP[17].xxxx, TEMP[16].xxxx 205: LRP TEMP[4].xyz, TEMP[16].xxxx, TEMP[4].xxxx, TEMP[3].xyzz 206: LRP TEMP[16].x, TEMP[6].xxxx, IMM[0].xxxx, TEMP[12].xxxx 207: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx, TEMP[12].xxxx 208: MAD TEMP[1].x, TEMP[16].xxxx, TEMP[1].xxxx, IMM[2].wwww 209: RCP TEMP[1].x, TEMP[1].xxxx 210: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[10].xyzz 211: MAX TEMP[10].x, IMM[0].wwww, TEMP[10].xxxx 212: POW TEMP[10].x, TEMP[10].xxxx, TEMP[13].xxxx 213: ADD TEMP[12].x, TEMP[13].xxxx, IMM[0].xxxx 214: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].yyyy 215: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[12].xxxx 216: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[10].xxxx 217: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx 218: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx 219: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx 220: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[9].xyzz 221: ADD TEMP[10].xyz, IMM[0].xxxx, -TEMP[3].xyzz 222: MUL TEMP[12].x, TEMP[11].xxxx, TEMP[11].xxxx 223: MUL TEMP[13].x, TEMP[11].xxxx, TEMP[11].xxxx 224: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[11].xxxx 225: MUL TEMP[11].x, TEMP[12].xxxx, TEMP[11].xxxx 226: MAD TEMP[3].xyz, TEMP[10].xyzz, TEMP[11].xxxx, TEMP[3].xyzz 227: ADD TEMP[10].x, TEMP[5].xxxx, IMM[0].zzzz 228: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx 229: MUL TEMP[12].x, TEMP[14].xxxx, TEMP[14].xxxx 230: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[14].xxxx 231: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx 232: MAD TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx, IMM[0].xxxx 233: ADD TEMP[5].x, TEMP[5].xxxx, IMM[0].zzzz 234: MUL TEMP[11].x, TEMP[15].xxxx, TEMP[15].xxxx 235: MUL TEMP[12].x, TEMP[15].xxxx, TEMP[15].xxxx 236: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[15].xxxx 237: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx 238: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[11].xxxx, IMM[0].xxxx 239: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx 240: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 241: MAD TEMP[5].xyz, TEMP[9].xyzz, TEMP[5].xxxx, TEMP[8].xyzz 242: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz 243: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz 244: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz 245: MOV TEMP[0].xyz, TEMP[0].xyzx 246: MAD TEMP[1].x, IN[6].xxxx, CONST[5].zzzz, CONST[5].wwww 247: MOV_SAT TEMP[1].x, TEMP[1].xxxx 248: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz 249: MOV TEMP[0].xyz, TEMP[0].xyzx 250: MOV TEMP[0].w, IMM[0].xxxx 251: MOV OUT[0], TEMP[0] 252: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300) %76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312) %79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 316) %80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320) %81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368) %82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384) %83 = call float @llvm.SI.load.const(<16 x i8> %23, i32 400) %84 = call float @llvm.SI.load.const(<16 x i8> %23, i32 432) %85 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %86 = load <32 x i8>, <32 x i8> addrspace(2)* %85, align 32, !tbaa !0 %87 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 %89 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %90 = bitcast <8 x i32> addrspace(2)* %89 to <32 x i8> addrspace(2)* %91 = load <32 x i8>, <32 x i8> addrspace(2)* %90, align 32, !tbaa !0 %92 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %93 = bitcast <4 x i32> addrspace(2)* %92 to <16 x i8> addrspace(2)* %94 = load <16 x i8>, <16 x i8> addrspace(2)* %93, align 16, !tbaa !0 %95 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %96 = bitcast <8 x i32> addrspace(2)* %95 to <32 x i8> addrspace(2)* %97 = load <32 x i8>, <32 x i8> addrspace(2)* %96, align 32, !tbaa !0 %98 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %99 = bitcast <4 x i32> addrspace(2)* %98 to <16 x i8> addrspace(2)* %100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !tbaa !0 %101 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %102 = bitcast <8 x i32> addrspace(2)* %101 to <32 x i8> addrspace(2)* %103 = load <32 x i8>, <32 x i8> addrspace(2)* %102, align 32, !tbaa !0 %104 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %105 = bitcast <4 x i32> addrspace(2)* %104 to <16 x i8> addrspace(2)* %106 = load <16 x i8>, <16 x i8> addrspace(2)* %105, align 16, !tbaa !0 %107 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %108 = bitcast <8 x i32> addrspace(2)* %107 to <32 x i8> addrspace(2)* %109 = load <32 x i8>, <32 x i8> addrspace(2)* %108, align 32, !tbaa !0 %110 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %111 = bitcast <4 x i32> addrspace(2)* %110 to <16 x i8> addrspace(2)* %112 = load <16 x i8>, <16 x i8> addrspace(2)* %111, align 16, !tbaa !0 %113 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %114 = bitcast <8 x i32> addrspace(2)* %113 to <32 x i8> addrspace(2)* %115 = load <32 x i8>, <32 x i8> addrspace(2)* %114, align 32, !tbaa !0 %116 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %117 = bitcast <4 x i32> addrspace(2)* %116 to <16 x i8> addrspace(2)* %118 = load <16 x i8>, <16 x i8> addrspace(2)* %117, align 16, !tbaa !0 %119 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %123 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %124 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %125 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %126 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %127 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %128 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %129 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %130 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %131 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %132 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %133 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %134 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %135 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7) %136 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %137 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %138 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %139 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %5, <2 x i32> %7) %140 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %5, <2 x i32> %7) %141 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %5, <2 x i32> %7) %142 = call float @llvm.SI.fs.interp(i32 2, i32 7, i32 %5, <2 x i32> %7) %143 = bitcast float %119 to i32 %144 = bitcast float %120 to i32 %145 = insertelement <2 x i32> undef, i32 %143, i32 0 %146 = insertelement <2 x i32> %145, i32 %144, i32 1 %147 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %146, <32 x i8> %97, <16 x i8> %100, i32 2) %148 = extractelement <4 x float> %147, i32 0 %149 = extractelement <4 x float> %147, i32 1 %150 = extractelement <4 x float> %147, i32 2 %151 = extractelement <4 x float> %147, i32 3 %152 = fmul float %151, %79 %153 = fcmp olt float %152, %80 %154 = select i1 %153, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %154) %155 = bitcast float %119 to i32 %156 = bitcast float %120 to i32 %157 = insertelement <2 x i32> undef, i32 %155, i32 0 %158 = insertelement <2 x i32> %157, i32 %156, i32 1 %159 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %103, <16 x i8> %106, i32 2) %160 = extractelement <4 x float> %159, i32 1 %161 = extractelement <4 x float> %159, i32 3 %162 = fmul float %161, 2.000000e+00 %163 = fadd float %162, -1.000000e+00 %164 = fmul float %160, 2.000000e+00 %165 = fadd float %164, -1.000000e+00 %166 = fmul float %163, %81 %167 = fmul float %165, %81 %168 = fmul float %166, %166 %169 = fmul float %167, %167 %170 = fadd float %168, %169 %171 = call float @llvm.AMDIL.clamp.(float %170, float 0.000000e+00, float 1.000000e+00) %172 = fsub float 1.000000e+00, %171 %173 = call float @llvm.sqrt.f32(float %172) %174 = fmul float %166, %121 %175 = fmul float %167, %124 %176 = fadd float %175, %174 %177 = fmul float %173, %127 %178 = fadd float %176, %177 %179 = fmul float %166, %122 %180 = fmul float %167, %125 %181 = fadd float %180, %179 %182 = fmul float %173, %128 %183 = fadd float %181, %182 %184 = fmul float %166, %123 %185 = fmul float %167, %126 %186 = fadd float %185, %184 %187 = fmul float %173, %129 %188 = fadd float %186, %187 %189 = fmul float %178, %178 %190 = fmul float %183, %183 %191 = fadd float %190, %189 %192 = fmul float %188, %188 %193 = fadd float %191, %192 %194 = call float @llvm.AMDGPU.rsq.clamped.f32(float %193) %195 = fmul float %178, %194 %196 = fmul float %183, %194 %197 = fmul float %188, %194 %198 = fmul float %137, %137 %199 = fmul float %138, %138 %200 = fadd float %199, %198 %201 = fmul float %139, %139 %202 = fadd float %200, %201 %203 = call float @llvm.AMDGPU.rsq.clamped.f32(float %202) %204 = fmul float %137, %203 %205 = fmul float %138, %203 %206 = fmul float %139, %203 %207 = fmul float %76, %148 %208 = fmul float %77, %149 %209 = fmul float %78, %150 %210 = call float @llvm.AMDGPU.lrp(float %82, float %207, float %66) %211 = call float @llvm.AMDGPU.lrp(float %82, float %208, float %67) %212 = call float @llvm.AMDGPU.lrp(float %82, float %209, float %68) %213 = fmul float %82, %69 %214 = fsub float %69, %213 %215 = fmul float %207, %214 %216 = fmul float %208, %214 %217 = fmul float %209, %214 %218 = bitcast float %119 to i32 %219 = bitcast float %120 to i32 %220 = insertelement <2 x i32> undef, i32 %218, i32 0 %221 = insertelement <2 x i32> %220, i32 %219, i32 1 %222 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %221, <32 x i8> %109, <16 x i8> %112, i32 2) %223 = extractelement <4 x float> %222, i32 1 %224 = fsub float 1.000000e+00, %84 %225 = fmul float %223, %84 %226 = fadd float %225, %224 %227 = fmul float %195, %24 %228 = fmul float %196, %25 %229 = fadd float %228, %227 %230 = fmul float %197, %26 %231 = fadd float %229, %230 %232 = call float @llvm.maxnum.f32(float %231, float 0.000000e+00) %233 = fmul float %27, %195 %234 = fmul float %28, %196 %235 = fadd float %233, %234 %236 = fmul float %29, %197 %237 = fadd float %235, %236 %238 = fadd float %237, %30 %239 = fmul float %31, %195 %240 = fmul float %32, %196 %241 = fadd float %239, %240 %242 = fmul float %33, %197 %243 = fadd float %241, %242 %244 = fadd float %243, %34 %245 = fmul float %35, %195 %246 = fmul float %36, %196 %247 = fadd float %245, %246 %248 = fmul float %37, %197 %249 = fadd float %247, %248 %250 = fadd float %249, %38 %251 = fadd float %130, %238 %252 = fadd float %131, %244 %253 = fadd float %132, %250 %254 = fdiv float %133, %135 %255 = fdiv float %134, %135 %256 = bitcast float %254 to i32 %257 = bitcast float %255 to i32 %258 = insertelement <2 x i32> undef, i32 %256, i32 0 %259 = insertelement <2 x i32> %258, i32 %257, i32 1 %260 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %259, <32 x i8> %115, <16 x i8> %118, i32 2) %261 = extractelement <4 x float> %260, i32 0 %262 = fmul float %70, %261 %263 = fmul float %71, %261 %264 = fmul float %72, %261 %265 = fmul float %251, %226 %266 = fmul float %252, %226 %267 = fmul float %253, %226 %268 = fmul float %195, %204 %269 = fmul float %196, %205 %270 = fadd float %269, %268 %271 = fmul float %197, %206 %272 = fadd float %270, %271 %273 = fmul float %272, %195 %274 = fmul float %272, %196 %275 = fmul float %272, %197 %276 = fmul float %273, 2.000000e+00 %277 = fmul float %274, 2.000000e+00 %278 = fmul float %275, 2.000000e+00 %279 = fsub float %204, %276 %280 = fsub float %205, %277 %281 = fsub float %206, %278 %282 = fcmp ogt float %51, 0.000000e+00 br i1 %282, label %IF, label %ENDIF IF: ; preds = %main_body %283 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %284 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %285 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %286 = fmul float %279, %279 %287 = fmul float %280, %280 %288 = fadd float %287, %286 %289 = fmul float %281, %281 %290 = fadd float %288, %289 %291 = call float @llvm.AMDGPU.rsq.clamped.f32(float %290) %292 = fmul float %279, %291 %293 = fmul float %280, %291 %294 = fmul float %281, %291 %295 = fsub float %44, %140 %296 = fsub float %45, %141 %297 = fsub float %46, %142 %298 = fdiv float 1.000000e+00, %292 %299 = fdiv float 1.000000e+00, %293 %300 = fdiv float 1.000000e+00, %294 %301 = fmul float %295, %298 %302 = fmul float %296, %299 %303 = fmul float %297, %300 %304 = fsub float %47, %140 %305 = fsub float %48, %141 %306 = fsub float %49, %142 %307 = fdiv float 1.000000e+00, %292 %308 = fdiv float 1.000000e+00, %293 %309 = fdiv float 1.000000e+00, %294 %310 = fmul float %304, %307 %311 = fmul float %305, %308 %312 = fmul float %306, %309 %313 = fcmp ogt float %292, 0.000000e+00 %314 = fcmp ogt float %293, 0.000000e+00 %315 = fcmp ogt float %294, 0.000000e+00 %. = select i1 %313, float %301, float %310 %temp68.0 = select i1 %314, float %302, float %311 %.100 = select i1 %315, float %303, float %312 %316 = fadd float %44, %47 %317 = fadd float %45, %48 %318 = fadd float %46, %49 %319 = fmul float %316, 5.000000e-01 %320 = fmul float %317, 5.000000e-01 %321 = fmul float %318, 5.000000e-01 %322 = call float @llvm.minnum.f32(float %., float %temp68.0) %323 = call float @llvm.minnum.f32(float %322, float %.100) %324 = fsub float %319, %285 %325 = fsub float %320, %284 %326 = fsub float %321, %283 %327 = fadd float %324, %140 %328 = fadd float %325, %141 %329 = fadd float %326, %142 %330 = fmul float %292, %323 %331 = fadd float %330, %327 %332 = fmul float %293, %323 %333 = fadd float %332, %328 %334 = fmul float %294, %323 %335 = fadd float %334, %329 %336 = fsub float %331, %319 %337 = fsub float %333, %320 %338 = fsub float %335, %321 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp44.0 = phi float [ %336, %IF ], [ %279, %main_body ] %temp45.0 = phi float [ %337, %IF ], [ %280, %main_body ] %temp46.0 = phi float [ %338, %IF ], [ %281, %main_body ] %339 = fsub float 1.000000e+00, %83 %340 = call float @llvm.pow.f32(float %339, float 7.500000e-01) %341 = fmul float %340, 7.000000e+00 %342 = insertelement <4 x float> undef, float %temp44.0, i32 0 %343 = insertelement <4 x float> %342, float %temp45.0, i32 1 %344 = insertelement <4 x float> %343, float %temp46.0, i32 2 %345 = insertelement <4 x float> %344, float %341, i32 3 %346 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %345) %347 = extractelement <4 x float> %346, i32 0 %348 = extractelement <4 x float> %346, i32 1 %349 = extractelement <4 x float> %346, i32 2 %350 = extractelement <4 x float> %346, i32 3 %351 = call float @llvm.fabs.f32(float %349) %352 = fdiv float 1.000000e+00, %351 %353 = fmul float %347, %352 %354 = fadd float %353, 1.500000e+00 %355 = fmul float %348, %352 %356 = fadd float %355, 1.500000e+00 %357 = bitcast float %356 to i32 %358 = bitcast float %354 to i32 %359 = bitcast float %350 to i32 %360 = bitcast float %341 to i32 %361 = insertelement <4 x i32> undef, i32 %357, i32 0 %362 = insertelement <4 x i32> %361, i32 %358, i32 1 %363 = insertelement <4 x i32> %362, i32 %359, i32 2 %364 = insertelement <4 x i32> %363, i32 %360, i32 3 %365 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %364, <32 x i8> %86, <16 x i8> %88, i32 4) %366 = extractelement <4 x float> %365, i32 0 %367 = extractelement <4 x float> %365, i32 1 %368 = extractelement <4 x float> %365, i32 2 %369 = extractelement <4 x float> %365, i32 3 %370 = call float @llvm.pow.f32(float %369, float %53) %371 = fmul float %52, %370 %372 = fmul float %371, %366 %373 = fmul float %371, %367 %374 = fmul float %371, %368 %375 = fcmp olt float %50, 0x3FEFFFEB00000000 br i1 %375, label %IF86, label %ENDIF85 IF86: ; preds = %ENDIF %376 = fcmp ogt float %63, 0.000000e+00 br i1 %376, label %IF89, label %ENDIF88 ENDIF85: ; preds = %ENDIF, %ENDIF88 %temp28.0 = phi float [ %604, %ENDIF88 ], [ %372, %ENDIF ] %temp29.0 = phi float [ %605, %ENDIF88 ], [ %373, %ENDIF ] %temp30.0 = phi float [ %606, %ENDIF88 ], [ %374, %ENDIF ] %377 = fmul float %temp28.0, %226 %378 = fmul float %temp29.0, %226 %379 = fmul float %temp30.0, %226 %380 = fsub float 1.000000e+00, %83 %381 = fsub float %24, %204 %382 = fsub float %25, %205 %383 = fsub float %26, %206 %384 = fmul float %381, %381 %385 = fmul float %382, %382 %386 = fadd float %385, %384 %387 = fmul float %383, %383 %388 = fadd float %386, %387 %389 = call float @llvm.AMDGPU.rsq.clamped.f32(float %388) %390 = fmul float %381, %389 %391 = fmul float %382, %389 %392 = fmul float %383, %389 %393 = fmul float %204, %195 %394 = fsub float -0.000000e+00, %393 %395 = fmul float %205, %196 %396 = fsub float %394, %395 %397 = fmul float %206, %197 %398 = fsub float %396, %397 %399 = call float @llvm.maxnum.f32(float %398, float 0.000000e+00) %400 = fmul float %24, %390 %401 = fmul float %25, %391 %402 = fadd float %401, %400 %403 = fmul float %26, %392 %404 = fadd float %402, %403 %405 = call float @llvm.maxnum.f32(float %404, float 0.000000e+00) %406 = fmul float %380, %380 %407 = fmul float %406, %75 %408 = fsub float 1.000000e+00, %380 %409 = fmul float %408, 0x3FEEF9DB20000000 %410 = fadd float %409, 0x3F9EB851E0000000 %411 = call float @llvm.log2.f32(float %410) %412 = fdiv float 1.000000e+00, %411 %413 = fmul float %412, 1.000000e+01 %414 = fmul float %413, %413 %415 = fsub float 1.000000e+00, %232 %416 = fsub float 1.000000e+00, %399 %417 = fmul float %405, 2.000000e+00 %418 = fmul float %405, %380 %419 = fmul float %417, %418 %420 = fadd float %419, 5.000000e-01 %421 = fsub float 1.000000e+00, %405 %422 = fsub float 1.000000e+00, %399 %423 = fsub float 1.000000e+00, %214 %424 = fadd float %83, %423 %425 = call float @llvm.AMDIL.clamp.(float %424, float 0.000000e+00, float 1.000000e+00) %426 = fmul float %422, %422 %427 = fmul float %422, %422 %428 = fmul float %427, %422 %429 = fmul float %426, %428 %430 = call float @llvm.AMDGPU.lrp(float %429, float %425, float %210) %431 = call float @llvm.AMDGPU.lrp(float %429, float %425, float %211) %432 = call float @llvm.AMDGPU.lrp(float %429, float %425, float %212) %433 = call float @llvm.AMDGPU.lrp(float %232, float 1.000000e+00, float %407) %434 = call float @llvm.AMDGPU.lrp(float %399, float 1.000000e+00, float %407) %435 = fmul float %433, %434 %436 = fadd float %435, 0x3F1A36E2E0000000 %437 = fdiv float 1.000000e+00, %436 %438 = fmul float %195, %390 %439 = fmul float %196, %391 %440 = fadd float %439, %438 %441 = fmul float %197, %392 %442 = fadd float %440, %441 %443 = call float @llvm.maxnum.f32(float %442, float 0.000000e+00) %444 = call float @llvm.pow.f32(float %443, float %414) %445 = fadd float %414, 1.000000e+00 %446 = fmul float %445, %74 %447 = fmul float %444, %446 %448 = fmul float %437, %447 %449 = fmul float %448, %232 %450 = fmul float %449, %73 %451 = call float @llvm.maxnum.f32(float %450, float 0.000000e+00) %452 = fmul float %451, %262 %453 = fmul float %451, %263 %454 = fmul float %451, %264 %455 = fsub float 1.000000e+00, %210 %456 = fsub float 1.000000e+00, %211 %457 = fsub float 1.000000e+00, %212 %458 = fmul float %421, %421 %459 = fmul float %421, %421 %460 = fmul float %459, %421 %461 = fmul float %458, %460 %462 = fmul float %455, %461 %463 = fadd float %462, %210 %464 = fmul float %456, %461 %465 = fadd float %464, %211 %466 = fmul float %457, %461 %467 = fadd float %466, %212 %468 = fadd float %420, -1.000000e+00 %469 = fmul float %415, %415 %470 = fmul float %415, %415 %471 = fmul float %470, %415 %472 = fmul float %469, %471 %473 = fmul float %468, %472 %474 = fadd float %473, 1.000000e+00 %475 = fadd float %420, -1.000000e+00 %476 = fmul float %416, %416 %477 = fmul float %416, %416 %478 = fmul float %477, %416 %479 = fmul float %476, %478 %480 = fmul float %475, %479 %481 = fadd float %480, 1.000000e+00 %482 = fmul float %474, %481 %483 = fmul float %482, %232 %484 = fmul float %262, %483 %485 = fadd float %484, %265 %486 = fmul float %263, %483 %487 = fadd float %486, %266 %488 = fmul float %264, %483 %489 = fadd float %488, %267 %490 = fmul float %215, %485 %491 = fmul float %216, %487 %492 = fmul float %217, %489 %493 = fmul float %452, %463 %494 = fadd float %493, %490 %495 = fmul float %453, %465 %496 = fadd float %495, %491 %497 = fmul float %454, %467 %498 = fadd float %497, %492 %499 = fmul float %377, %430 %500 = fadd float %499, %494 %501 = fmul float %378, %431 %502 = fadd float %501, %496 %503 = fmul float %379, %432 %504 = fadd float %503, %498 %505 = fmul float %136, %42 %506 = fadd float %505, %43 %507 = call float @llvm.AMDIL.clamp.(float %506, float 0.000000e+00, float 1.000000e+00) %508 = call float @llvm.AMDGPU.lrp(float %507, float %500, float %39) %509 = call float @llvm.AMDGPU.lrp(float %507, float %502, float %40) %510 = call float @llvm.AMDGPU.lrp(float %507, float %504, float %41) %511 = call i32 @llvm.SI.packf16(float %508, float %509) %512 = bitcast i32 %511 to float %513 = call i32 @llvm.SI.packf16(float %510, float 1.000000e+00) %514 = bitcast i32 %513 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %512, float %514, float %512, float %514) ret void IF89: ; preds = %IF86 %515 = fmul float %279, %279 %516 = fmul float %280, %280 %517 = fadd float %516, %515 %518 = fmul float %281, %281 %519 = fadd float %517, %518 %520 = call float @llvm.AMDGPU.rsq.clamped.f32(float %519) %521 = fmul float %279, %520 %522 = fmul float %280, %520 %523 = fmul float %281, %520 %524 = fsub float %54, %140 %525 = fsub float %55, %141 %526 = fsub float %56, %142 %527 = fdiv float 1.000000e+00, %521 %528 = fdiv float 1.000000e+00, %522 %529 = fdiv float 1.000000e+00, %523 %530 = fmul float %524, %527 %531 = fmul float %525, %528 %532 = fmul float %526, %529 %533 = fsub float %57, %140 %534 = fsub float %58, %141 %535 = fsub float %59, %142 %536 = fdiv float 1.000000e+00, %521 %537 = fdiv float 1.000000e+00, %522 %538 = fdiv float 1.000000e+00, %523 %539 = fmul float %533, %536 %540 = fmul float %534, %537 %541 = fmul float %535, %538 %542 = fcmp ogt float %521, 0.000000e+00 %543 = fcmp ogt float %522, 0.000000e+00 %544 = fcmp ogt float %523, 0.000000e+00 %.101 = select i1 %542, float %530, float %539 %temp68.1 = select i1 %543, float %531, float %540 %.102 = select i1 %544, float %532, float %541 %545 = fadd float %54, %57 %546 = fadd float %55, %58 %547 = fadd float %56, %59 %548 = fmul float %545, 5.000000e-01 %549 = fmul float %546, 5.000000e-01 %550 = fmul float %547, 5.000000e-01 %551 = call float @llvm.minnum.f32(float %.101, float %temp68.1) %552 = call float @llvm.minnum.f32(float %551, float %.102) %553 = fsub float %548, %60 %554 = fsub float %549, %61 %555 = fsub float %550, %62 %556 = fadd float %553, %140 %557 = fadd float %554, %141 %558 = fadd float %555, %142 %559 = fmul float %521, %552 %560 = fadd float %559, %556 %561 = fmul float %522, %552 %562 = fadd float %561, %557 %563 = fmul float %523, %552 %564 = fadd float %563, %558 %565 = fsub float %560, %548 %566 = fsub float %562, %549 %567 = fsub float %564, %550 br label %ENDIF88 ENDIF88: ; preds = %IF86, %IF89 %temp48.0 = phi float [ %565, %IF89 ], [ %279, %IF86 ] %temp49.0 = phi float [ %566, %IF89 ], [ %280, %IF86 ] %temp50.0 = phi float [ %567, %IF89 ], [ %281, %IF86 ] %568 = fsub float 1.000000e+00, %83 %569 = call float @llvm.pow.f32(float %568, float 7.500000e-01) %570 = fmul float %569, 7.000000e+00 %571 = insertelement <4 x float> undef, float %temp48.0, i32 0 %572 = insertelement <4 x float> %571, float %temp49.0, i32 1 %573 = insertelement <4 x float> %572, float %temp50.0, i32 2 %574 = insertelement <4 x float> %573, float %570, i32 3 %575 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %574) %576 = extractelement <4 x float> %575, i32 0 %577 = extractelement <4 x float> %575, i32 1 %578 = extractelement <4 x float> %575, i32 2 %579 = extractelement <4 x float> %575, i32 3 %580 = call float @llvm.fabs.f32(float %578) %581 = fdiv float 1.000000e+00, %580 %582 = fmul float %576, %581 %583 = fadd float %582, 1.500000e+00 %584 = fmul float %577, %581 %585 = fadd float %584, 1.500000e+00 %586 = bitcast float %585 to i32 %587 = bitcast float %583 to i32 %588 = bitcast float %579 to i32 %589 = bitcast float %570 to i32 %590 = insertelement <4 x i32> undef, i32 %586, i32 0 %591 = insertelement <4 x i32> %590, i32 %587, i32 1 %592 = insertelement <4 x i32> %591, i32 %588, i32 2 %593 = insertelement <4 x i32> %592, i32 %589, i32 3 %594 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %593, <32 x i8> %91, <16 x i8> %94, i32 4) %595 = extractelement <4 x float> %594, i32 0 %596 = extractelement <4 x float> %594, i32 1 %597 = extractelement <4 x float> %594, i32 2 %598 = extractelement <4 x float> %594, i32 3 %599 = call float @llvm.pow.f32(float %598, float %65) %600 = fmul float %64, %599 %601 = fmul float %600, %595 %602 = fmul float %600, %596 %603 = fmul float %600, %597 %604 = call float @llvm.AMDGPU.lrp(float %50, float %372, float %601) %605 = call float @llvm.AMDGPU.lrp(float %50, float %373, float %602) %606 = call float @llvm.AMDGPU.lrp(float %50, float %374, float %603) br label %ENDIF85 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v16, v0, 0, 0, [m0] ; C8400000 v_interp_p2_f32 v16, [v16], v1, 0, 0, [m0] ; C8410001 v_interp_p1_f32 v17, v0, 1, 0, [m0] ; C8440100 v_interp_p2_f32 v17, [v17], v1, 1, 0, [m0] ; C8450101 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800 v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801 v_interp_p1_f32 v10, v0, 1, 2, [m0] ; C8280900 v_interp_p2_f32 v10, [v10], v1, 1, 2, [m0] ; C8290901 v_interp_p1_f32 v11, v0, 2, 2, [m0] ; C82C0A00 v_interp_p2_f32 v11, [v11], v1, 2, 2, [m0] ; C82D0A01 v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00 v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01 v_interp_p1_f32 v18, v0, 1, 3, [m0] ; C8480D00 v_interp_p2_f32 v18, [v18], v1, 1, 3, [m0] ; C8490D01 v_interp_p1_f32 v19, v0, 2, 3, [m0] ; C84C0E00 v_interp_p2_f32 v19, [v19], v1, 2, 3, [m0] ; C84D0E01 v_interp_p1_f32 v2, v0, 0, 4, [m0] ; C8081000 v_interp_p2_f32 v2, [v2], v1, 0, 4, [m0] ; C8091001 v_interp_p1_f32 v3, v0, 1, 4, [m0] ; C80C1100 v_interp_p2_f32 v3, [v3], v1, 1, 4, [m0] ; C80D1101 v_interp_p1_f32 v4, v0, 2, 4, [m0] ; C8101200 v_interp_p2_f32 v4, [v4], v1, 2, 4, [m0] ; C8111201 v_interp_p1_f32 v21, v0, 0, 5, [m0] ; C8541400 v_interp_p2_f32 v21, [v21], v1, 0, 5, [m0] ; C8551401 v_interp_p1_f32 v22, v0, 1, 5, [m0] ; C8581500 v_interp_p2_f32 v22, [v22], v1, 1, 5, [m0] ; C8591501 v_interp_p1_f32 v23, v0, 3, 5, [m0] ; C85C1700 v_interp_p2_f32 v23, [v23], v1, 3, 5, [m0] ; C85D1701 v_interp_p1_f32 v5, v0, 0, 6, [m0] ; C8141800 v_interp_p2_f32 v5, [v5], v1, 0, 6, [m0] ; C8151801 v_interp_p1_f32 v24, v0, 1, 6, [m0] ; C8601900 v_interp_p2_f32 v24, [v24], v1, 1, 6, [m0] ; C8611901 v_interp_p1_f32 v28, v0, 2, 6, [m0] ; C8701A00 v_interp_p2_f32 v28, [v28], v1, 2, 6, [m0] ; C8711A01 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p1_f32 v29, v0, 3, 6, [m0] ; C8741B00 v_interp_p2_f32 v29, [v29], v1, 3, 6, [m0] ; C8751B01 v_interp_p1_f32 v25, v0, 0, 7, [m0] ; C8641C00 v_interp_p2_f32 v25, [v25], v1, 0, 7, [m0] ; C8651C01 v_interp_p1_f32 v20, v0, 1, 7, [m0] ; C8501D00 v_interp_p2_f32 v20, [v20], v1, 1, 7, [m0] ; C8511D01 v_interp_p1_f32 v26, v0, 2, 7, [m0] ; C8681E00 s_load_dwordx4 s[0:3], s[4:5], 0x8 ; C0800508 s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710 v_interp_p2_f32 v26, [v26], v1, 2, 7, [m0] ; C8691E01 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[8:11], 0x4f ; C206094F s_buffer_load_dword s13, s[8:11], 0x50 ; C2068950 s_load_dwordx4 s[32:35], s[4:5], 0xc ; C090050C s_load_dwordx4 s[16:19], s[4:5], 0x10 ; C0880510 s_load_dwordx4 s[20:23], s[4:5], 0x14 ; C08A0514 s_load_dwordx8 s[52:59], s[6:7], 0x18 ; C0DA0718 s_load_dwordx8 s[36:43], s[6:7], 0x20 ; C0D20720 s_load_dwordx8 s[44:51], s[6:7], 0x28 ; C0D60728 image_sample v[30:33], 15, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[24:31], s[0:3] ; F0800F00 00061E10 s_buffer_load_dword s1, s[8:11], 0x4c ; C200894C s_buffer_load_dword s2, s[8:11], 0x4d ; C201094D s_buffer_load_dword s3, s[8:11], 0x4e ; C201894E s_buffer_load_dword s14, s[8:11], 0x5c ; C207095C s_buffer_load_dword s26, s[8:11], 0x60 ; C20D0960 s_buffer_load_dword s0, s[8:11], 0x64 ; C2000964 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s12, v33 ; 1000420C v_cmp_gt_f32_e32 vcc, s13, v0 ; 7C08000D v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[52:59], s[32:35] ; F0800A00 010D0010 v_mul_f32_e32 v13, s1, v30 ; 101A3C01 v_mul_f32_e32 v14, s2, v31 ; 101C3E02 v_mul_f32_e32 v15, s3, v32 ; 101E4003 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4 v_mul_f32_e32 v1, s14, v1 ; 1002020E v_mul_f32_e32 v0, s14, v0 ; 1000000E v_mul_f32_e32 v6, v6, v1 ; 100C0306 v_mac_f32_e32 v6, v9, v0 ; 3E0C0109 v_mul_f32_e32 v7, v7, v1 ; 100E0307 v_mac_f32_e32 v7, v10, v0 ; 3E0E010A v_mul_f32_e32 v10, v8, v1 ; 10140308 v_mac_f32_e32 v10, v11, v0 ; 3E14010B v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mac_f32_e32 v0, v1, v1 ; 3E000301 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v0, 1.0, v0 ; 080000F2 v_sqrt_f32_e32 v0, v0 ; 7E006700 v_mac_f32_e32 v6, v12, v0 ; 3E0C010C v_mac_f32_e32 v7, v18, v0 ; 3E0E0112 v_mac_f32_e32 v10, v19, v0 ; 3E140113 v_mul_f32_e32 v0, v6, v6 ; 10000D06 v_mac_f32_e32 v0, v7, v7 ; 3E000F07 v_mac_f32_e32 v0, v10, v10 ; 3E00150A v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_mul_f32_e32 v1, v24, v24 ; 10023118 v_mac_f32_e32 v1, v28, v28 ; 3E02391C v_mac_f32_e32 v1, v29, v29 ; 3E023B1D v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 v_mul_f32_e32 v9, v0, v6 ; 10120D00 v_mul_f32_e32 v8, v0, v7 ; 10100F00 v_mul_f32_e32 v7, v0, v10 ; 100E1500 v_mul_f32_e32 v11, v1, v24 ; 10163101 v_mul_f32_e32 v10, v1, v28 ; 10143901 v_mul_f32_e32 v0, v11, v9 ; 1000130B v_mac_f32_e32 v0, v10, v8 ; 3E00110A v_mul_f32_e32 v12, v1, v29 ; 10183B01 v_mac_f32_e32 v0, v12, v7 ; 3E000F0C v_mul_f32_e32 v6, v9, v0 ; 100C0109 v_mac_f32_e32 v6, v9, v0 ; 3E0C0109 v_mul_f32_e32 v18, v8, v0 ; 10240108 v_mac_f32_e32 v18, v8, v0 ; 3E240108 v_mad_f32 v27, v24, v1, -v6 ; D282001B 841A0318 v_mad_f32 v28, v28, v1, -v18 ; D282001C 844A031C v_mov_b32_e32 v6, 0x6f800000 ; 7E0C02FF 6F800000 v_cmp_gt_f32_e64 vcc, |v23|, v6 ; D008016A 00020D17 v_mov_b32_e32 v6, 0x2f800000 ; 7E0C02FF 2F800000 v_cndmask_b32_e32 v6, 1.0, v6 ; 000C0CF2 v_mul_f32_e32 v18, v6, v23 ; 10242F06 v_rcp_f32_e32 v18, v18 ; 7E245512 v_mul_f32_e32 v19, v7, v0 ; 10260107 v_mac_f32_e32 v19, v7, v0 ; 3E260107 v_mad_f32 v29, v29, v1, -v19 ; D282001D 844E031D v_mul_f32_e32 v0, v18, v21 ; 10002B12 v_mul_f32_e32 v1, v18, v22 ; 10022D12 s_buffer_load_dword s1, s[8:11], 0x40 ; C2008940 s_buffer_load_dword s2, s[8:11], 0x41 ; C2010941 s_buffer_load_dword s3, s[8:11], 0x42 ; C2018942 v_mul_f32_e32 v18, v0, v6 ; 10240D00 v_mul_f32_e32 v19, v1, v6 ; 10260D01 s_buffer_load_dword s12, s[8:11], 0x27 ; C2060927 s_buffer_load_dword s13, s[8:11], 0x2b ; C206892B s_buffer_load_dword s32, s[8:11], 0x2c ; C210092C s_buffer_load_dword s33, s[8:11], 0x2d ; C210892D v_sub_f32_e64 v0, 1.0, s26 ; D2080000 000034F2 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s1, v0 ; 100C0001 v_mul_f32_e32 v1, s2, v0 ; 10020002 v_mul_f32_e32 v0, s3, v0 ; 10000003 v_mac_f32_e32 v6, s26, v13 ; 3E0C1A1A v_mov_b32_e32 v30, v27 ; 7E3C031B v_mac_f32_e32 v1, s26, v14 ; 3E021C1A v_mov_b32_e32 v31, v28 ; 7E3E031C v_mac_f32_e32 v0, s26, v15 ; 3E001E1A v_mov_b32_e32 v32, v29 ; 7E40031D v_cmp_lt_f32_e64 s[2:3], 0, s13 ; D0020002 00001A80 image_sample v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[36:43], s[16:19] ; F0800F00 00891510 image_sample v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[44:51], s[20:23] ; F0800F00 00AB1012 s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[14:15], s[2:3] ; BE8E2402 s_xor_b64 s[14:15], exec, s[14:15] ; 898E0E7E s_cbranch_execz BB0_2 ; BF880000 s_buffer_load_dword s1, s[8:11], 0x20 ; C2008920 s_buffer_load_dword s2, s[8:11], 0x21 ; C2010921 s_buffer_load_dword s3, s[8:11], 0x22 ; C2018922 s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924 s_buffer_load_dword s16, s[8:11], 0x25 ; C2080925 v_mul_f32_e32 v17, v27, v27 ; 1022371B v_mac_f32_e32 v17, v28, v28 ; 3E22391C v_mac_f32_e32 v17, v29, v29 ; 3E223B1D v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 s_buffer_load_dword s17, s[8:11], 0x26 ; C2088926 s_buffer_load_dword s18, s[8:11], 0x28 ; C2090928 s_buffer_load_dword s19, s[8:11], 0x29 ; C2098929 s_buffer_load_dword s20, s[8:11], 0x2a ; C20A092A v_mul_f32_e32 v18, v17, v27 ; 10243711 v_mul_f32_e32 v19, v17, v28 ; 10263911 v_mul_f32_e32 v17, v17, v29 ; 10223B11 v_rcp_f32_e32 v21, v18 ; 7E2A5512 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v23, s1, v25 ; 082E3201 v_sub_f32_e32 v24, s2, v20 ; 08302802 v_rcp_f32_e32 v30, v19 ; 7E3C5513 v_mul_f32_e32 v23, v21, v23 ; 102E2F15 v_sub_f32_e32 v31, s13, v25 ; 083E320D v_mul_f32_e32 v21, v21, v31 ; 102A3F15 v_cmp_lt_f32_e32 vcc, 0, v18 ; 7C022480 v_cndmask_b32_e32 v21, v21, v23 ; 002A2F15 v_rcp_f32_e32 v23, v17 ; 7E2E5511 v_mul_f32_e32 v24, v30, v24 ; 1030311E v_sub_f32_e32 v31, s16, v20 ; 083E2810 v_mul_f32_e32 v30, v30, v31 ; 103C3F1E v_cmp_lt_f32_e32 vcc, 0, v19 ; 7C022680 v_cndmask_b32_e32 v24, v30, v24 ; 0030311E v_sub_f32_e32 v30, s3, v26 ; 083C3403 v_mul_f32_e32 v30, v23, v30 ; 103C3D17 v_sub_f32_e32 v31, s17, v26 ; 083E3411 v_mul_f32_e32 v23, v23, v31 ; 102E3F17 v_cmp_lt_f32_e32 vcc, 0, v17 ; 7C022280 v_cndmask_b32_e32 v23, v23, v30 ; 002E3D17 v_min3_f32 v21, v21, v24, v23 ; D2A20015 045E3115 v_mov_b32_e32 v23, s13 ; 7E2E020D v_add_f32_e32 v23, s1, v23 ; 062E2E01 v_mov_b32_e32 v24, s16 ; 7E300210 v_add_f32_e32 v24, s2, v24 ; 06303002 v_mov_b32_e32 v30, s17 ; 7E3C0211 v_add_f32_e32 v32, s3, v30 ; 06403C03 v_mad_f32 v30, 0.5, v23, -s18 ; D282001E 804A2EF0 v_add_f32_e32 v30, v25, v30 ; 063C3D19 v_mac_f32_e32 v30, v21, v18 ; 3E3C2515 v_mad_f32 v18, 0.5, v24, -s19 ; D2820012 804E30F0 v_add_f32_e32 v18, v20, v18 ; 06242514 v_mac_f32_e32 v18, v21, v19 ; 3E242715 v_mad_f32 v19, 0.5, v32, -s20 ; D2820013 805240F0 v_add_f32_e32 v19, v26, v19 ; 0626271A v_mac_f32_e32 v19, v21, v17 ; 3E262315 v_mad_f32 v30, 0.5, -v23, v30 ; D282001E 447A2EF0 v_mad_f32 v31, 0.5, -v24, v18 ; D282001F 444A30F0 v_mad_f32 v32, 0.5, -v32, v19 ; D2820020 444E40F0 s_or_b64 exec, exec, s[14:15] ; 88FE0E7E s_buffer_load_dword s28, s[8:11], 0x17 ; C20E0917 s_buffer_load_dword s29, s[8:11], 0x43 ; C20E8943 s_buffer_load_dword s27, s[8:11], 0x44 ; C20D8944 s_buffer_load_dword s19, s[8:11], 0x45 ; C2098945 s_buffer_load_dword s16, s[8:11], 0x46 ; C2080946 s_buffer_load_dword s2, s[8:11], 0x0 ; C2010900 s_buffer_load_dword s3, s[8:11], 0x1 ; C2018901 s_buffer_load_dword s1, s[8:11], 0x2 ; C2008902 s_buffer_load_dword s13, s[8:11], 0x4 ; C2068904 s_buffer_load_dword s14, s[8:11], 0x5 ; C2070905 s_buffer_load_dword s15, s[8:11], 0x6 ; C2078906 s_buffer_load_dword s17, s[8:11], 0x7 ; C2088907 s_buffer_load_dword s18, s[8:11], 0x8 ; C2090908 s_buffer_load_dword s20, s[8:11], 0x9 ; C20A0909 s_buffer_load_dword s21, s[8:11], 0xa ; C20A890A s_buffer_load_dword s22, s[8:11], 0xb ; C20B090B s_buffer_load_dword s23, s[8:11], 0xc ; C20B890C s_buffer_load_dword s24, s[8:11], 0xd ; C20C090D s_buffer_load_dword s25, s[8:11], 0xe ; C20C890E v_sub_f32_e64 v17, 1.0, s0 ; D2080011 000000F2 v_log_f32_e32 v17, v17 ; 7E224F11 v_mul_legacy_f32_e32 v17, 0x3f400000, v17 ; 0E2222FF 3F400000 v_exp_f32_e32 v17, v17 ; 7E224B11 v_mul_f32_e32 v33, 0x40e00000, v17 ; 104222FF 40E00000 v_cubeid_f32 v37, v30, v31, v32 ; D2880025 04823F1E v_cubema_f32 v36, v30, v31, v32 ; D28E0024 04823F1E s_load_dwordx4 s[36:39], s[4:5], 0x0 ; C0920500 s_load_dwordx8 s[40:47], s[6:7], 0x0 ; C0D40700 v_cubesc_f32 v35, v30, v31, v32 ; D28A0023 04823F1E v_cubetc_f32 v34, v30, v31, v32 ; D28C0022 04823F1E v_rcp_f32_e64 v17, |v36| ; D3540111 00000124 v_mov_b32_e32 v30, 0x3fc00000 ; 7E3C02FF 3FC00000 v_mad_f32 v31, v17, v34, v30 ; D282001F 047A4511 v_mac_f32_e32 v30, v17, v35 ; 3E3C4711 v_mov_b32_e32 v32, v37 ; 7E400325 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[30:33], 15, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[40:47], s[36:39] ; F0900F00 012A1E1E s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v17, v33 ; 7E224F21 s_buffer_load_dword s30, s[8:11], 0xf ; C20F090F s_buffer_load_dword s31, s[8:11], 0x6c ; C20F896C v_mul_legacy_f32_e32 v17, s33, v17 ; 0E222221 v_exp_f32_e32 v17, v17 ; 7E224B11 v_mul_f32_e32 v17, s32, v17 ; 10222220 v_mul_f32_e32 v19, v30, v17 ; 1026231E v_mul_f32_e32 v18, v31, v17 ; 1024231F v_mul_f32_e32 v17, v32, v17 ; 10222320 v_mov_b32_e32 v21, s26 ; 7E2A021A v_mov_b32_e32 v23, 0x3f7fff58 ; 7E2E02FF 3F7FFF58 v_cmp_lt_f32_e32 vcc, s12, v23 ; 7C022E0C s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[32:33], vcc ; BEA0246A s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E s_cbranch_execz BB0_6 ; BF880000 s_buffer_load_dword s35, s[8:11], 0x3b ; C211893B s_buffer_load_dword s26, s[8:11], 0x3c ; C20D093C s_buffer_load_dword s34, s[8:11], 0x3d ; C211093D s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[36:37], 0, s35 ; D0020024 00004680 s_and_saveexec_b64 s[36:37], s[36:37] ; BEA42424 s_xor_b64 s[36:37], exec, s[36:37] ; 89A4247E s_cbranch_execz BB0_7 ; BF880000 s_buffer_load_dword s35, s[8:11], 0x36 ; C2118936 s_buffer_load_dword s38, s[8:11], 0x38 ; C2130938 s_buffer_load_dword s39, s[8:11], 0x39 ; C2138939 s_buffer_load_dword s40, s[8:11], 0x3a ; C214093A s_buffer_load_dword s41, s[8:11], 0x30 ; C2148930 s_buffer_load_dword s42, s[8:11], 0x31 ; C2150931 s_buffer_load_dword s43, s[8:11], 0x32 ; C2158932 s_buffer_load_dword s44, s[8:11], 0x34 ; C2160934 s_buffer_load_dword s45, s[8:11], 0x35 ; C2168935 v_mul_f32_e32 v23, v27, v27 ; 102E371B v_mac_f32_e32 v23, v28, v28 ; 3E2E391C v_mac_f32_e32 v23, v29, v29 ; 3E2E3B1D v_rsq_clamp_f32_e32 v23, v23 ; 7E2E5917 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v24, s35, v26 ; 08303423 v_mov_b32_e32 v30, s35 ; 7E3C0223 v_sub_f32_e32 v31, s41, v25 ; 083E3229 v_sub_f32_e32 v32, s42, v20 ; 0840282A v_add_f32_e32 v30, s43, v30 ; 063C3C2B v_sub_f32_e32 v33, s43, v26 ; 0842342B v_mad_f32 v34, 0.5, v30, -s40 ; D2820022 80A23CF0 v_add_f32_e32 v26, v26, v34 ; 0634451A v_mul_f32_e32 v27, v23, v27 ; 10363717 v_mul_f32_e32 v28, v23, v28 ; 10383917 v_mul_f32_e32 v23, v23, v29 ; 102E3B17 v_rcp_f32_e32 v29, v27 ; 7E3A551B v_rcp_f32_e32 v34, v28 ; 7E44551C v_rcp_f32_e32 v35, v23 ; 7E465517 v_sub_f32_e32 v36, s44, v25 ; 0848322C v_mov_b32_e32 v37, s44 ; 7E4A022C v_add_f32_e32 v37, s41, v37 ; 064A4A29 v_mul_f32_e32 v31, v29, v31 ; 103E3F1D v_mul_f32_e32 v29, v29, v36 ; 103A491D v_mul_f32_e32 v32, v34, v32 ; 10404122 v_mul_f32_e32 v33, v35, v33 ; 10424323 v_mul_f32_e32 v24, v35, v24 ; 10303123 v_mad_f32 v35, 0.5, v37, -s38 ; D2820023 809A4AF0 v_add_f32_e32 v25, v25, v35 ; 06324719 v_sub_f32_e32 v35, s45, v20 ; 0846282D v_mov_b32_e32 v36, s45 ; 7E48022D v_mul_f32_e32 v34, v34, v35 ; 10444722 v_add_f32_e32 v35, s42, v36 ; 0646482A v_cmp_lt_f32_e32 vcc, 0, v27 ; 7C023680 v_cndmask_b32_e32 v29, v29, v31 ; 003A3F1D v_cmp_lt_f32_e32 vcc, 0, v28 ; 7C023880 v_cndmask_b32_e32 v31, v34, v32 ; 003E4122 v_cmp_lt_f32_e32 vcc, 0, v23 ; 7C022E80 v_cndmask_b32_e32 v24, v24, v33 ; 00304318 v_min3_f32 v24, v29, v31, v24 ; D2A20018 04623F1D v_mad_f32 v29, 0.5, v35, -s39 ; D282001D 809E46F0 v_add_f32_e32 v20, v20, v29 ; 06283B14 v_mac_f32_e32 v25, v24, v27 ; 3E323718 v_mac_f32_e32 v20, v24, v28 ; 3E283918 v_mac_f32_e32 v26, v24, v23 ; 3E342F18 v_mad_f32 v27, 0.5, -v37, v25 ; D282001B 44664AF0 v_mad_f32 v28, 0.5, -v35, v20 ; D282001C 445246F0 v_mad_f32 v29, 0.5, -v30, v26 ; D282001D 446A3CF0 s_or_b64 exec, exec, s[36:37] ; 88FE247E v_sub_f32_e64 v20, 1.0, s0 ; D2080014 000000F2 v_log_f32_e32 v20, v20 ; 7E284F14 s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 v_mul_legacy_f32_e32 v20, 0x3f400000, v20 ; 0E2828FF 3F400000 v_exp_f32_e32 v20, v20 ; 7E284B14 v_mul_f32_e32 v30, 0x40e00000, v20 ; 103C28FF 40E00000 v_cubeid_f32 v26, v27, v28, v29 ; D288001A 0476391B v_cubema_f32 v25, v27, v28, v29 ; D28E0019 0476391B s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 v_cubesc_f32 v24, v27, v28, v29 ; D28A0018 0476391B v_cubetc_f32 v23, v27, v28, v29 ; D28C0017 0476391B v_rcp_f32_e64 v20, |v25| ; D3540114 00000119 v_mov_b32_e32 v27, 0x3fc00000 ; 7E3602FF 3FC00000 v_mad_f32 v28, v20, v23, v27 ; D282001C 046E2F14 v_mac_f32_e32 v27, v20, v24 ; 3E363114 v_mov_b32_e32 v29, v26 ; 7E3A031A s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[40:47], s[36:39] ; F0900F00 012A171B s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v20, v26 ; 7E284F1A v_sub_f32_e64 v26, 1.0, s12 ; D208001A 000018F2 v_mul_legacy_f32_e32 v20, s34, v20 ; 0E282822 v_exp_f32_e32 v20, v20 ; 7E284B14 v_mul_f32_e32 v20, s26, v20 ; 1028281A v_mul_f32_e32 v23, v23, v20 ; 102E2917 v_mul_f32_e32 v24, v24, v20 ; 10302918 v_mul_f32_e32 v20, v25, v20 ; 10282919 v_mul_f32_e32 v23, v23, v26 ; 102E3517 v_mul_f32_e32 v24, v24, v26 ; 10303518 v_mul_f32_e32 v20, v20, v26 ; 10283514 v_mac_f32_e32 v23, s12, v19 ; 3E2E260C v_mac_f32_e32 v24, s12, v18 ; 3E30240C v_mac_f32_e32 v20, s12, v17 ; 3E28220C v_mov_b32_e32 v17, v20 ; 7E220314 v_mov_b32_e32 v18, v24 ; 7E240318 v_mov_b32_e32 v19, v23 ; 7E260317 s_or_b64 exec, exec, s[32:33] ; 88FE207E v_mad_f32 v24, -v21, s29, s29 ; D2820018 20743B15 v_mov_b32_e32 v20, s28 ; 7E28021C v_mul_f32_e32 v21, v24, v13 ; 102A1B18 v_mul_f32_e32 v14, v24, v14 ; 101C1D18 v_mul_f32_e32 v13, v24, v15 ; 101A1F18 v_mul_f32_e32 v15, s27, v16 ; 101E201B v_sub_f32_e64 v25, 1.0, s31 ; D2080019 00003EF2 v_mac_f32_e32 v25, s31, v22 ; 3E322C1F v_mul_f32_e32 v22, s19, v16 ; 102C2013 v_mul_f32_e32 v16, s16, v16 ; 10202010 s_buffer_load_dword s6, s[8:11], 0x10 ; C2030910 s_buffer_load_dword s5, s[8:11], 0x11 ; C2028911 s_buffer_load_dword s4, s[8:11], 0x12 ; C2020912 s_buffer_load_dword s16, s[8:11], 0x16 ; C2080916 s_buffer_load_dword s7, s[8:11], 0x48 ; C2038948 s_buffer_load_dword s12, s[8:11], 0x49 ; C2060949 s_buffer_load_dword s8, s[8:11], 0x4b ; C204094B v_mul_f32_e32 v23, s14, v8 ; 102E100E v_mac_f32_e32 v23, s13, v9 ; 3E2E120D v_mac_f32_e32 v23, s15, v7 ; 3E2E0E0F v_add_f32_e32 v23, s17, v23 ; 062E2E11 v_mul_f32_e32 v26, s20, v8 ; 10341014 v_mac_f32_e32 v26, s18, v9 ; 3E341212 v_mac_f32_e32 v26, s21, v7 ; 3E340E15 v_add_f32_e32 v26, s22, v26 ; 06343416 v_mul_f32_e32 v27, s24, v8 ; 10361018 v_mac_f32_e32 v27, s23, v9 ; 3E361217 v_mac_f32_e32 v27, s25, v7 ; 3E360E19 v_add_f32_e32 v27, s30, v27 ; 0636361E v_add_f32_e32 v2, v23, v2 ; 06040517 v_add_f32_e32 v3, v26, v3 ; 0606071A v_add_f32_e32 v26, v27, v4 ; 0634091B v_mul_f32_e32 v4, s2, v9 ; 10081202 v_mac_f32_e32 v4, s3, v8 ; 3E081003 v_mac_f32_e32 v4, s1, v7 ; 3E080E01 v_max_f32_e32 v23, 0, v4 ; 202E0880 v_mul_f32_e32 v4, v25, v2 ; 10080519 v_mul_f32_e32 v2, v25, v3 ; 10040719 v_mul_f32_e32 v3, v25, v26 ; 10063519 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v20, s16, v5 ; 3E280A10 v_mul_f32_e32 v5, v25, v19 ; 100A2719 v_mul_f32_e32 v18, v25, v18 ; 10242519 v_mul_f32_e32 v17, v25, v17 ; 10222319 v_sub_f32_e32 v19, 1.0, v24 ; 082630F2 v_add_f32_e32 v19, s0, v19 ; 06262600 v_add_f32_e64 v19, 0, v19 clamp ; D2060813 00022680 v_sub_f32_e32 v24, s2, v11 ; 08301602 v_sub_f32_e32 v25, s3, v10 ; 08321403 v_mul_f32_e32 v26, v24, v24 ; 10343118 v_mac_f32_e32 v26, v25, v25 ; 3E343319 v_sub_f32_e32 v27, s1, v12 ; 08361801 v_mac_f32_e32 v26, v27, v27 ; 3E34371B v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A v_mul_f32_e32 v24, v26, v24 ; 1030311A v_mul_f32_e32 v25, v26, v25 ; 1032331A v_mul_f32_e32 v26, v26, v27 ; 1034371A v_mul_f32_e32 v11, v11, v9 ; 1016130B v_mad_f32 v10, -v10, v8, -v11 ; D282000A A42E110A v_mad_f32 v10, -v12, v7, v10 ; D282000A 242A0F0C v_mul_f32_e32 v9, v24, v9 ; 10121318 v_mac_f32_e32 v9, v25, v8 ; 3E121119 v_mul_f32_e32 v8, s2, v24 ; 10103002 v_mac_f32_e32 v8, s3, v25 ; 3E103203 v_mac_f32_e32 v9, v26, v7 ; 3E120F1A v_mac_f32_e32 v8, s1, v26 ; 3E103401 v_max_f32_e32 v7, 0, v8 ; 200E1080 v_sub_f32_e32 v8, 1.0, v7 ; 08100EF2 v_mul_f32_e32 v11, v8, v8 ; 10161108 v_mul_f32_e32 v8, v8, v11 ; 10101708 v_mul_f32_e32 v8, v8, v11 ; 10101708 v_max_f32_e32 v10, 0, v10 ; 20141480 v_sub_f32_e32 v11, 1.0, v10 ; 081614F2 v_mul_f32_e32 v12, v11, v11 ; 1018170B v_mul_f32_e32 v24, v11, v12 ; 1030190B v_mad_f32 v25, -v12, v24, 1.0 ; D2820019 23CA310C v_mul_f32_e32 v26, v6, v25 ; 10343306 v_sub_f32_e32 v27, 1.0, v6 ; 08360CF2 v_mac_f32_e32 v6, v8, v27 ; 3E0C3708 v_mul_f32_e32 v27, v1, v25 ; 10363301 v_sub_f32_e32 v28, 1.0, v1 ; 083802F2 v_mac_f32_e32 v1, v8, v28 ; 3E023908 v_mul_f32_e32 v25, v0, v25 ; 10323300 v_sub_f32_e32 v28, 1.0, v0 ; 083800F2 v_mac_f32_e32 v0, v8, v28 ; 3E003908 v_sub_f32_e64 v8, 1.0, s0 ; D2080008 000000F2 v_sub_f32_e32 v28, 1.0, v8 ; 083810F2 v_mov_b32_e32 v29, 0x3cf5c28f ; 7E3A02FF 3CF5C28F v_madmk_f32_e32 v28, v28, v29, 0x3f77ced9 ; 40383B1C 3F77CED9 v_add_f32_e32 v29, v7, v7 ; 063A0F07 v_mul_f32_e32 v7, v8, v7 ; 100E0F08 v_mad_f32 v7, v29, v7, 0.5 ; D2820007 03C20F1D v_mul_f32_e32 v12, v24, v12 ; 10181918 v_mac_f32_e32 v26, v19, v12 ; 3E341913 v_mac_f32_e32 v27, v19, v12 ; 3E361913 v_mac_f32_e32 v25, v19, v12 ; 3E321913 v_mul_f32_e32 v8, v8, v8 ; 10101108 v_log_f32_e32 v19, v28 ; 7E264F1C v_mul_f32_e32 v8, s8, v8 ; 10101008 v_mul_f32_e32 v11, v8, v11 ; 10161708 v_mac_f32_e32 v11, 1.0, v10 ; 3E1614F2 v_rcp_f32_e32 v10, v19 ; 7E145513 v_sub_f32_e32 v19, 1.0, v23 ; 08262EF2 v_mul_f32_e32 v8, v8, v19 ; 10102708 v_mac_f32_e32 v8, 1.0, v23 ; 3E102EF2 v_max_f32_e32 v9, 0, v9 ; 20121280 v_log_f32_e32 v9, v9 ; 7E124F09 v_madak_f32_e32 v8, v8, v11, 0x38d1b717 ; 42101708 38D1B717 v_mul_f32_e32 v10, 0x41200000, v10 ; 101414FF 41200000 v_mul_f32_e32 v11, v10, v10 ; 1016150A v_mul_legacy_f32_e32 v9, v11, v9 ; 0E12130B v_rcp_f32_e32 v8, v8 ; 7E105508 v_mad_f32 v10, v10, v10, 1.0 ; D282000A 03CA150A v_mul_f32_e32 v10, s12, v10 ; 1014140C v_exp_f32_e32 v9, v9 ; 7E124B09 v_mul_f32_e32 v9, v10, v9 ; 1012130A v_mul_f32_e32 v8, v9, v8 ; 10101109 v_mul_f32_e32 v9, v19, v19 ; 10122713 v_mul_f32_e32 v10, v19, v9 ; 10141313 v_mul_f32_e32 v9, v10, v9 ; 1012130A v_add_f32_e32 v7, -1.0, v7 ; 060E0EF3 v_mad_f32 v9, v7, v9, 1.0 ; D2820009 03CA1307 v_mad_f32 v7, v7, v12, 1.0 ; D2820007 03CA1907 v_mul_f32_e32 v7, v7, v9 ; 100E1307 v_mul_f32_e32 v8, v23, v8 ; 10101117 v_mul_f32_e32 v8, s7, v8 ; 10101007 v_mul_f32_e32 v7, v23, v7 ; 100E0F17 v_mac_f32_e32 v4, v7, v15 ; 3E081F07 v_mul_f32_e32 v4, v4, v21 ; 10082B04 v_max_f32_e32 v8, 0, v8 ; 20101080 v_mul_f32_e32 v9, v15, v8 ; 1012110F v_mac_f32_e32 v4, v6, v9 ; 3E081306 v_mac_f32_e32 v2, v7, v22 ; 3E042D07 v_mac_f32_e32 v3, v7, v16 ; 3E062107 v_mul_f32_e32 v6, v22, v8 ; 100C1116 v_mul_f32_e32 v7, v16, v8 ; 100E1110 v_mul_f32_e32 v2, v2, v14 ; 10041D02 v_mul_f32_e32 v3, v3, v13 ; 10061B03 v_mac_f32_e32 v2, v1, v6 ; 3E040D01 v_mac_f32_e32 v3, v0, v7 ; 3E060F00 v_mac_f32_e32 v4, v26, v5 ; 3E080B1A v_mac_f32_e32 v2, v27, v18 ; 3E04251B v_mac_f32_e32 v3, v25, v17 ; 3E062319 v_add_f32_e64 v0, 0, v20 clamp ; D2060800 00022880 v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 v_mul_f32_e32 v5, s6, v1 ; 100A0206 v_mac_f32_e32 v5, v4, v0 ; 3E0A0104 v_mul_f32_e32 v4, s5, v1 ; 10080205 v_mac_f32_e32 v4, v2, v0 ; 3E080102 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mac_f32_e32 v1, v3, v0 ; 3E020103 v_cvt_pkrtz_f16_f32_e32 v0, v5, v4 ; 5E000905 v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 40 Code Size: 2420 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL OUT[6], GENERIC[5] DCL CONST[0..20] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[6], IN[0].xxxx 1: MAD TEMP[0], CONST[7], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0].xyz, CONST[9], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[17], IN[0].xxxx 5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1] 8: MAD TEMP[2].xy, IN[2].xyyy, CONST[14].xyyy, CONST[14].zwww 9: FSEQ TEMP[3].x, CONST[16].xxxx, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].xy, IN[2].xyxx 12: ELSE :0 13: MOV TEMP[3].xy, IN[3].xyxx 14: ENDIF 15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[15].xyyy, CONST[15].zwww 16: MOV TEMP[2].zw, TEMP[3].yyxy 17: MOV TEMP[3].x, CONST[10].xxxx 18: MOV TEMP[3].y, CONST[11].xxxx 19: MOV TEMP[3].z, CONST[12].xxxx 20: MOV TEMP[4].x, CONST[10].yyyy 21: MOV TEMP[4].y, CONST[11].yyyy 22: MOV TEMP[4].z, CONST[12].yyyy 23: MOV TEMP[5].x, CONST[10].zzzz 24: MOV TEMP[5].y, CONST[11].zzzz 25: MOV TEMP[5].z, CONST[12].zzzz 26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz 29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 30: RSQ TEMP[4].x, TEMP[4].xxxx 31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 32: MOV TEMP[4].xyz, TEMP[3].xyzx 33: MUL TEMP[5].xyw, TEMP[1], IMM[0].yyyy 34: MOV TEMP[6].x, TEMP[5].xxxx 35: MUL TEMP[7].x, TEMP[5].yyyy, CONST[1].xxxx 36: MOV TEMP[6].y, TEMP[7].xxxx 37: ADD TEMP[5].xy, TEMP[6].xyyy, TEMP[5].wwww 38: MOV TEMP[5].zw, TEMP[1].wwzw 39: MUL TEMP[6], TEMP[3].xyzz, TEMP[3].yzzx 40: DP4 TEMP[7].x, CONST[2], TEMP[6] 41: DP4 TEMP[8].x, CONST[3], TEMP[6] 42: MOV TEMP[7].y, TEMP[8].xxxx 43: DP4 TEMP[6].x, CONST[4], TEMP[6] 44: MOV TEMP[7].z, TEMP[6].xxxx 45: MUL TEMP[6].x, TEMP[3].yyyy, TEMP[3].yyyy 46: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[6].xxxx 47: MAD TEMP[3].xyz, CONST[5].xyzz, TEMP[3].xxxx, TEMP[7].xyzz 48: ADD TEMP[6].xyz, TEMP[0].xyzz, -CONST[0].xyzz 49: MOV TEMP[6].yzw, TEMP[6].yxyz 50: MOV TEMP[6].x, TEMP[1].zzzz 51: MOV TEMP[0].xyz, TEMP[0].xyzx 52: MOV OUT[6], TEMP[0] 53: MOV OUT[1], TEMP[2] 54: MOV OUT[2], TEMP[4] 55: MOV OUT[3], TEMP[3] 56: MOV OUT[4], TEMP[5] 57: MOV OUT[0], TEMP[1] 58: MOV OUT[5], TEMP[6] 59: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332) %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 %83 = add i32 %5, %7 %84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83) %85 = extractelement <4 x float> %84, i32 0 %86 = extractelement <4 x float> %84, i32 1 %87 = extractelement <4 x float> %84, i32 2 %88 = extractelement <4 x float> %84, i32 3 %89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0 %91 = add i32 %5, %7 %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91) %93 = extractelement <4 x float> %92, i32 0 %94 = extractelement <4 x float> %92, i32 1 %95 = extractelement <4 x float> %92, i32 2 %96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0 %98 = add i32 %5, %7 %99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !tbaa !0 %104 = add i32 %5, %7 %105 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %103, i32 0, i32 %104) %106 = extractelement <4 x float> %105, i32 0 %107 = extractelement <4 x float> %105, i32 1 %108 = fmul float %32, %85 %109 = fmul float %33, %85 %110 = fmul float %34, %85 %111 = fmul float %35, %85 %112 = fmul float %36, %86 %113 = fadd float %112, %108 %114 = fmul float %37, %86 %115 = fadd float %114, %109 %116 = fmul float %38, %86 %117 = fadd float %116, %110 %118 = fmul float %39, %86 %119 = fadd float %118, %111 %120 = fmul float %40, %87 %121 = fadd float %120, %113 %122 = fmul float %41, %87 %123 = fadd float %122, %115 %124 = fmul float %42, %87 %125 = fadd float %124, %117 %126 = fmul float %43, %87 %127 = fadd float %126, %119 %128 = fmul float %44, %88 %129 = fadd float %128, %121 %130 = fmul float %45, %88 %131 = fadd float %130, %123 %132 = fmul float %46, %88 %133 = fadd float %132, %125 %134 = fmul float %65, %85 %135 = fmul float %66, %85 %136 = fmul float %67, %85 %137 = fmul float %68, %85 %138 = fmul float %69, %86 %139 = fadd float %138, %134 %140 = fmul float %70, %86 %141 = fadd float %140, %135 %142 = fmul float %71, %86 %143 = fadd float %142, %136 %144 = fmul float %72, %86 %145 = fadd float %144, %137 %146 = fmul float %73, %87 %147 = fadd float %146, %139 %148 = fmul float %74, %87 %149 = fadd float %148, %141 %150 = fmul float %75, %87 %151 = fadd float %150, %143 %152 = fmul float %76, %87 %153 = fadd float %152, %145 %154 = fmul float %77, %88 %155 = fadd float %154, %147 %156 = fmul float %78, %88 %157 = fadd float %156, %149 %158 = fmul float %79, %88 %159 = fadd float %158, %151 %160 = fmul float %80, %88 %161 = fadd float %160, %153 %162 = fmul float %100, %56 %163 = fadd float %162, %58 %164 = fmul float %101, %57 %165 = fadd float %164, %59 %166 = fcmp oeq float %64, 0.000000e+00 %. = select i1 %166, float %100, float %106 %.36 = select i1 %166, float %101, float %107 %167 = fmul float %., %60 %168 = fadd float %167, %62 %169 = fmul float %.36, %61 %170 = fadd float %169, %63 %171 = fmul float %47, %93 %172 = fmul float %50, %93 %173 = fmul float %53, %93 %174 = fmul float %48, %94 %175 = fadd float %174, %171 %176 = fmul float %51, %94 %177 = fadd float %176, %172 %178 = fmul float %54, %94 %179 = fadd float %178, %173 %180 = fmul float %49, %95 %181 = fadd float %180, %175 %182 = fmul float %52, %95 %183 = fadd float %182, %177 %184 = fmul float %55, %95 %185 = fadd float %184, %179 %186 = fmul float %181, %181 %187 = fmul float %183, %183 %188 = fadd float %187, %186 %189 = fmul float %185, %185 %190 = fadd float %188, %189 %191 = call float @llvm.AMDGPU.rsq.clamped.f32(float %190) %192 = fmul float %181, %191 %193 = fmul float %183, %191 %194 = fmul float %185, %191 %195 = fmul float %155, 5.000000e-01 %196 = fmul float %157, 5.000000e-01 %197 = fmul float %161, 5.000000e-01 %198 = fmul float %196, %16 %199 = fadd float %195, %197 %200 = fadd float %198, %197 %201 = fmul float %192, %193 %202 = fmul float %193, %194 %203 = fmul float %194, %194 %204 = fmul float %194, %192 %205 = fmul float %17, %201 %206 = fmul float %18, %202 %207 = fadd float %205, %206 %208 = fmul float %19, %203 %209 = fadd float %207, %208 %210 = fmul float %20, %204 %211 = fadd float %209, %210 %212 = fmul float %21, %201 %213 = fmul float %22, %202 %214 = fadd float %212, %213 %215 = fmul float %23, %203 %216 = fadd float %214, %215 %217 = fmul float %24, %204 %218 = fadd float %216, %217 %219 = fmul float %25, %201 %220 = fmul float %26, %202 %221 = fadd float %219, %220 %222 = fmul float %27, %203 %223 = fadd float %221, %222 %224 = fmul float %28, %204 %225 = fadd float %223, %224 %226 = fmul float %193, %193 %227 = fmul float %192, %192 %228 = fsub float %227, %226 %229 = fmul float %29, %228 %230 = fadd float %229, %211 %231 = fmul float %30, %228 %232 = fadd float %231, %218 %233 = fmul float %31, %228 %234 = fadd float %233, %225 %235 = fsub float %129, %13 %236 = fsub float %131, %14 %237 = fsub float %133, %15 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %163, float %165, float %168, float %170) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %192, float %193, float %194, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %230, float %232, float %234, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %199, float %200, float %159, float %161) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %159, float %235, float %236, float %237) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %129, float %131, float %133, float %127) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %155, float %157, float %159, float %161) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s19, s[20:23], 0x23 ; C2099523 buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00 s_buffer_load_dword s24, s[20:23], 0x24 ; C20C1524 s_buffer_load_dword s25, s[20:23], 0x25 ; C20C9525 s_buffer_load_dword s26, s[20:23], 0x26 ; C20D1526 s_buffer_load_dword s27, s[20:23], 0x28 ; C20D9528 s_buffer_load_dword s28, s[20:23], 0x29 ; C20E1529 s_buffer_load_dword s29, s[20:23], 0x2a ; C20E952A s_buffer_load_dword s30, s[20:23], 0x2c ; C20F152C s_buffer_load_dword s31, s[20:23], 0x2d ; C20F952D s_buffer_load_dword s32, s[20:23], 0x2e ; C210152E s_buffer_load_dword s33, s[20:23], 0x30 ; C2109530 s_buffer_load_dword s34, s[20:23], 0x31 ; C2111531 s_buffer_load_dword s35, s[20:23], 0x32 ; C2119532 s_buffer_load_dword s36, s[20:23], 0x38 ; C2121538 s_buffer_load_dword s37, s[20:23], 0x39 ; C2129539 s_buffer_load_dword s11, s[20:23], 0x9 ; C2059509 s_buffer_load_dword s5, s[20:23], 0xa ; C202950A s_buffer_load_dword s3, s[20:23], 0xb ; C201950B s_buffer_load_dword s9, s[20:23], 0xc ; C204950C s_buffer_load_dword s12, s[20:23], 0xd ; C206150D s_buffer_load_dword s7, s[20:23], 0xe ; C203950E s_buffer_load_dword s4, s[20:23], 0xf ; C202150F s_buffer_load_dword s10, s[20:23], 0x10 ; C2051510 s_buffer_load_dword s13, s[20:23], 0x11 ; C2069511 s_buffer_load_dword s8, s[20:23], 0x12 ; C2041512 s_buffer_load_dword s0, s[20:23], 0x3f ; C200153F s_buffer_load_dword s1, s[20:23], 0x40 ; C2009540 s_buffer_load_dword s38, s[20:23], 0x44 ; C2131544 s_buffer_load_dword s39, s[20:23], 0x45 ; C2139545 s_buffer_load_dword s40, s[20:23], 0x46 ; C2141546 s_buffer_load_dword s41, s[20:23], 0x47 ; C2149547 s_buffer_load_dword s42, s[20:23], 0x48 ; C2151548 s_buffer_load_dword s43, s[20:23], 0x49 ; C2159549 s_buffer_load_dword s44, s[20:23], 0x4a ; C216154A s_buffer_load_dword s45, s[20:23], 0x4b ; C216954B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s0 ; 7E000200 s_buffer_load_dword s0, s[20:23], 0x0 ; C2001500 v_cmp_eq_f32_e64 vcc, 0, s1 ; D004006A 00000280 s_buffer_load_dword s1, s[20:23], 0x1 ; C2009501 s_buffer_load_dword s2, s[20:23], 0x2 ; C2011502 s_buffer_load_dword s6, s[20:23], 0x4 ; C2031504 s_buffer_load_dword s14, s[20:23], 0x8 ; C2071508 s_buffer_load_dword s46, s[20:23], 0x3a ; C217153A s_buffer_load_dword s47, s[20:23], 0x3b ; C217953B s_buffer_load_dword s48, s[20:23], 0x3c ; C218153C s_buffer_load_dword s49, s[20:23], 0x3d ; C218953D s_buffer_load_dword s50, s[20:23], 0x3e ; C219153E s_buffer_load_dword s18, s[20:23], 0x13 ; C2091513 s_buffer_load_dword s15, s[20:23], 0x14 ; C2079514 s_buffer_load_dword s16, s[20:23], 0x15 ; C2081515 s_buffer_load_dword s17, s[20:23], 0x16 ; C2089516 s_buffer_load_dword s51, s[20:23], 0x18 ; C2199518 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v13, s46 ; 7E1A022E s_buffer_load_dword s46, s[20:23], 0x19 ; C2171519 s_buffer_load_dword s52, s[20:23], 0x1a ; C21A151A s_buffer_load_dword s53, s[20:23], 0x1b ; C21A951B s_buffer_load_dword s54, s[20:23], 0x1c ; C21B151C s_buffer_load_dword s55, s[20:23], 0x1d ; C21B951D s_buffer_load_dword s56, s[20:23], 0x1e ; C21C151E s_buffer_load_dword s57, s[20:23], 0x1f ; C21C951F s_buffer_load_dword s58, s[20:23], 0x20 ; C21D1520 s_buffer_load_dword s59, s[20:23], 0x21 ; C21D9521 s_buffer_load_dword s60, s[20:23], 0x22 ; C21E1522 s_buffer_load_dword s61, s[20:23], 0x4c ; C21E954C s_buffer_load_dword s62, s[20:23], 0x4d ; C21F154D s_buffer_load_dword s63, s[20:23], 0x4e ; C21F954E s_buffer_load_dword s64, s[20:23], 0x4f ; C220154F s_buffer_load_dword s65, s[20:23], 0x50 ; C2209550 s_buffer_load_dword s66, s[20:23], 0x51 ; C2211551 s_buffer_load_dword s67, s[20:23], 0x52 ; C2219552 s_buffer_load_dword s20, s[20:23], 0x53 ; C20A1553 v_mac_f32_e32 v13, s36, v9 ; 3E1A1224 v_mov_b32_e32 v14, s47 ; 7E1C022F v_mul_f32_e32 v15, s51, v2 ; 101E0433 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v16, s46, v2 ; 1020042E v_mul_f32_e32 v17, s27, v6 ; 10220C1B v_mul_f32_e32 v18, s30, v6 ; 10240C1E v_mul_f32_e32 v6, s33, v6 ; 100C0C21 v_mac_f32_e32 v17, s28, v7 ; 3E220E1C v_mac_f32_e32 v18, s31, v7 ; 3E240E1F v_mac_f32_e32 v6, s34, v7 ; 3E0C0E22 v_mac_f32_e32 v17, s29, v8 ; 3E22101D v_mac_f32_e32 v18, s32, v8 ; 3E241020 v_mac_f32_e32 v6, s35, v8 ; 3E0C1023 v_mul_f32_e32 v7, s52, v2 ; 100E0434 v_mul_f32_e32 v8, s53, v2 ; 10100435 v_mul_f32_e32 v19, s38, v2 ; 10260426 v_mac_f32_e32 v14, s37, v10 ; 3E1C1425 v_cndmask_b32_e32 v9, v11, v9 ; 0012130B v_cndmask_b32_e32 v10, v12, v10 ; 0014150C v_mac_f32_e32 v15, s54, v3 ; 3E1E0636 v_mac_f32_e32 v16, s55, v3 ; 3E200637 v_mac_f32_e32 v7, s56, v3 ; 3E0E0638 v_mac_f32_e32 v8, s57, v3 ; 3E100639 v_mac_f32_e32 v19, s42, v3 ; 3E26062A v_mul_f32_e32 v11, s39, v2 ; 10160427 v_mac_f32_e32 v11, s43, v3 ; 3E16062B v_mul_f32_e32 v12, s40, v2 ; 10180428 v_mac_f32_e32 v12, s44, v3 ; 3E18062C v_mul_f32_e32 v2, s41, v2 ; 10040429 v_mac_f32_e32 v2, s45, v3 ; 3E04062D v_mac_f32_e32 v15, s58, v4 ; 3E1E083A v_mac_f32_e32 v16, s59, v4 ; 3E20083B v_mac_f32_e32 v7, s60, v4 ; 3E0E083C v_mac_f32_e32 v8, s19, v4 ; 3E100813 v_mac_f32_e32 v19, s61, v4 ; 3E26083D v_mac_f32_e32 v11, s62, v4 ; 3E16083E v_mac_f32_e32 v12, s63, v4 ; 3E18083F v_mac_f32_e32 v2, s64, v4 ; 3E040840 v_mac_f32_e32 v15, s24, v5 ; 3E1E0A18 v_mac_f32_e32 v16, s25, v5 ; 3E200A19 v_mac_f32_e32 v7, s26, v5 ; 3E0E0A1A v_mac_f32_e32 v19, s65, v5 ; 3E260A41 v_mac_f32_e32 v11, s66, v5 ; 3E160A42 v_mac_f32_e32 v12, s67, v5 ; 3E180A43 v_mac_f32_e32 v2, s20, v5 ; 3E040A14 v_mov_b32_e32 v3, s50 ; 7E060232 v_mul_f32_e32 v4, v17, v17 ; 10082311 v_mac_f32_e32 v4, v18, v18 ; 3E082512 v_mac_f32_e32 v4, v6, v6 ; 3E080D06 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 v_mac_f32_e32 v3, s48, v9 ; 3E061230 v_mac_f32_e32 v0, s49, v10 ; 3E001431 exp 15, 32, 0, 0, 0, v13, v14, v3, v0 ; F800020F 00030E0D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v4, v17 ; 10002304 v_mul_f32_e32 v3, v4, v18 ; 10062504 v_mul_f32_e32 v4, v4, v6 ; 10080D04 v_mul_f32_e32 v5, v4, v3 ; 100A0704 v_mul_f32_e32 v6, s11, v5 ; 100C0A0B v_mul_f32_e32 v9, s12, v5 ; 10120A0C v_mul_f32_e32 v5, s13, v5 ; 100A0A0D v_mul_f32_e32 v10, v3, v0 ; 10140103 v_mac_f32_e32 v6, s14, v10 ; 3E0C140E v_mac_f32_e32 v9, s9, v10 ; 3E121409 v_mac_f32_e32 v5, s10, v10 ; 3E0A140A v_mul_f32_e32 v10, v4, v4 ; 10140904 v_mac_f32_e32 v6, s5, v10 ; 3E0C1405 v_mac_f32_e32 v9, s7, v10 ; 3E121407 v_mac_f32_e32 v5, s8, v10 ; 3E0A1408 v_mul_f32_e32 v10, v0, v4 ; 10140900 v_mac_f32_e32 v6, s3, v10 ; 3E0C1403 v_mac_f32_e32 v9, s4, v10 ; 3E121404 v_mac_f32_e32 v5, s18, v10 ; 3E0A1412 exp 15, 33, 0, 0, 0, v0, v3, v4, v1 ; F800021F 01040300 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mad_f32 v0, v0, v0, -v3 ; D2820000 840E0100 v_mac_f32_e32 v6, s15, v0 ; 3E0C000F v_mac_f32_e32 v9, s16, v0 ; 3E120010 v_mac_f32_e32 v5, s17, v0 ; 3E0A0011 v_mul_f32_e32 v0, 0.5, v11 ; 100016F0 v_mul_f32_e32 v3, 0.5, v2 ; 100604F0 exp 15, 34, 0, 0, 0, v6, v9, v5, v1 ; F800022F 01050906 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v1, 0.5, v19, v3 ; D2820001 040E26F0 v_mac_f32_e32 v3, s6, v0 ; 3E060006 exp 15, 35, 0, 0, 0, v1, v3, v12, v2 ; F800023F 020C0301 v_subrev_f32_e32 v0, s0, v15 ; 0A001E00 s_waitcnt expcnt(0) ; BF8C070F v_subrev_f32_e32 v1, s1, v16 ; 0A022001 v_subrev_f32_e32 v3, s2, v7 ; 0A060E02 exp 15, 36, 0, 0, 0, v12, v0, v1, v3 ; F800024F 0301000C exp 15, 37, 0, 0, 0, v15, v16, v7, v8 ; F800025F 0807100F exp 15, 12, 0, 1, 0, v19, v11, v12, v2 ; F80008CF 020C0B13 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 20 Code Size: 788 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SVIEW[0], CUBE, FLOAT DCL SVIEW[1], CUBE, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL CONST[0..5] DCL CONST[8..20] DCL CONST[22..23] DCL CONST[25] DCL TEMP[0..18], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, 0.5000} IMM[1] FLT32 { 0.7500, 7.0000, 1.0000, 10.0000} IMM[2] FLT32 { 0.9680, 0.0300, 0.0001, -1.0000} 0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx 3: MOV TEMP[1].xy, IN[0].xyyy 4: TEX TEMP[1], TEMP[1], SAMP[2], 2D 5: MUL TEMP[2].x, TEMP[1].wwww, CONST[19].wwww 6: FSLT TEMP[2].x, TEMP[2].xxxx, CONST[20].xxxx 7: AND TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx 8: KILL_IF -TEMP[2].xxxx 9: DP3 TEMP[2].x, IN[4].yzww, IN[4].yzww 10: RSQ TEMP[2].x, TEMP[2].xxxx 11: MUL TEMP[2].xyz, IN[4].yzww, TEMP[2].xxxx 12: MUL TEMP[1].xyz, CONST[19].xyzz, TEMP[1].xyzz 13: LRP TEMP[3].xyz, CONST[22].xxxx, TEMP[1].xyzz, CONST[16].xyzz 14: MUL TEMP[4].x, CONST[22].xxxx, CONST[16].wwww 15: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx 16: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xxxx 17: MOV TEMP[5].xy, IN[0].xyyy 18: TEX TEMP[5].y, TEMP[5], SAMP[3], 2D 19: ADD TEMP[6].x, IMM[0].xxxx, -CONST[25].xxxx 20: MAD TEMP[5].x, TEMP[5].yyyy, CONST[25].xxxx, TEMP[6].xxxx 21: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz 22: MAX TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx 23: MOV TEMP[7].xyz, IMM[0].yyyy 24: MOV TEMP[8].w, IMM[0].xxxx 25: MOV TEMP[8].xyz, TEMP[0].xyzx 26: DP4 TEMP[9].x, CONST[1], TEMP[8] 27: DP4 TEMP[10].x, CONST[2], TEMP[8] 28: MOV TEMP[9].y, TEMP[10].xxxx 29: DP4 TEMP[8].x, CONST[3], TEMP[8] 30: MOV TEMP[9].z, TEMP[8].xxxx 31: ADD TEMP[8].xyz, IN[2].xyzz, TEMP[9].xyzz 32: MOV TEMP[9].xy, IN[3].xyyy 33: MOV TEMP[9].w, IN[3].wwww 34: TXP TEMP[9].x, TEMP[9], SAMP[4], 2D 35: MUL TEMP[9].xyz, CONST[17].xyzz, TEMP[9].xxxx 36: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx 37: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[2].xyzz 38: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[0].xyzz 39: MUL TEMP[10].xyz, IMM[0].zzzz, TEMP[10].xyzz 40: ADD TEMP[10].xyz, TEMP[2].xyzz, -TEMP[10].xyzz 41: MOV TEMP[11].xyz, TEMP[10].xyzx 42: FSLT TEMP[12].x, IMM[0].yyyy, CONST[10].wwww 43: UIF TEMP[12].xxxx :0 44: DP3 TEMP[12].x, TEMP[10].xyzz, TEMP[10].xyzz 45: RSQ TEMP[12].x, TEMP[12].xxxx 46: MUL TEMP[12].xyz, TEMP[10].xyzz, TEMP[12].xxxx 47: MOV TEMP[13].xyz, -IN[5].xyzx 48: ADD TEMP[14].xyz, CONST[8].xyzz, TEMP[13].xyzz 49: RCP TEMP[15].x, TEMP[12].xxxx 50: RCP TEMP[15].y, TEMP[12].yyyy 51: RCP TEMP[15].z, TEMP[12].zzzz 52: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz 53: ADD TEMP[13].xyz, CONST[9].xyzz, TEMP[13].xyzz 54: RCP TEMP[15].x, TEMP[12].xxxx 55: RCP TEMP[15].y, TEMP[12].yyyy 56: RCP TEMP[15].z, TEMP[12].zzzz 57: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz 58: FSLT TEMP[15].xyz, IMM[0].yyyy, TEMP[12].xyzz 59: UIF TEMP[15].xxxx :0 60: MOV TEMP[16].x, TEMP[14].xxxx 61: ELSE :0 62: MOV TEMP[16].x, TEMP[13].xxxx 63: ENDIF 64: UIF TEMP[15].yyyy :0 65: MOV TEMP[17].x, TEMP[14].yyyy 66: ELSE :0 67: MOV TEMP[17].x, TEMP[13].yyyy 68: ENDIF 69: UIF TEMP[15].zzzz :0 70: MOV TEMP[14].x, TEMP[14].zzzz 71: ELSE :0 72: MOV TEMP[14].x, TEMP[13].zzzz 73: ENDIF 74: ADD TEMP[13].xyz, CONST[8].xyzz, CONST[9].xyzz 75: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[0].wwww 76: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx 77: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx 78: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[10].xyzz 79: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[5].xyzz 80: MAD TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xxxx, TEMP[15].xyzz 81: ADD TEMP[11].xyz, TEMP[12].xyzz, -TEMP[13].xyzz 82: ENDIF 83: ADD TEMP[12].x, IMM[0].xxxx, -CONST[23].xxxx 84: POW TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx 85: MUL TEMP[12].x, TEMP[12].xxxx, IMM[1].yyyy 86: MOV TEMP[11].xyz, TEMP[11].xyzz 87: MOV TEMP[11].w, TEMP[12].xxxx 88: TXL TEMP[11], TEMP[11], SAMP[0], CUBE 89: POW TEMP[12].x, TEMP[11].wwww, CONST[11].yyyy 90: MUL TEMP[12].x, CONST[11].xxxx, TEMP[12].xxxx 91: MUL TEMP[11].xyz, TEMP[12].xxxx, TEMP[11].xyzz 92: FSLT TEMP[12].x, CONST[9].wwww, IMM[1].zzzz 93: UIF TEMP[12].xxxx :0 94: MOV TEMP[12].xyz, TEMP[10].xyzx 95: FSLT TEMP[13].x, IMM[0].yyyy, CONST[14].wwww 96: UIF TEMP[13].xxxx :0 97: DP3 TEMP[13].x, TEMP[10].xyzz, TEMP[10].xyzz 98: RSQ TEMP[13].x, TEMP[13].xxxx 99: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[13].xxxx 100: MOV TEMP[13].xyz, -IN[5].xyzx 101: ADD TEMP[14].xyz, CONST[12].xyzz, TEMP[13].xyzz 102: RCP TEMP[15].x, TEMP[10].xxxx 103: RCP TEMP[15].y, TEMP[10].yyyy 104: RCP TEMP[15].z, TEMP[10].zzzz 105: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz 106: ADD TEMP[13].xyz, CONST[13].xyzz, TEMP[13].xyzz 107: RCP TEMP[15].x, TEMP[10].xxxx 108: RCP TEMP[15].y, TEMP[10].yyyy 109: RCP TEMP[15].z, TEMP[10].zzzz 110: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz 111: FSLT TEMP[15].xyz, IMM[0].yyyy, TEMP[10].xyzz 112: UIF TEMP[15].xxxx :0 113: MOV TEMP[16].x, TEMP[14].xxxx 114: ELSE :0 115: MOV TEMP[16].x, TEMP[13].xxxx 116: ENDIF 117: UIF TEMP[15].yyyy :0 118: MOV TEMP[17].x, TEMP[14].yyyy 119: ELSE :0 120: MOV TEMP[17].x, TEMP[13].yyyy 121: ENDIF 122: UIF TEMP[15].zzzz :0 123: MOV TEMP[14].x, TEMP[14].zzzz 124: ELSE :0 125: MOV TEMP[14].x, TEMP[13].zzzz 126: ENDIF 127: ADD TEMP[13].xyz, CONST[12].xyzz, CONST[13].xyzz 128: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[0].wwww 129: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx 130: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx 131: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[14].xyzz 132: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[5].xyzz 133: MAD TEMP[10].xyz, TEMP[10].xyzz, TEMP[14].xxxx, TEMP[15].xyzz 134: ADD TEMP[12].xyz, TEMP[10].xyzz, -TEMP[13].xyzz 135: ENDIF 136: ADD TEMP[10].x, IMM[0].xxxx, -CONST[23].xxxx 137: POW TEMP[10].x, TEMP[10].xxxx, IMM[1].xxxx 138: MUL TEMP[10].x, TEMP[10].xxxx, IMM[1].yyyy 139: MOV TEMP[12].xyz, TEMP[12].xyzz 140: MOV TEMP[12].w, TEMP[10].xxxx 141: TXL TEMP[10], TEMP[12], SAMP[1], CUBE 142: POW TEMP[12].x, TEMP[10].wwww, CONST[15].yyyy 143: MUL TEMP[12].x, CONST[15].xxxx, TEMP[12].xxxx 144: MUL TEMP[10].xyz, TEMP[12].xxxx, TEMP[10].xyzz 145: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[11].xyzz, TEMP[10].xyzz 146: ELSE :0 147: MOV TEMP[7].xyz, TEMP[11].xyzx 148: ENDIF 149: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx 150: MOV TEMP[2].xyz, -TEMP[2].xyzx 151: ADD TEMP[5].x, IMM[0].xxxx, -CONST[23].xxxx 152: ADD TEMP[10].xyz, CONST[0].xyzz, TEMP[2].xyzz 153: DP3 TEMP[11].x, TEMP[10].xyzz, TEMP[10].xyzz 154: RSQ TEMP[11].x, TEMP[11].xxxx 155: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[11].xxxx 156: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[2].xyzz 157: MAX TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 158: DP3 TEMP[11].x, CONST[0].xyzz, TEMP[10].xyzz 159: MAX TEMP[11].x, IMM[0].yyyy, TEMP[11].xxxx 160: MUL TEMP[12].x, TEMP[5].xxxx, TEMP[5].xxxx 161: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].wwww 162: ADD TEMP[13].x, IMM[0].xxxx, -TEMP[5].xxxx 163: MAD TEMP[13].x, TEMP[13].xxxx, IMM[2].xxxx, IMM[2].yyyy 164: LG2 TEMP[13].x, TEMP[13].xxxx 165: RCP TEMP[13].x, TEMP[13].xxxx 166: MUL TEMP[13].x, IMM[1].wwww, TEMP[13].xxxx 167: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[13].xxxx 168: ADD TEMP[14].x, IMM[0].xxxx, -TEMP[6].xxxx 169: ADD TEMP[15].x, IMM[0].xxxx, -TEMP[2].xxxx 170: MUL TEMP[16].x, IMM[0].zzzz, TEMP[11].xxxx 171: MUL TEMP[5].x, TEMP[11].xxxx, TEMP[5].xxxx 172: MAD TEMP[5].x, TEMP[16].xxxx, TEMP[5].xxxx, IMM[0].wwww 173: ADD TEMP[11].x, IMM[0].xxxx, -TEMP[11].xxxx 174: ADD TEMP[16].x, IMM[0].xxxx, -TEMP[2].xxxx 175: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx 176: ADD TEMP[4].x, CONST[23].xxxx, TEMP[4].xxxx 177: MOV_SAT TEMP[4].x, TEMP[4].xxxx 178: MUL TEMP[17].x, TEMP[16].xxxx, TEMP[16].xxxx 179: MUL TEMP[18].x, TEMP[16].xxxx, TEMP[16].xxxx 180: MUL TEMP[16].x, TEMP[18].xxxx, TEMP[16].xxxx 181: MUL TEMP[16].x, TEMP[17].xxxx, TEMP[16].xxxx 182: LRP TEMP[4].xyz, TEMP[16].xxxx, TEMP[4].xxxx, TEMP[3].xyzz 183: LRP TEMP[16].x, TEMP[6].xxxx, IMM[0].xxxx, TEMP[12].xxxx 184: LRP TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx, TEMP[12].xxxx 185: MAD TEMP[2].x, TEMP[16].xxxx, TEMP[2].xxxx, IMM[2].zzzz 186: RCP TEMP[2].x, TEMP[2].xxxx 187: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[10].xyzz 188: MAX TEMP[10].x, IMM[0].yyyy, TEMP[10].xxxx 189: POW TEMP[10].x, TEMP[10].xxxx, TEMP[13].xxxx 190: ADD TEMP[12].x, TEMP[13].xxxx, IMM[0].xxxx 191: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].yyyy 192: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[12].xxxx 193: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[10].xxxx 194: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[6].xxxx 195: MUL TEMP[2].x, TEMP[2].xxxx, CONST[18].xxxx 196: MAX TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 197: MUL TEMP[2].xyz, TEMP[2].xxxx, TEMP[9].xyzz 198: ADD TEMP[10].xyz, IMM[0].xxxx, -TEMP[3].xyzz 199: MUL TEMP[12].x, TEMP[11].xxxx, TEMP[11].xxxx 200: MUL TEMP[13].x, TEMP[11].xxxx, TEMP[11].xxxx 201: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[11].xxxx 202: MUL TEMP[11].x, TEMP[12].xxxx, TEMP[11].xxxx 203: MAD TEMP[3].xyz, TEMP[10].xyzz, TEMP[11].xxxx, TEMP[3].xyzz 204: ADD TEMP[10].x, TEMP[5].xxxx, IMM[2].wwww 205: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx 206: MUL TEMP[12].x, TEMP[14].xxxx, TEMP[14].xxxx 207: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[14].xxxx 208: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx 209: MAD TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx, IMM[0].xxxx 210: ADD TEMP[5].x, TEMP[5].xxxx, IMM[2].wwww 211: MUL TEMP[11].x, TEMP[15].xxxx, TEMP[15].xxxx 212: MUL TEMP[12].x, TEMP[15].xxxx, TEMP[15].xxxx 213: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[15].xxxx 214: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx 215: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[11].xxxx, IMM[0].xxxx 216: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx 217: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 218: MAD TEMP[5].xyz, TEMP[9].xyzz, TEMP[5].xxxx, TEMP[8].xyzz 219: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xyzz 220: MAD TEMP[1].xyz, TEMP[2].xyzz, TEMP[3].xyzz, TEMP[1].xyzz 221: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz 222: MOV TEMP[0].xyz, TEMP[0].xyzx 223: MAD TEMP[1].x, IN[4].xxxx, CONST[5].zzzz, CONST[5].wwww 224: MOV_SAT TEMP[1].x, TEMP[1].xxxx 225: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz 226: MOV TEMP[0].xyz, TEMP[0].xyzx 227: MOV TEMP[0].w, IMM[0].xxxx 228: MOV OUT[0], TEMP[0] 229: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300) %76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312) %79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 316) %80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320) %81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368) %83 = call float @llvm.SI.load.const(<16 x i8> %23, i32 400) %84 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %85 = load <32 x i8>, <32 x i8> addrspace(2)* %84, align 32, !tbaa !0 %86 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %87 = load <16 x i8>, <16 x i8> addrspace(2)* %86, align 16, !tbaa !0 %88 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %89 = bitcast <8 x i32> addrspace(2)* %88 to <32 x i8> addrspace(2)* %90 = load <32 x i8>, <32 x i8> addrspace(2)* %89, align 32, !tbaa !0 %91 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %92 = bitcast <4 x i32> addrspace(2)* %91 to <16 x i8> addrspace(2)* %93 = load <16 x i8>, <16 x i8> addrspace(2)* %92, align 16, !tbaa !0 %94 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %95 = bitcast <8 x i32> addrspace(2)* %94 to <32 x i8> addrspace(2)* %96 = load <32 x i8>, <32 x i8> addrspace(2)* %95, align 32, !tbaa !0 %97 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %98 = bitcast <4 x i32> addrspace(2)* %97 to <16 x i8> addrspace(2)* %99 = load <16 x i8>, <16 x i8> addrspace(2)* %98, align 16, !tbaa !0 %100 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %101 = bitcast <8 x i32> addrspace(2)* %100 to <32 x i8> addrspace(2)* %102 = load <32 x i8>, <32 x i8> addrspace(2)* %101, align 32, !tbaa !0 %103 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %104 = bitcast <4 x i32> addrspace(2)* %103 to <16 x i8> addrspace(2)* %105 = load <16 x i8>, <16 x i8> addrspace(2)* %104, align 16, !tbaa !0 %106 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %107 = bitcast <8 x i32> addrspace(2)* %106 to <32 x i8> addrspace(2)* %108 = load <32 x i8>, <32 x i8> addrspace(2)* %107, align 32, !tbaa !0 %109 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %110 = bitcast <4 x i32> addrspace(2)* %109 to <16 x i8> addrspace(2)* %111 = load <16 x i8>, <16 x i8> addrspace(2)* %110, align 16, !tbaa !0 %112 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %113 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %114 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %115 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %116 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %117 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %119 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %123 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %124 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %125 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %126 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %127 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %128 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %129 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %130 = fmul float %114, %114 %131 = fmul float %115, %115 %132 = fadd float %131, %130 %133 = fmul float %116, %116 %134 = fadd float %132, %133 %135 = call float @llvm.AMDGPU.rsq.clamped.f32(float %134) %136 = fmul float %114, %135 %137 = fmul float %115, %135 %138 = fmul float %116, %135 %139 = bitcast float %112 to i32 %140 = bitcast float %113 to i32 %141 = insertelement <2 x i32> undef, i32 %139, i32 0 %142 = insertelement <2 x i32> %141, i32 %140, i32 1 %143 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %142, <32 x i8> %96, <16 x i8> %99, i32 2) %144 = extractelement <4 x float> %143, i32 0 %145 = extractelement <4 x float> %143, i32 1 %146 = extractelement <4 x float> %143, i32 2 %147 = extractelement <4 x float> %143, i32 3 %148 = fmul float %147, %79 %149 = fcmp olt float %148, %80 %150 = select i1 %149, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %150) %151 = fmul float %124, %124 %152 = fmul float %125, %125 %153 = fadd float %152, %151 %154 = fmul float %126, %126 %155 = fadd float %153, %154 %156 = call float @llvm.AMDGPU.rsq.clamped.f32(float %155) %157 = fmul float %124, %156 %158 = fmul float %125, %156 %159 = fmul float %126, %156 %160 = fmul float %76, %144 %161 = fmul float %77, %145 %162 = fmul float %78, %146 %163 = call float @llvm.AMDGPU.lrp(float %81, float %160, float %66) %164 = call float @llvm.AMDGPU.lrp(float %81, float %161, float %67) %165 = call float @llvm.AMDGPU.lrp(float %81, float %162, float %68) %166 = fmul float %81, %69 %167 = fsub float %69, %166 %168 = fmul float %160, %167 %169 = fmul float %161, %167 %170 = fmul float %162, %167 %171 = bitcast float %112 to i32 %172 = bitcast float %113 to i32 %173 = insertelement <2 x i32> undef, i32 %171, i32 0 %174 = insertelement <2 x i32> %173, i32 %172, i32 1 %175 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %174, <32 x i8> %102, <16 x i8> %105, i32 2) %176 = extractelement <4 x float> %175, i32 1 %177 = fsub float 1.000000e+00, %83 %178 = fmul float %176, %83 %179 = fadd float %178, %177 %180 = fmul float %136, %24 %181 = fmul float %137, %25 %182 = fadd float %181, %180 %183 = fmul float %138, %26 %184 = fadd float %182, %183 %185 = call float @llvm.maxnum.f32(float %184, float 0.000000e+00) %186 = fmul float %27, %136 %187 = fmul float %28, %137 %188 = fadd float %186, %187 %189 = fmul float %29, %138 %190 = fadd float %188, %189 %191 = fadd float %190, %30 %192 = fmul float %31, %136 %193 = fmul float %32, %137 %194 = fadd float %192, %193 %195 = fmul float %33, %138 %196 = fadd float %194, %195 %197 = fadd float %196, %34 %198 = fmul float %35, %136 %199 = fmul float %36, %137 %200 = fadd float %198, %199 %201 = fmul float %37, %138 %202 = fadd float %200, %201 %203 = fadd float %202, %38 %204 = fadd float %117, %191 %205 = fadd float %118, %197 %206 = fadd float %119, %203 %207 = fdiv float %120, %122 %208 = fdiv float %121, %122 %209 = bitcast float %207 to i32 %210 = bitcast float %208 to i32 %211 = insertelement <2 x i32> undef, i32 %209, i32 0 %212 = insertelement <2 x i32> %211, i32 %210, i32 1 %213 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %212, <32 x i8> %108, <16 x i8> %111, i32 2) %214 = extractelement <4 x float> %213, i32 0 %215 = fmul float %70, %214 %216 = fmul float %71, %214 %217 = fmul float %72, %214 %218 = fmul float %204, %179 %219 = fmul float %205, %179 %220 = fmul float %206, %179 %221 = fmul float %136, %157 %222 = fmul float %137, %158 %223 = fadd float %222, %221 %224 = fmul float %138, %159 %225 = fadd float %223, %224 %226 = fmul float %225, %136 %227 = fmul float %225, %137 %228 = fmul float %225, %138 %229 = fmul float %226, 2.000000e+00 %230 = fmul float %227, 2.000000e+00 %231 = fmul float %228, 2.000000e+00 %232 = fsub float %157, %229 %233 = fsub float %158, %230 %234 = fsub float %159, %231 %235 = fcmp ogt float %51, 0.000000e+00 br i1 %235, label %IF, label %ENDIF IF: ; preds = %main_body %236 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %237 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %238 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %239 = fmul float %232, %232 %240 = fmul float %233, %233 %241 = fadd float %240, %239 %242 = fmul float %234, %234 %243 = fadd float %241, %242 %244 = call float @llvm.AMDGPU.rsq.clamped.f32(float %243) %245 = fmul float %232, %244 %246 = fmul float %233, %244 %247 = fmul float %234, %244 %248 = fsub float %44, %127 %249 = fsub float %45, %128 %250 = fsub float %46, %129 %251 = fdiv float 1.000000e+00, %245 %252 = fdiv float 1.000000e+00, %246 %253 = fdiv float 1.000000e+00, %247 %254 = fmul float %248, %251 %255 = fmul float %249, %252 %256 = fmul float %250, %253 %257 = fsub float %47, %127 %258 = fsub float %48, %128 %259 = fsub float %49, %129 %260 = fdiv float 1.000000e+00, %245 %261 = fdiv float 1.000000e+00, %246 %262 = fdiv float 1.000000e+00, %247 %263 = fmul float %257, %260 %264 = fmul float %258, %261 %265 = fmul float %259, %262 %266 = fcmp ogt float %245, 0.000000e+00 %267 = fcmp ogt float %246, 0.000000e+00 %268 = fcmp ogt float %247, 0.000000e+00 %. = select i1 %266, float %254, float %263 %temp68.0 = select i1 %267, float %255, float %264 %.100 = select i1 %268, float %256, float %265 %269 = fadd float %44, %47 %270 = fadd float %45, %48 %271 = fadd float %46, %49 %272 = fmul float %269, 5.000000e-01 %273 = fmul float %270, 5.000000e-01 %274 = fmul float %271, 5.000000e-01 %275 = call float @llvm.minnum.f32(float %., float %temp68.0) %276 = call float @llvm.minnum.f32(float %275, float %.100) %277 = fsub float %272, %238 %278 = fsub float %273, %237 %279 = fsub float %274, %236 %280 = fadd float %277, %127 %281 = fadd float %278, %128 %282 = fadd float %279, %129 %283 = fmul float %245, %276 %284 = fadd float %283, %280 %285 = fmul float %246, %276 %286 = fadd float %285, %281 %287 = fmul float %247, %276 %288 = fadd float %287, %282 %289 = fsub float %284, %272 %290 = fsub float %286, %273 %291 = fsub float %288, %274 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp44.0 = phi float [ %289, %IF ], [ %232, %main_body ] %temp45.0 = phi float [ %290, %IF ], [ %233, %main_body ] %temp46.0 = phi float [ %291, %IF ], [ %234, %main_body ] %292 = fsub float 1.000000e+00, %82 %293 = call float @llvm.pow.f32(float %292, float 7.500000e-01) %294 = fmul float %293, 7.000000e+00 %295 = insertelement <4 x float> undef, float %temp44.0, i32 0 %296 = insertelement <4 x float> %295, float %temp45.0, i32 1 %297 = insertelement <4 x float> %296, float %temp46.0, i32 2 %298 = insertelement <4 x float> %297, float %294, i32 3 %299 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %298) %300 = extractelement <4 x float> %299, i32 0 %301 = extractelement <4 x float> %299, i32 1 %302 = extractelement <4 x float> %299, i32 2 %303 = extractelement <4 x float> %299, i32 3 %304 = call float @llvm.fabs.f32(float %302) %305 = fdiv float 1.000000e+00, %304 %306 = fmul float %300, %305 %307 = fadd float %306, 1.500000e+00 %308 = fmul float %301, %305 %309 = fadd float %308, 1.500000e+00 %310 = bitcast float %309 to i32 %311 = bitcast float %307 to i32 %312 = bitcast float %303 to i32 %313 = bitcast float %294 to i32 %314 = insertelement <4 x i32> undef, i32 %310, i32 0 %315 = insertelement <4 x i32> %314, i32 %311, i32 1 %316 = insertelement <4 x i32> %315, i32 %312, i32 2 %317 = insertelement <4 x i32> %316, i32 %313, i32 3 %318 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %317, <32 x i8> %85, <16 x i8> %87, i32 4) %319 = extractelement <4 x float> %318, i32 0 %320 = extractelement <4 x float> %318, i32 1 %321 = extractelement <4 x float> %318, i32 2 %322 = extractelement <4 x float> %318, i32 3 %323 = call float @llvm.pow.f32(float %322, float %53) %324 = fmul float %52, %323 %325 = fmul float %324, %319 %326 = fmul float %324, %320 %327 = fmul float %324, %321 %328 = fcmp olt float %50, 0x3FEFFFEB00000000 br i1 %328, label %IF86, label %ENDIF85 IF86: ; preds = %ENDIF %329 = fcmp ogt float %63, 0.000000e+00 br i1 %329, label %IF89, label %ENDIF88 ENDIF85: ; preds = %ENDIF, %ENDIF88 %temp28.0 = phi float [ %557, %ENDIF88 ], [ %325, %ENDIF ] %temp29.0 = phi float [ %558, %ENDIF88 ], [ %326, %ENDIF ] %temp30.0 = phi float [ %559, %ENDIF88 ], [ %327, %ENDIF ] %330 = fmul float %temp28.0, %179 %331 = fmul float %temp29.0, %179 %332 = fmul float %temp30.0, %179 %333 = fsub float 1.000000e+00, %82 %334 = fsub float %24, %157 %335 = fsub float %25, %158 %336 = fsub float %26, %159 %337 = fmul float %334, %334 %338 = fmul float %335, %335 %339 = fadd float %338, %337 %340 = fmul float %336, %336 %341 = fadd float %339, %340 %342 = call float @llvm.AMDGPU.rsq.clamped.f32(float %341) %343 = fmul float %334, %342 %344 = fmul float %335, %342 %345 = fmul float %336, %342 %346 = fmul float %157, %136 %347 = fsub float -0.000000e+00, %346 %348 = fmul float %158, %137 %349 = fsub float %347, %348 %350 = fmul float %159, %138 %351 = fsub float %349, %350 %352 = call float @llvm.maxnum.f32(float %351, float 0.000000e+00) %353 = fmul float %24, %343 %354 = fmul float %25, %344 %355 = fadd float %354, %353 %356 = fmul float %26, %345 %357 = fadd float %355, %356 %358 = call float @llvm.maxnum.f32(float %357, float 0.000000e+00) %359 = fmul float %333, %333 %360 = fmul float %359, %75 %361 = fsub float 1.000000e+00, %333 %362 = fmul float %361, 0x3FEEF9DB20000000 %363 = fadd float %362, 0x3F9EB851E0000000 %364 = call float @llvm.log2.f32(float %363) %365 = fdiv float 1.000000e+00, %364 %366 = fmul float %365, 1.000000e+01 %367 = fmul float %366, %366 %368 = fsub float 1.000000e+00, %185 %369 = fsub float 1.000000e+00, %352 %370 = fmul float %358, 2.000000e+00 %371 = fmul float %358, %333 %372 = fmul float %370, %371 %373 = fadd float %372, 5.000000e-01 %374 = fsub float 1.000000e+00, %358 %375 = fsub float 1.000000e+00, %352 %376 = fsub float 1.000000e+00, %167 %377 = fadd float %82, %376 %378 = call float @llvm.AMDIL.clamp.(float %377, float 0.000000e+00, float 1.000000e+00) %379 = fmul float %375, %375 %380 = fmul float %375, %375 %381 = fmul float %380, %375 %382 = fmul float %379, %381 %383 = call float @llvm.AMDGPU.lrp(float %382, float %378, float %163) %384 = call float @llvm.AMDGPU.lrp(float %382, float %378, float %164) %385 = call float @llvm.AMDGPU.lrp(float %382, float %378, float %165) %386 = call float @llvm.AMDGPU.lrp(float %185, float 1.000000e+00, float %360) %387 = call float @llvm.AMDGPU.lrp(float %352, float 1.000000e+00, float %360) %388 = fmul float %386, %387 %389 = fadd float %388, 0x3F1A36E2E0000000 %390 = fdiv float 1.000000e+00, %389 %391 = fmul float %136, %343 %392 = fmul float %137, %344 %393 = fadd float %392, %391 %394 = fmul float %138, %345 %395 = fadd float %393, %394 %396 = call float @llvm.maxnum.f32(float %395, float 0.000000e+00) %397 = call float @llvm.pow.f32(float %396, float %367) %398 = fadd float %367, 1.000000e+00 %399 = fmul float %398, %74 %400 = fmul float %397, %399 %401 = fmul float %390, %400 %402 = fmul float %401, %185 %403 = fmul float %402, %73 %404 = call float @llvm.maxnum.f32(float %403, float 0.000000e+00) %405 = fmul float %404, %215 %406 = fmul float %404, %216 %407 = fmul float %404, %217 %408 = fsub float 1.000000e+00, %163 %409 = fsub float 1.000000e+00, %164 %410 = fsub float 1.000000e+00, %165 %411 = fmul float %374, %374 %412 = fmul float %374, %374 %413 = fmul float %412, %374 %414 = fmul float %411, %413 %415 = fmul float %408, %414 %416 = fadd float %415, %163 %417 = fmul float %409, %414 %418 = fadd float %417, %164 %419 = fmul float %410, %414 %420 = fadd float %419, %165 %421 = fadd float %373, -1.000000e+00 %422 = fmul float %368, %368 %423 = fmul float %368, %368 %424 = fmul float %423, %368 %425 = fmul float %422, %424 %426 = fmul float %421, %425 %427 = fadd float %426, 1.000000e+00 %428 = fadd float %373, -1.000000e+00 %429 = fmul float %369, %369 %430 = fmul float %369, %369 %431 = fmul float %430, %369 %432 = fmul float %429, %431 %433 = fmul float %428, %432 %434 = fadd float %433, 1.000000e+00 %435 = fmul float %427, %434 %436 = fmul float %435, %185 %437 = fmul float %215, %436 %438 = fadd float %437, %218 %439 = fmul float %216, %436 %440 = fadd float %439, %219 %441 = fmul float %217, %436 %442 = fadd float %441, %220 %443 = fmul float %168, %438 %444 = fmul float %169, %440 %445 = fmul float %170, %442 %446 = fmul float %405, %416 %447 = fadd float %446, %443 %448 = fmul float %406, %418 %449 = fadd float %448, %444 %450 = fmul float %407, %420 %451 = fadd float %450, %445 %452 = fmul float %330, %383 %453 = fadd float %452, %447 %454 = fmul float %331, %384 %455 = fadd float %454, %449 %456 = fmul float %332, %385 %457 = fadd float %456, %451 %458 = fmul float %123, %42 %459 = fadd float %458, %43 %460 = call float @llvm.AMDIL.clamp.(float %459, float 0.000000e+00, float 1.000000e+00) %461 = call float @llvm.AMDGPU.lrp(float %460, float %453, float %39) %462 = call float @llvm.AMDGPU.lrp(float %460, float %455, float %40) %463 = call float @llvm.AMDGPU.lrp(float %460, float %457, float %41) %464 = call i32 @llvm.SI.packf16(float %461, float %462) %465 = bitcast i32 %464 to float %466 = call i32 @llvm.SI.packf16(float %463, float 1.000000e+00) %467 = bitcast i32 %466 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %465, float %467, float %465, float %467) ret void IF89: ; preds = %IF86 %468 = fmul float %232, %232 %469 = fmul float %233, %233 %470 = fadd float %469, %468 %471 = fmul float %234, %234 %472 = fadd float %470, %471 %473 = call float @llvm.AMDGPU.rsq.clamped.f32(float %472) %474 = fmul float %232, %473 %475 = fmul float %233, %473 %476 = fmul float %234, %473 %477 = fsub float %54, %127 %478 = fsub float %55, %128 %479 = fsub float %56, %129 %480 = fdiv float 1.000000e+00, %474 %481 = fdiv float 1.000000e+00, %475 %482 = fdiv float 1.000000e+00, %476 %483 = fmul float %477, %480 %484 = fmul float %478, %481 %485 = fmul float %479, %482 %486 = fsub float %57, %127 %487 = fsub float %58, %128 %488 = fsub float %59, %129 %489 = fdiv float 1.000000e+00, %474 %490 = fdiv float 1.000000e+00, %475 %491 = fdiv float 1.000000e+00, %476 %492 = fmul float %486, %489 %493 = fmul float %487, %490 %494 = fmul float %488, %491 %495 = fcmp ogt float %474, 0.000000e+00 %496 = fcmp ogt float %475, 0.000000e+00 %497 = fcmp ogt float %476, 0.000000e+00 %.101 = select i1 %495, float %483, float %492 %temp68.1 = select i1 %496, float %484, float %493 %.102 = select i1 %497, float %485, float %494 %498 = fadd float %54, %57 %499 = fadd float %55, %58 %500 = fadd float %56, %59 %501 = fmul float %498, 5.000000e-01 %502 = fmul float %499, 5.000000e-01 %503 = fmul float %500, 5.000000e-01 %504 = call float @llvm.minnum.f32(float %.101, float %temp68.1) %505 = call float @llvm.minnum.f32(float %504, float %.102) %506 = fsub float %501, %60 %507 = fsub float %502, %61 %508 = fsub float %503, %62 %509 = fadd float %506, %127 %510 = fadd float %507, %128 %511 = fadd float %508, %129 %512 = fmul float %474, %505 %513 = fadd float %512, %509 %514 = fmul float %475, %505 %515 = fadd float %514, %510 %516 = fmul float %476, %505 %517 = fadd float %516, %511 %518 = fsub float %513, %501 %519 = fsub float %515, %502 %520 = fsub float %517, %503 br label %ENDIF88 ENDIF88: ; preds = %IF86, %IF89 %temp48.0 = phi float [ %518, %IF89 ], [ %232, %IF86 ] %temp49.0 = phi float [ %519, %IF89 ], [ %233, %IF86 ] %temp50.0 = phi float [ %520, %IF89 ], [ %234, %IF86 ] %521 = fsub float 1.000000e+00, %82 %522 = call float @llvm.pow.f32(float %521, float 7.500000e-01) %523 = fmul float %522, 7.000000e+00 %524 = insertelement <4 x float> undef, float %temp48.0, i32 0 %525 = insertelement <4 x float> %524, float %temp49.0, i32 1 %526 = insertelement <4 x float> %525, float %temp50.0, i32 2 %527 = insertelement <4 x float> %526, float %523, i32 3 %528 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %527) %529 = extractelement <4 x float> %528, i32 0 %530 = extractelement <4 x float> %528, i32 1 %531 = extractelement <4 x float> %528, i32 2 %532 = extractelement <4 x float> %528, i32 3 %533 = call float @llvm.fabs.f32(float %531) %534 = fdiv float 1.000000e+00, %533 %535 = fmul float %529, %534 %536 = fadd float %535, 1.500000e+00 %537 = fmul float %530, %534 %538 = fadd float %537, 1.500000e+00 %539 = bitcast float %538 to i32 %540 = bitcast float %536 to i32 %541 = bitcast float %532 to i32 %542 = bitcast float %523 to i32 %543 = insertelement <4 x i32> undef, i32 %539, i32 0 %544 = insertelement <4 x i32> %543, i32 %540, i32 1 %545 = insertelement <4 x i32> %544, i32 %541, i32 2 %546 = insertelement <4 x i32> %545, i32 %542, i32 3 %547 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %546, <32 x i8> %90, <16 x i8> %93, i32 4) %548 = extractelement <4 x float> %547, i32 0 %549 = extractelement <4 x float> %547, i32 1 %550 = extractelement <4 x float> %547, i32 2 %551 = extractelement <4 x float> %547, i32 3 %552 = call float @llvm.pow.f32(float %551, float %65) %553 = fmul float %64, %552 %554 = fmul float %553, %548 %555 = fmul float %553, %549 %556 = fmul float %553, %550 %557 = call float @llvm.AMDGPU.lrp(float %50, float %325, float %554) %558 = call float @llvm.AMDGPU.lrp(float %50, float %326, float %555) %559 = call float @llvm.AMDGPU.lrp(float %50, float %327, float %556) br label %ENDIF85 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v16, v0, 0, 0, [m0] ; C8400000 v_interp_p2_f32 v16, [v16], v1, 0, 0, [m0] ; C8410001 v_interp_p1_f32 v17, v0, 1, 0, [m0] ; C8440100 v_interp_p2_f32 v17, [v17], v1, 1, 0, [m0] ; C8450101 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v4, v0, 2, 1, [m0] ; C8100600 v_interp_p2_f32 v4, [v4], v1, 2, 1, [m0] ; C8110601 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 v_interp_p1_f32 v8, v0, 2, 2, [m0] ; C8200A00 v_interp_p2_f32 v8, [v8], v1, 2, 2, [m0] ; C8210A01 v_interp_p1_f32 v6, v0, 0, 3, [m0] ; C8180C00 v_interp_p2_f32 v6, [v6], v1, 0, 3, [m0] ; C8190C01 v_interp_p1_f32 v18, v0, 1, 3, [m0] ; C8480D00 v_interp_p2_f32 v18, [v18], v1, 1, 3, [m0] ; C8490D01 v_interp_p1_f32 v19, v0, 3, 3, [m0] ; C84C0F00 v_interp_p2_f32 v19, [v19], v1, 3, 3, [m0] ; C84D0F01 v_interp_p1_f32 v9, v0, 0, 4, [m0] ; C8241000 v_interp_p2_f32 v9, [v9], v1, 0, 4, [m0] ; C8251001 v_interp_p1_f32 v21, v0, 1, 4, [m0] ; C8541100 v_interp_p2_f32 v21, [v21], v1, 1, 4, [m0] ; C8551101 v_interp_p1_f32 v22, v0, 2, 4, [m0] ; C8581200 v_interp_p2_f32 v22, [v22], v1, 2, 4, [m0] ; C8591201 v_interp_p1_f32 v23, v0, 3, 4, [m0] ; C85C1300 v_interp_p2_f32 v23, [v23], v1, 3, 4, [m0] ; C85D1301 v_interp_p1_f32 v25, v0, 0, 5, [m0] ; C8641400 v_interp_p2_f32 v25, [v25], v1, 0, 5, [m0] ; C8651401 v_interp_p1_f32 v20, v0, 1, 5, [m0] ; C8501500 v_interp_p2_f32 v20, [v20], v1, 1, 5, [m0] ; C8511501 v_interp_p1_f32 v26, v0, 2, 5, [m0] ; C8681600 v_interp_p2_f32 v26, [v26], v1, 2, 5, [m0] ; C8691601 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_mul_f32_e32 v0, v2, v2 ; 10000502 v_mac_f32_e32 v0, v3, v3 ; 3E000703 v_mac_f32_e32 v0, v4, v4 ; 3E000904 v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C s_load_dwordx4 s[16:19], s[4:5], 0x10 ; C0880510 s_load_dwordx8 s[36:43], s[6:7], 0x18 ; C0D20718 s_load_dwordx8 s[44:51], s[6:7], 0x20 ; C0D60720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[24:31], s[12:15] ; F0800F00 00661B10 s_buffer_load_dword s1, s[8:11], 0x4f ; C200894F s_buffer_load_dword s2, s[8:11], 0x50 ; C2010950 v_mul_f32_e32 v2, v0, v2 ; 10040500 v_mul_f32_e32 v1, v0, v3 ; 10020700 v_mul_f32_e32 v0, v0, v4 ; 10000900 s_buffer_load_dword s3, s[8:11], 0x4c ; C201894C s_buffer_load_dword s12, s[8:11], 0x4d ; C206094D s_buffer_load_dword s13, s[8:11], 0x4e ; C206894E s_buffer_load_dword s30, s[8:11], 0x58 ; C20F0958 s_buffer_load_dword s0, s[8:11], 0x5c ; C200095C s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v3, s1, v30 ; 10063C01 v_cmp_gt_f32_e32 vcc, s2, v3 ; 7C080602 v_cndmask_b32_e64 v3, 0, -1.0, vcc ; D2000003 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v3 ; 7C260680 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_mul_f32_e32 v3, v21, v21 ; 10062B15 v_mac_f32_e32 v3, v22, v22 ; 3E062D16 v_mac_f32_e32 v3, v23, v23 ; 3E062F17 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_mul_f32_e32 v13, s3, v27 ; 101A3603 v_mul_f32_e32 v14, s12, v28 ; 101C380C v_mul_f32_e32 v15, s13, v29 ; 101E3A0D v_mul_f32_e32 v11, v3, v21 ; 10162B03 v_mul_f32_e32 v10, v3, v22 ; 10142D03 v_mul_f32_e32 v4, v11, v2 ; 1008050B v_mac_f32_e32 v4, v10, v1 ; 3E08030A v_mul_f32_e32 v12, v3, v23 ; 10182F03 v_mac_f32_e32 v4, v12, v0 ; 3E08010C v_mul_f32_e32 v24, v2, v4 ; 10300902 v_mac_f32_e32 v24, v2, v4 ; 3E300902 v_mul_f32_e32 v28, v1, v4 ; 10380901 v_mac_f32_e32 v28, v1, v4 ; 3E380901 v_mad_f32 v27, v21, v3, -v24 ; D282001B 84620715 v_mad_f32 v28, v22, v3, -v28 ; D282001C 84720716 v_mov_b32_e32 v21, 0x6f800000 ; 7E2A02FF 6F800000 v_cmp_gt_f32_e64 vcc, |v19|, v21 ; D008016A 00022B13 v_mov_b32_e32 v21, 0x2f800000 ; 7E2A02FF 2F800000 v_cndmask_b32_e32 v21, 1.0, v21 ; 002A2AF2 v_mul_f32_e32 v19, v21, v19 ; 10262715 v_rcp_f32_e32 v19, v19 ; 7E265513 v_mul_f32_e32 v22, v0, v4 ; 102C0900 v_mac_f32_e32 v22, v0, v4 ; 3E2C0900 v_mad_f32 v29, v23, v3, -v22 ; D282001D 845A0717 v_mul_f32_e32 v3, v19, v6 ; 10060D13 v_mul_f32_e32 v4, v19, v18 ; 10082513 s_buffer_load_dword s1, s[8:11], 0x40 ; C2008940 s_buffer_load_dword s2, s[8:11], 0x41 ; C2010941 s_buffer_load_dword s3, s[8:11], 0x42 ; C2018942 v_mul_f32_e32 v18, v3, v21 ; 10242B03 v_mul_f32_e32 v19, v4, v21 ; 10262B04 s_buffer_load_dword s12, s[8:11], 0x27 ; C2060927 s_buffer_load_dword s13, s[8:11], 0x2b ; C206892B s_buffer_load_dword s32, s[8:11], 0x2c ; C210092C s_buffer_load_dword s33, s[8:11], 0x2d ; C210892D v_sub_f32_e64 v3, 1.0, s30 ; D2080003 00003CF2 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s1, v3 ; 100C0601 v_mul_f32_e32 v4, s2, v3 ; 10080602 v_mul_f32_e32 v3, s3, v3 ; 10060603 v_mac_f32_e32 v6, s30, v13 ; 3E0C1A1E v_mov_b32_e32 v30, v27 ; 7E3C031B v_mac_f32_e32 v4, s30, v14 ; 3E081C1E v_mov_b32_e32 v31, v28 ; 7E3E031C v_mac_f32_e32 v3, s30, v15 ; 3E061E1E v_mov_b32_e32 v32, v29 ; 7E40031D v_cmp_lt_f32_e64 s[2:3], 0, s13 ; D0020002 00001A80 image_sample v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[36:43], s[20:23] ; F0800F00 00A91510 image_sample v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[44:51], s[16:19] ; F0800F00 008B1012 s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[14:15], s[2:3] ; BE8E2402 s_xor_b64 s[14:15], exec, s[14:15] ; 898E0E7E s_cbranch_execz BB0_2 ; BF880000 s_buffer_load_dword s1, s[8:11], 0x20 ; C2008920 s_buffer_load_dword s2, s[8:11], 0x21 ; C2010921 s_buffer_load_dword s3, s[8:11], 0x22 ; C2018922 s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924 s_buffer_load_dword s16, s[8:11], 0x25 ; C2080925 v_mul_f32_e32 v17, v27, v27 ; 1022371B v_mac_f32_e32 v17, v28, v28 ; 3E22391C v_mac_f32_e32 v17, v29, v29 ; 3E223B1D v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 s_buffer_load_dword s17, s[8:11], 0x26 ; C2088926 s_buffer_load_dword s18, s[8:11], 0x28 ; C2090928 s_buffer_load_dword s19, s[8:11], 0x29 ; C2098929 s_buffer_load_dword s20, s[8:11], 0x2a ; C20A092A v_mul_f32_e32 v18, v17, v27 ; 10243711 v_mul_f32_e32 v19, v17, v28 ; 10263911 v_mul_f32_e32 v17, v17, v29 ; 10223B11 v_rcp_f32_e32 v21, v18 ; 7E2A5512 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v23, s1, v25 ; 082E3201 v_sub_f32_e32 v24, s2, v20 ; 08302802 v_rcp_f32_e32 v30, v19 ; 7E3C5513 v_mul_f32_e32 v23, v21, v23 ; 102E2F15 v_sub_f32_e32 v31, s13, v25 ; 083E320D v_mul_f32_e32 v21, v21, v31 ; 102A3F15 v_cmp_lt_f32_e32 vcc, 0, v18 ; 7C022480 v_cndmask_b32_e32 v21, v21, v23 ; 002A2F15 v_rcp_f32_e32 v23, v17 ; 7E2E5511 v_mul_f32_e32 v24, v30, v24 ; 1030311E v_sub_f32_e32 v31, s16, v20 ; 083E2810 v_mul_f32_e32 v30, v30, v31 ; 103C3F1E v_cmp_lt_f32_e32 vcc, 0, v19 ; 7C022680 v_cndmask_b32_e32 v24, v30, v24 ; 0030311E v_sub_f32_e32 v30, s3, v26 ; 083C3403 v_mul_f32_e32 v30, v23, v30 ; 103C3D17 v_sub_f32_e32 v31, s17, v26 ; 083E3411 v_mul_f32_e32 v23, v23, v31 ; 102E3F17 v_cmp_lt_f32_e32 vcc, 0, v17 ; 7C022280 v_cndmask_b32_e32 v23, v23, v30 ; 002E3D17 v_min3_f32 v21, v21, v24, v23 ; D2A20015 045E3115 v_mov_b32_e32 v23, s13 ; 7E2E020D v_add_f32_e32 v23, s1, v23 ; 062E2E01 v_mov_b32_e32 v24, s16 ; 7E300210 v_add_f32_e32 v24, s2, v24 ; 06303002 v_mov_b32_e32 v30, s17 ; 7E3C0211 v_add_f32_e32 v32, s3, v30 ; 06403C03 v_mad_f32 v30, 0.5, v23, -s18 ; D282001E 804A2EF0 v_add_f32_e32 v30, v25, v30 ; 063C3D19 v_mac_f32_e32 v30, v21, v18 ; 3E3C2515 v_mad_f32 v18, 0.5, v24, -s19 ; D2820012 804E30F0 v_add_f32_e32 v18, v20, v18 ; 06242514 v_mac_f32_e32 v18, v21, v19 ; 3E242715 v_mad_f32 v19, 0.5, v32, -s20 ; D2820013 805240F0 v_add_f32_e32 v19, v26, v19 ; 0626271A v_mac_f32_e32 v19, v21, v17 ; 3E262315 v_mad_f32 v30, 0.5, -v23, v30 ; D282001E 447A2EF0 v_mad_f32 v31, 0.5, -v24, v18 ; D282001F 444A30F0 v_mad_f32 v32, 0.5, -v32, v19 ; D2820020 444E40F0 s_or_b64 exec, exec, s[14:15] ; 88FE0E7E s_buffer_load_dword s27, s[8:11], 0x17 ; C20D8917 s_buffer_load_dword s28, s[8:11], 0x43 ; C20E0943 s_buffer_load_dword s26, s[8:11], 0x44 ; C20D0944 s_buffer_load_dword s19, s[8:11], 0x45 ; C2098945 s_buffer_load_dword s16, s[8:11], 0x46 ; C2080946 s_buffer_load_dword s2, s[8:11], 0x0 ; C2010900 s_buffer_load_dword s3, s[8:11], 0x1 ; C2018901 s_buffer_load_dword s1, s[8:11], 0x2 ; C2008902 s_buffer_load_dword s13, s[8:11], 0x4 ; C2068904 s_buffer_load_dword s14, s[8:11], 0x5 ; C2070905 s_buffer_load_dword s15, s[8:11], 0x6 ; C2078906 s_buffer_load_dword s17, s[8:11], 0x7 ; C2088907 s_buffer_load_dword s18, s[8:11], 0x8 ; C2090908 s_buffer_load_dword s20, s[8:11], 0x9 ; C20A0909 s_buffer_load_dword s21, s[8:11], 0xa ; C20A890A s_buffer_load_dword s22, s[8:11], 0xb ; C20B090B s_buffer_load_dword s23, s[8:11], 0xc ; C20B890C s_buffer_load_dword s24, s[8:11], 0xd ; C20C090D s_buffer_load_dword s25, s[8:11], 0xe ; C20C890E v_sub_f32_e64 v17, 1.0, s0 ; D2080011 000000F2 v_log_f32_e32 v17, v17 ; 7E224F11 v_mul_legacy_f32_e32 v17, 0x3f400000, v17 ; 0E2222FF 3F400000 v_exp_f32_e32 v17, v17 ; 7E224B11 v_mul_f32_e32 v33, 0x40e00000, v17 ; 104222FF 40E00000 v_cubeid_f32 v37, v30, v31, v32 ; D2880025 04823F1E v_cubema_f32 v36, v30, v31, v32 ; D28E0024 04823F1E s_load_dwordx4 s[36:39], s[4:5], 0x0 ; C0920500 s_load_dwordx8 s[40:47], s[6:7], 0x0 ; C0D40700 v_cubesc_f32 v35, v30, v31, v32 ; D28A0023 04823F1E v_cubetc_f32 v34, v30, v31, v32 ; D28C0022 04823F1E v_rcp_f32_e64 v17, |v36| ; D3540111 00000124 v_mov_b32_e32 v30, 0x3fc00000 ; 7E3C02FF 3FC00000 v_mad_f32 v31, v17, v34, v30 ; D282001F 047A4511 v_mac_f32_e32 v30, v17, v35 ; 3E3C4711 v_mov_b32_e32 v32, v37 ; 7E400325 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[30:33], 15, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[40:47], s[36:39] ; F0900F00 012A1E1E s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v17, v33 ; 7E224F21 s_buffer_load_dword s29, s[8:11], 0xf ; C20E890F s_buffer_load_dword s31, s[8:11], 0x64 ; C20F8964 v_mul_legacy_f32_e32 v17, s33, v17 ; 0E222221 v_exp_f32_e32 v17, v17 ; 7E224B11 v_mul_f32_e32 v17, s32, v17 ; 10222220 v_mul_f32_e32 v19, v30, v17 ; 1026231E v_mul_f32_e32 v18, v31, v17 ; 1024231F v_mul_f32_e32 v17, v32, v17 ; 10222320 v_mov_b32_e32 v21, s30 ; 7E2A021E v_mov_b32_e32 v23, 0x3f7fff58 ; 7E2E02FF 3F7FFF58 v_cmp_lt_f32_e32 vcc, s12, v23 ; 7C022E0C s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[32:33], vcc ; BEA0246A s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E s_cbranch_execz BB0_6 ; BF880000 s_buffer_load_dword s35, s[8:11], 0x3b ; C211893B s_buffer_load_dword s30, s[8:11], 0x3c ; C20F093C s_buffer_load_dword s34, s[8:11], 0x3d ; C211093D s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[36:37], 0, s35 ; D0020024 00004680 s_and_saveexec_b64 s[36:37], s[36:37] ; BEA42424 s_xor_b64 s[36:37], exec, s[36:37] ; 89A4247E s_cbranch_execz BB0_7 ; BF880000 s_buffer_load_dword s35, s[8:11], 0x36 ; C2118936 s_buffer_load_dword s38, s[8:11], 0x38 ; C2130938 s_buffer_load_dword s39, s[8:11], 0x39 ; C2138939 s_buffer_load_dword s40, s[8:11], 0x3a ; C214093A s_buffer_load_dword s41, s[8:11], 0x30 ; C2148930 s_buffer_load_dword s42, s[8:11], 0x31 ; C2150931 s_buffer_load_dword s43, s[8:11], 0x32 ; C2158932 s_buffer_load_dword s44, s[8:11], 0x34 ; C2160934 s_buffer_load_dword s45, s[8:11], 0x35 ; C2168935 v_mul_f32_e32 v23, v27, v27 ; 102E371B v_mac_f32_e32 v23, v28, v28 ; 3E2E391C v_mac_f32_e32 v23, v29, v29 ; 3E2E3B1D v_rsq_clamp_f32_e32 v23, v23 ; 7E2E5917 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v24, s35, v26 ; 08303423 v_mov_b32_e32 v30, s35 ; 7E3C0223 v_sub_f32_e32 v31, s41, v25 ; 083E3229 v_sub_f32_e32 v32, s42, v20 ; 0840282A v_add_f32_e32 v30, s43, v30 ; 063C3C2B v_sub_f32_e32 v33, s43, v26 ; 0842342B v_mad_f32 v34, 0.5, v30, -s40 ; D2820022 80A23CF0 v_add_f32_e32 v26, v26, v34 ; 0634451A v_mul_f32_e32 v27, v23, v27 ; 10363717 v_mul_f32_e32 v28, v23, v28 ; 10383917 v_mul_f32_e32 v23, v23, v29 ; 102E3B17 v_rcp_f32_e32 v29, v27 ; 7E3A551B v_rcp_f32_e32 v34, v28 ; 7E44551C v_rcp_f32_e32 v35, v23 ; 7E465517 v_sub_f32_e32 v36, s44, v25 ; 0848322C v_mov_b32_e32 v37, s44 ; 7E4A022C v_add_f32_e32 v37, s41, v37 ; 064A4A29 v_mul_f32_e32 v31, v29, v31 ; 103E3F1D v_mul_f32_e32 v29, v29, v36 ; 103A491D v_mul_f32_e32 v32, v34, v32 ; 10404122 v_mul_f32_e32 v33, v35, v33 ; 10424323 v_mul_f32_e32 v24, v35, v24 ; 10303123 v_mad_f32 v35, 0.5, v37, -s38 ; D2820023 809A4AF0 v_add_f32_e32 v25, v25, v35 ; 06324719 v_sub_f32_e32 v35, s45, v20 ; 0846282D v_mov_b32_e32 v36, s45 ; 7E48022D v_mul_f32_e32 v34, v34, v35 ; 10444722 v_add_f32_e32 v35, s42, v36 ; 0646482A v_cmp_lt_f32_e32 vcc, 0, v27 ; 7C023680 v_cndmask_b32_e32 v29, v29, v31 ; 003A3F1D v_cmp_lt_f32_e32 vcc, 0, v28 ; 7C023880 v_cndmask_b32_e32 v31, v34, v32 ; 003E4122 v_cmp_lt_f32_e32 vcc, 0, v23 ; 7C022E80 v_cndmask_b32_e32 v24, v24, v33 ; 00304318 v_min3_f32 v24, v29, v31, v24 ; D2A20018 04623F1D v_mad_f32 v29, 0.5, v35, -s39 ; D282001D 809E46F0 v_add_f32_e32 v20, v20, v29 ; 06283B14 v_mac_f32_e32 v25, v24, v27 ; 3E323718 v_mac_f32_e32 v20, v24, v28 ; 3E283918 v_mac_f32_e32 v26, v24, v23 ; 3E342F18 v_mad_f32 v27, 0.5, -v37, v25 ; D282001B 44664AF0 v_mad_f32 v28, 0.5, -v35, v20 ; D282001C 445246F0 v_mad_f32 v29, 0.5, -v30, v26 ; D282001D 446A3CF0 s_or_b64 exec, exec, s[36:37] ; 88FE247E v_sub_f32_e64 v20, 1.0, s0 ; D2080014 000000F2 v_log_f32_e32 v20, v20 ; 7E284F14 s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 v_mul_legacy_f32_e32 v20, 0x3f400000, v20 ; 0E2828FF 3F400000 v_exp_f32_e32 v20, v20 ; 7E284B14 v_mul_f32_e32 v30, 0x40e00000, v20 ; 103C28FF 40E00000 v_cubeid_f32 v26, v27, v28, v29 ; D288001A 0476391B v_cubema_f32 v25, v27, v28, v29 ; D28E0019 0476391B s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 v_cubesc_f32 v24, v27, v28, v29 ; D28A0018 0476391B v_cubetc_f32 v23, v27, v28, v29 ; D28C0017 0476391B v_rcp_f32_e64 v20, |v25| ; D3540114 00000119 v_mov_b32_e32 v27, 0x3fc00000 ; 7E3602FF 3FC00000 v_mad_f32 v28, v20, v23, v27 ; D282001C 046E2F14 v_mac_f32_e32 v27, v20, v24 ; 3E363114 v_mov_b32_e32 v29, v26 ; 7E3A031A s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[40:47], s[36:39] ; F0900F00 012A171B s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v20, v26 ; 7E284F1A v_sub_f32_e64 v26, 1.0, s12 ; D208001A 000018F2 v_mul_legacy_f32_e32 v20, s34, v20 ; 0E282822 v_exp_f32_e32 v20, v20 ; 7E284B14 v_mul_f32_e32 v20, s30, v20 ; 1028281E v_mul_f32_e32 v23, v23, v20 ; 102E2917 v_mul_f32_e32 v24, v24, v20 ; 10302918 v_mul_f32_e32 v20, v25, v20 ; 10282919 v_mul_f32_e32 v23, v23, v26 ; 102E3517 v_mul_f32_e32 v24, v24, v26 ; 10303518 v_mul_f32_e32 v20, v20, v26 ; 10283514 v_mac_f32_e32 v23, s12, v19 ; 3E2E260C v_mac_f32_e32 v24, s12, v18 ; 3E30240C v_mac_f32_e32 v20, s12, v17 ; 3E28220C v_mov_b32_e32 v17, v20 ; 7E220314 v_mov_b32_e32 v18, v24 ; 7E240318 v_mov_b32_e32 v19, v23 ; 7E260317 s_or_b64 exec, exec, s[32:33] ; 88FE207E v_mad_f32 v24, -v21, s28, s28 ; D2820018 20703915 v_mov_b32_e32 v20, s27 ; 7E28021B v_mul_f32_e32 v21, v24, v13 ; 102A1B18 v_mul_f32_e32 v14, v24, v14 ; 101C1D18 v_mul_f32_e32 v13, v24, v15 ; 101A1F18 v_mul_f32_e32 v15, s26, v16 ; 101E201A v_sub_f32_e64 v25, 1.0, s31 ; D2080019 00003EF2 v_mac_f32_e32 v25, s31, v22 ; 3E322C1F v_mul_f32_e32 v22, s19, v16 ; 102C2013 v_mul_f32_e32 v16, s16, v16 ; 10202010 s_buffer_load_dword s6, s[8:11], 0x10 ; C2030910 s_buffer_load_dword s5, s[8:11], 0x11 ; C2028911 s_buffer_load_dword s4, s[8:11], 0x12 ; C2020912 s_buffer_load_dword s16, s[8:11], 0x16 ; C2080916 s_buffer_load_dword s7, s[8:11], 0x48 ; C2038948 s_buffer_load_dword s12, s[8:11], 0x49 ; C2060949 s_buffer_load_dword s8, s[8:11], 0x4b ; C204094B v_mul_f32_e32 v23, s14, v1 ; 102E020E v_mac_f32_e32 v23, s13, v2 ; 3E2E040D v_mac_f32_e32 v23, s15, v0 ; 3E2E000F v_add_f32_e32 v23, s17, v23 ; 062E2E11 v_mul_f32_e32 v26, s20, v1 ; 10340214 v_mac_f32_e32 v26, s18, v2 ; 3E340412 v_mac_f32_e32 v26, s21, v0 ; 3E340015 v_add_f32_e32 v26, s22, v26 ; 06343416 v_mul_f32_e32 v27, s24, v1 ; 10360218 v_mac_f32_e32 v27, s23, v2 ; 3E360417 v_mac_f32_e32 v27, s25, v0 ; 3E360019 v_add_f32_e32 v27, s29, v27 ; 0636361D v_add_f32_e32 v5, v23, v5 ; 060A0B17 v_add_f32_e32 v7, v26, v7 ; 060E0F1A v_add_f32_e32 v26, v27, v8 ; 0634111B v_mul_f32_e32 v8, s2, v2 ; 10100402 v_mac_f32_e32 v8, s3, v1 ; 3E100203 v_mac_f32_e32 v8, s1, v0 ; 3E100001 v_max_f32_e32 v23, 0, v8 ; 202E1080 v_mul_f32_e32 v8, v25, v5 ; 10100B19 v_mul_f32_e32 v5, v25, v7 ; 100A0F19 v_mul_f32_e32 v7, v25, v26 ; 100E3519 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v20, s16, v9 ; 3E281210 v_mul_f32_e32 v9, v25, v19 ; 10122719 v_mul_f32_e32 v18, v25, v18 ; 10242519 v_mul_f32_e32 v17, v25, v17 ; 10222319 v_sub_f32_e32 v19, 1.0, v24 ; 082630F2 v_add_f32_e32 v19, s0, v19 ; 06262600 v_add_f32_e64 v19, 0, v19 clamp ; D2060813 00022680 v_sub_f32_e32 v24, s2, v11 ; 08301602 v_sub_f32_e32 v25, s3, v10 ; 08321403 v_mul_f32_e32 v26, v24, v24 ; 10343118 v_mac_f32_e32 v26, v25, v25 ; 3E343319 v_sub_f32_e32 v27, s1, v12 ; 08361801 v_mac_f32_e32 v26, v27, v27 ; 3E34371B v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A v_mul_f32_e32 v24, v26, v24 ; 1030311A v_mul_f32_e32 v25, v26, v25 ; 1032331A v_mul_f32_e32 v26, v26, v27 ; 1034371A v_mul_f32_e32 v11, v11, v2 ; 1016050B v_mad_f32 v10, -v10, v1, -v11 ; D282000A A42E030A v_mad_f32 v10, -v12, v0, v10 ; D282000A 242A010C v_mul_f32_e32 v2, v24, v2 ; 10040518 v_mac_f32_e32 v2, v25, v1 ; 3E040319 v_mul_f32_e32 v1, s2, v24 ; 10023002 v_mac_f32_e32 v1, s3, v25 ; 3E023203 v_mac_f32_e32 v2, v26, v0 ; 3E04011A v_mac_f32_e32 v1, s1, v26 ; 3E023401 v_max_f32_e32 v0, 0, v1 ; 20000280 v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 v_mul_f32_e32 v11, v1, v1 ; 10160301 v_mul_f32_e32 v1, v1, v11 ; 10021701 v_mul_f32_e32 v1, v1, v11 ; 10021701 v_max_f32_e32 v10, 0, v10 ; 20141480 v_sub_f32_e32 v11, 1.0, v10 ; 081614F2 v_mul_f32_e32 v12, v11, v11 ; 1018170B v_mul_f32_e32 v24, v11, v12 ; 1030190B v_mad_f32 v25, -v12, v24, 1.0 ; D2820019 23CA310C v_mul_f32_e32 v26, v6, v25 ; 10343306 v_sub_f32_e32 v27, 1.0, v6 ; 08360CF2 v_mac_f32_e32 v6, v1, v27 ; 3E0C3701 v_mul_f32_e32 v27, v4, v25 ; 10363304 v_sub_f32_e32 v28, 1.0, v4 ; 083808F2 v_mac_f32_e32 v4, v1, v28 ; 3E083901 v_mul_f32_e32 v25, v3, v25 ; 10323303 v_sub_f32_e32 v28, 1.0, v3 ; 083806F2 v_mac_f32_e32 v3, v1, v28 ; 3E063901 v_sub_f32_e64 v1, 1.0, s0 ; D2080001 000000F2 v_sub_f32_e32 v28, 1.0, v1 ; 083802F2 v_mov_b32_e32 v29, 0x3cf5c28f ; 7E3A02FF 3CF5C28F v_madmk_f32_e32 v28, v28, v29, 0x3f77ced9 ; 40383B1C 3F77CED9 v_add_f32_e32 v29, v0, v0 ; 063A0100 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mad_f32 v0, v29, v0, 0.5 ; D2820000 03C2011D v_mul_f32_e32 v12, v24, v12 ; 10181918 v_mac_f32_e32 v26, v19, v12 ; 3E341913 v_mac_f32_e32 v27, v19, v12 ; 3E361913 v_mac_f32_e32 v25, v19, v12 ; 3E321913 v_mul_f32_e32 v1, v1, v1 ; 10020301 v_log_f32_e32 v19, v28 ; 7E264F1C v_mul_f32_e32 v1, s8, v1 ; 10020208 v_mul_f32_e32 v11, v1, v11 ; 10161701 v_mac_f32_e32 v11, 1.0, v10 ; 3E1614F2 v_rcp_f32_e32 v10, v19 ; 7E145513 v_sub_f32_e32 v19, 1.0, v23 ; 08262EF2 v_mul_f32_e32 v1, v1, v19 ; 10022701 v_mac_f32_e32 v1, 1.0, v23 ; 3E022EF2 v_max_f32_e32 v2, 0, v2 ; 20040480 v_log_f32_e32 v2, v2 ; 7E044F02 v_madak_f32_e32 v1, v1, v11, 0x38d1b717 ; 42021701 38D1B717 v_mul_f32_e32 v10, 0x41200000, v10 ; 101414FF 41200000 v_mul_f32_e32 v11, v10, v10 ; 1016150A v_mul_legacy_f32_e32 v2, v11, v2 ; 0E04050B v_rcp_f32_e32 v1, v1 ; 7E025501 v_mad_f32 v10, v10, v10, 1.0 ; D282000A 03CA150A v_mul_f32_e32 v10, s12, v10 ; 1014140C v_exp_f32_e32 v2, v2 ; 7E044B02 v_mul_f32_e32 v2, v10, v2 ; 1004050A v_mul_f32_e32 v1, v2, v1 ; 10020302 v_mul_f32_e32 v2, v19, v19 ; 10042713 v_mul_f32_e32 v10, v19, v2 ; 10140513 v_mul_f32_e32 v2, v10, v2 ; 1004050A v_add_f32_e32 v0, -1.0, v0 ; 060000F3 v_mad_f32 v2, v0, v2, 1.0 ; D2820002 03CA0500 v_mad_f32 v0, v0, v12, 1.0 ; D2820000 03CA1900 v_mul_f32_e32 v0, v0, v2 ; 10000500 v_mul_f32_e32 v1, v23, v1 ; 10020317 v_mul_f32_e32 v1, s7, v1 ; 10020207 v_mul_f32_e32 v0, v23, v0 ; 10000117 v_mac_f32_e32 v8, v0, v15 ; 3E101F00 v_mul_f32_e32 v2, v8, v21 ; 10042B08 v_max_f32_e32 v1, 0, v1 ; 20020280 v_mul_f32_e32 v8, v15, v1 ; 1010030F v_mac_f32_e32 v2, v6, v8 ; 3E041106 v_mac_f32_e32 v5, v0, v22 ; 3E0A2D00 v_mac_f32_e32 v7, v0, v16 ; 3E0E2100 v_mul_f32_e32 v0, v22, v1 ; 10000316 v_mul_f32_e32 v1, v16, v1 ; 10020310 v_mul_f32_e32 v5, v5, v14 ; 100A1D05 v_mul_f32_e32 v6, v7, v13 ; 100C1B07 v_mac_f32_e32 v5, v4, v0 ; 3E0A0104 v_mac_f32_e32 v6, v3, v1 ; 3E0C0303 v_mac_f32_e32 v2, v26, v9 ; 3E04131A v_mac_f32_e32 v5, v27, v18 ; 3E0A251B v_mac_f32_e32 v6, v25, v17 ; 3E0C2319 v_add_f32_e64 v0, 0, v20 clamp ; D2060800 00022880 v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 v_mul_f32_e32 v3, s6, v1 ; 10060206 v_mac_f32_e32 v3, v2, v0 ; 3E060102 v_mul_f32_e32 v2, s5, v1 ; 10040205 v_mac_f32_e32 v2, v5, v0 ; 3E040105 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mac_f32_e32 v1, v6, v0 ; 3E020106 v_cvt_pkrtz_f16_f32_e32 v0, v3, v2 ; 5E000503 v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 40 Code Size: 2264 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL OUT[6], GENERIC[5] DCL OUT[7], GENERIC[6] DCL CONST[0..19] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[5], IN[0].xxxx 1: MAD TEMP[0], CONST[6], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[7], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0].xyz, CONST[8], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[16], IN[0].xxxx 5: MAD TEMP[1], CONST[17], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[18], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1], CONST[19], IN[0].wwww, TEMP[1] 8: MAD TEMP[2].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww 9: FSEQ TEMP[3].x, CONST[15].xxxx, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].xy, IN[2].xyxx 12: ELSE :0 13: MOV TEMP[3].xy, IN[3].xyxx 14: ENDIF 15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[14].xyyy, CONST[14].zwww 16: MOV TEMP[2].zw, TEMP[3].yyxy 17: MOV TEMP[3].x, CONST[9].xxxx 18: MOV TEMP[3].y, CONST[10].xxxx 19: MOV TEMP[3].z, CONST[11].xxxx 20: MOV TEMP[4].x, CONST[9].yyyy 21: MOV TEMP[4].y, CONST[10].yyyy 22: MOV TEMP[4].z, CONST[11].yyyy 23: MOV TEMP[5].x, CONST[9].zzzz 24: MOV TEMP[5].y, CONST[10].zzzz 25: MOV TEMP[5].z, CONST[11].zzzz 26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz 29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 30: RSQ TEMP[4].x, TEMP[4].xxxx 31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 32: MUL TEMP[4].xyz, CONST[5].xyzz, IN[4].xxxx 33: MAD TEMP[4].xyz, CONST[6].xyzz, IN[4].yyyy, TEMP[4].xyzz 34: MAD TEMP[4].xyz, CONST[7].xyzz, IN[4].zzzz, TEMP[4].xyzz 35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 36: RSQ TEMP[5].x, TEMP[5].xxxx 37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx 39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz 40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww 41: MOV TEMP[4].xyz, TEMP[4].xyzx 42: MOV TEMP[5].xyz, TEMP[5].xyzx 43: MOV TEMP[6].xyz, TEMP[3].xyzx 44: MUL TEMP[7], TEMP[3].xyzz, TEMP[3].yzzx 45: DP4 TEMP[8].x, CONST[1], TEMP[7] 46: DP4 TEMP[9].x, CONST[2], TEMP[7] 47: MOV TEMP[8].y, TEMP[9].xxxx 48: DP4 TEMP[7].x, CONST[3], TEMP[7] 49: MOV TEMP[8].z, TEMP[7].xxxx 50: MUL TEMP[7].x, TEMP[3].yyyy, TEMP[3].yyyy 51: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[7].xxxx 52: MAD TEMP[3].xyz, CONST[4].xyzz, TEMP[3].xxxx, TEMP[8].xyzz 53: ADD TEMP[7].xyz, TEMP[0].xyzz, -CONST[0].xyzz 54: MOV TEMP[7].yzw, TEMP[7].yxyz 55: MOV TEMP[7].x, TEMP[1].zzzz 56: MOV TEMP[0].xyz, TEMP[0].xyzx 57: MOV OUT[7], TEMP[0] 58: MOV OUT[1], TEMP[2] 59: MOV OUT[3], TEMP[5] 60: MOV OUT[2], TEMP[4] 61: MOV OUT[4], TEMP[6] 62: MOV OUT[5], TEMP[3] 63: MOV OUT[0], TEMP[1] 64: MOV OUT[6], TEMP[7] 65: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %5, %7 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = extractelement <4 x float> %83, i32 3 %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 %90 = add i32 %5, %7 %91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %90) %92 = extractelement <4 x float> %91, i32 0 %93 = extractelement <4 x float> %91, i32 1 %94 = extractelement <4 x float> %91, i32 2 %95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0 %97 = add i32 %5, %7 %98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97) %99 = extractelement <4 x float> %98, i32 0 %100 = extractelement <4 x float> %98, i32 1 %101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0 %103 = add i32 %5, %7 %104 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %103) %105 = extractelement <4 x float> %104, i32 0 %106 = extractelement <4 x float> %104, i32 1 %107 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !tbaa !0 %109 = add i32 %5, %7 %110 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %108, i32 0, i32 %109) %111 = extractelement <4 x float> %110, i32 0 %112 = extractelement <4 x float> %110, i32 1 %113 = extractelement <4 x float> %110, i32 2 %114 = extractelement <4 x float> %110, i32 3 %115 = fmul float %31, %84 %116 = fmul float %32, %84 %117 = fmul float %33, %84 %118 = fmul float %34, %84 %119 = fmul float %35, %85 %120 = fadd float %119, %115 %121 = fmul float %36, %85 %122 = fadd float %121, %116 %123 = fmul float %37, %85 %124 = fadd float %123, %117 %125 = fmul float %38, %85 %126 = fadd float %125, %118 %127 = fmul float %39, %86 %128 = fadd float %127, %120 %129 = fmul float %40, %86 %130 = fadd float %129, %122 %131 = fmul float %41, %86 %132 = fadd float %131, %124 %133 = fmul float %42, %86 %134 = fadd float %133, %126 %135 = fmul float %43, %87 %136 = fadd float %135, %128 %137 = fmul float %44, %87 %138 = fadd float %137, %130 %139 = fmul float %45, %87 %140 = fadd float %139, %132 %141 = fmul float %64, %84 %142 = fmul float %65, %84 %143 = fmul float %66, %84 %144 = fmul float %67, %84 %145 = fmul float %68, %85 %146 = fadd float %145, %141 %147 = fmul float %69, %85 %148 = fadd float %147, %142 %149 = fmul float %70, %85 %150 = fadd float %149, %143 %151 = fmul float %71, %85 %152 = fadd float %151, %144 %153 = fmul float %72, %86 %154 = fadd float %153, %146 %155 = fmul float %73, %86 %156 = fadd float %155, %148 %157 = fmul float %74, %86 %158 = fadd float %157, %150 %159 = fmul float %75, %86 %160 = fadd float %159, %152 %161 = fmul float %76, %87 %162 = fadd float %161, %154 %163 = fmul float %77, %87 %164 = fadd float %163, %156 %165 = fmul float %78, %87 %166 = fadd float %165, %158 %167 = fmul float %79, %87 %168 = fadd float %167, %160 %169 = fmul float %99, %55 %170 = fadd float %169, %57 %171 = fmul float %100, %56 %172 = fadd float %171, %58 %173 = fcmp oeq float %63, 0.000000e+00 %. = select i1 %173, float %99, float %105 %.40 = select i1 %173, float %100, float %106 %174 = fmul float %., %59 %175 = fadd float %174, %61 %176 = fmul float %.40, %60 %177 = fadd float %176, %62 %178 = fmul float %46, %92 %179 = fmul float %49, %92 %180 = fmul float %52, %92 %181 = fmul float %47, %93 %182 = fadd float %181, %178 %183 = fmul float %50, %93 %184 = fadd float %183, %179 %185 = fmul float %53, %93 %186 = fadd float %185, %180 %187 = fmul float %48, %94 %188 = fadd float %187, %182 %189 = fmul float %51, %94 %190 = fadd float %189, %184 %191 = fmul float %54, %94 %192 = fadd float %191, %186 %193 = fmul float %188, %188 %194 = fmul float %190, %190 %195 = fadd float %194, %193 %196 = fmul float %192, %192 %197 = fadd float %195, %196 %198 = call float @llvm.AMDGPU.rsq.clamped.f32(float %197) %199 = fmul float %188, %198 %200 = fmul float %190, %198 %201 = fmul float %192, %198 %202 = fmul float %31, %111 %203 = fmul float %32, %111 %204 = fmul float %33, %111 %205 = fmul float %35, %112 %206 = fadd float %205, %202 %207 = fmul float %36, %112 %208 = fadd float %207, %203 %209 = fmul float %37, %112 %210 = fadd float %209, %204 %211 = fmul float %39, %113 %212 = fadd float %211, %206 %213 = fmul float %40, %113 %214 = fadd float %213, %208 %215 = fmul float %41, %113 %216 = fadd float %215, %210 %217 = fmul float %212, %212 %218 = fmul float %214, %214 %219 = fadd float %218, %217 %220 = fmul float %216, %216 %221 = fadd float %219, %220 %222 = call float @llvm.AMDGPU.rsq.clamped.f32(float %221) %223 = fmul float %212, %222 %224 = fmul float %214, %222 %225 = fmul float %216, %222 %226 = fmul float %201, %224 %227 = fmul float %199, %225 %228 = fmul float %200, %223 %229 = fmul float %200, %225 %230 = fsub float %229, %226 %231 = fmul float %201, %223 %232 = fsub float %231, %227 %233 = fmul float %199, %224 %234 = fsub float %233, %228 %235 = fmul float %230, %114 %236 = fmul float %232, %114 %237 = fmul float %234, %114 %238 = fmul float %199, %200 %239 = fmul float %200, %201 %240 = fmul float %201, %201 %241 = fmul float %201, %199 %242 = fmul float %16, %238 %243 = fmul float %17, %239 %244 = fadd float %242, %243 %245 = fmul float %18, %240 %246 = fadd float %244, %245 %247 = fmul float %19, %241 %248 = fadd float %246, %247 %249 = fmul float %20, %238 %250 = fmul float %21, %239 %251 = fadd float %249, %250 %252 = fmul float %22, %240 %253 = fadd float %251, %252 %254 = fmul float %23, %241 %255 = fadd float %253, %254 %256 = fmul float %24, %238 %257 = fmul float %25, %239 %258 = fadd float %256, %257 %259 = fmul float %26, %240 %260 = fadd float %258, %259 %261 = fmul float %27, %241 %262 = fadd float %260, %261 %263 = fmul float %200, %200 %264 = fmul float %199, %199 %265 = fsub float %264, %263 %266 = fmul float %28, %265 %267 = fadd float %266, %248 %268 = fmul float %29, %265 %269 = fadd float %268, %255 %270 = fmul float %30, %265 %271 = fadd float %270, %262 %272 = fsub float %136, %13 %273 = fsub float %138, %14 %274 = fsub float %140, %15 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %170, float %172, float %175, float %177) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %223, float %224, float %225, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %235, float %236, float %237, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %199, float %200, float %201, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %267, float %269, float %271, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %166, float %272, float %273, float %274) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %136, float %138, float %140, float %134) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %162, float %164, float %166, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908 s_load_dwordx4 s[16:19], s[8:9], 0xc ; C088090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s27, s[28:31], 0x20 ; C20D9D20 buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[11:14], v0, s[16:19], 0 idxen ; E00C2000 80040B00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[13:16], v0, s[8:11], 0 idxen ; E00C2000 80020D00 s_buffer_load_dword s32, s[28:31], 0x21 ; C2101D21 s_buffer_load_dword s33, s[28:31], 0x22 ; C2109D22 s_buffer_load_dword s34, s[28:31], 0x24 ; C2111D24 s_buffer_load_dword s35, s[28:31], 0x25 ; C2119D25 s_buffer_load_dword s1, s[28:31], 0x10 ; C2009D10 s_buffer_load_dword s2, s[28:31], 0x11 ; C2011D11 s_buffer_load_dword s0, s[28:31], 0x12 ; C2001D12 s_buffer_load_dword s18, s[28:31], 0x14 ; C2091D14 s_buffer_load_dword s17, s[28:31], 0x15 ; C2089D15 s_buffer_load_dword s36, s[28:31], 0x26 ; C2121D26 s_buffer_load_dword s37, s[28:31], 0x28 ; C2129D28 s_buffer_load_dword s38, s[28:31], 0x29 ; C2131D29 s_buffer_load_dword s39, s[28:31], 0x2a ; C2139D2A s_buffer_load_dword s40, s[28:31], 0x2c ; C2141D2C s_buffer_load_dword s20, s[28:31], 0x16 ; C20A1D16 s_buffer_load_dword s41, s[28:31], 0x17 ; C2149D17 s_buffer_load_dword s26, s[28:31], 0x18 ; C20D1D18 s_buffer_load_dword s25, s[28:31], 0x19 ; C20C9D19 s_buffer_load_dword s21, s[28:31], 0x1a ; C20A9D1A s_buffer_load_dword s42, s[28:31], 0x1b ; C2151D1B s_buffer_load_dword s23, s[28:31], 0x1c ; C20B9D1C s_buffer_load_dword s24, s[28:31], 0x1d ; C20C1D1D s_buffer_load_dword s22, s[28:31], 0x1e ; C20B1D1E s_buffer_load_dword s43, s[28:31], 0x1f ; C2159D1F s_buffer_load_dword s44, s[28:31], 0x2d ; C2161D2D s_buffer_load_dword s45, s[28:31], 0x2e ; C2169D2E s_buffer_load_dword s46, s[28:31], 0x34 ; C2171D34 s_buffer_load_dword s47, s[28:31], 0x35 ; C2179D35 s_buffer_load_dword s3, s[28:31], 0x36 ; C2019D36 s_buffer_load_dword s4, s[28:31], 0x3c ; C2021D3C s_buffer_load_dword s48, s[28:31], 0x40 ; C2181D40 s_buffer_load_dword s49, s[28:31], 0x41 ; C2189D41 s_buffer_load_dword s50, s[28:31], 0x42 ; C2191D42 s_buffer_load_dword s51, s[28:31], 0x43 ; C2199D43 s_buffer_load_dword s6, s[28:31], 0x37 ; C2031D37 s_buffer_load_dword s52, s[28:31], 0x38 ; C21A1D38 s_buffer_load_dword s53, s[28:31], 0x39 ; C21A9D39 s_buffer_load_dword s8, s[28:31], 0x3a ; C2041D3A s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s3 ; 7E000203 s_buffer_load_dword s10, s[28:31], 0x3b ; C2051D3B v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880 s_buffer_load_dword s3, s[28:31], 0x0 ; C2019D00 s_buffer_load_dword s4, s[28:31], 0x1 ; C2021D01 s_buffer_load_dword s5, s[28:31], 0x2 ; C2029D02 s_buffer_load_dword s9, s[28:31], 0x4 ; C2049D04 v_mov_b32_e32 v17, s6 ; 7E220206 s_buffer_load_dword s14, s[28:31], 0x5 ; C2071D05 s_buffer_load_dword s7, s[28:31], 0x6 ; C2039D06 s_buffer_load_dword s6, s[28:31], 0x7 ; C2031D07 v_mov_b32_e32 v18, s8 ; 7E240208 s_buffer_load_dword s12, s[28:31], 0x8 ; C2061D08 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v19, s10 ; 7E26020A s_buffer_load_dword s16, s[28:31], 0x9 ; C2081D09 s_buffer_load_dword s10, s[28:31], 0xa ; C2051D0A s_buffer_load_dword s8, s[28:31], 0xb ; C2041D0B s_buffer_load_dword s15, s[28:31], 0xc ; C2079D0C s_buffer_load_dword s19, s[28:31], 0xd ; C2099D0D s_buffer_load_dword s13, s[28:31], 0xe ; C2069D0E s_buffer_load_dword s11, s[28:31], 0xf ; C2059D0F s_buffer_load_dword s54, s[28:31], 0x44 ; C21B1D44 s_buffer_load_dword s55, s[28:31], 0x45 ; C21B9D45 s_buffer_load_dword s56, s[28:31], 0x46 ; C21C1D46 s_buffer_load_dword s57, s[28:31], 0x47 ; C21C9D47 s_buffer_load_dword s58, s[28:31], 0x48 ; C21D1D48 s_buffer_load_dword s59, s[28:31], 0x49 ; C21D9D49 s_buffer_load_dword s60, s[28:31], 0x4a ; C21E1D4A s_buffer_load_dword s61, s[28:31], 0x4b ; C21E9D4B s_buffer_load_dword s62, s[28:31], 0x4c ; C21F1D4C s_buffer_load_dword s63, s[28:31], 0x4d ; C21F9D4D s_buffer_load_dword s64, s[28:31], 0x4e ; C2201D4E s_buffer_load_dword s28, s[28:31], 0x4f ; C20E1D4F v_mul_f32_e32 v20, s41, v2 ; 10280429 v_mac_f32_e32 v20, s42, v3 ; 3E28062A v_mac_f32_e32 v20, s43, v4 ; 3E28082B v_mac_f32_e32 v0, s46, v9 ; 3E00122E v_mac_f32_e32 v17, s47, v10 ; 3E22142F v_mul_f32_e32 v21, s48, v2 ; 102A0430 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v21, s54, v3 ; 3E2A0636 v_mac_f32_e32 v21, s58, v4 ; 3E2A083A v_mac_f32_e32 v21, s62, v5 ; 3E2A0A3E v_mul_f32_e32 v22, s49, v2 ; 102C0431 v_mac_f32_e32 v22, s55, v3 ; 3E2C0637 v_mac_f32_e32 v22, s59, v4 ; 3E2C083B v_mac_f32_e32 v22, s63, v5 ; 3E2C0A3F v_mul_f32_e32 v23, s50, v2 ; 102E0432 v_mac_f32_e32 v23, s56, v3 ; 3E2E0638 v_mac_f32_e32 v23, s60, v4 ; 3E2E083C v_mac_f32_e32 v23, s64, v5 ; 3E2E0A40 v_mul_f32_e32 v24, s51, v2 ; 10300433 v_mac_f32_e32 v24, s57, v3 ; 3E300639 v_mac_f32_e32 v24, s61, v4 ; 3E30083D v_mac_f32_e32 v24, s28, v5 ; 3E300A1C v_cndmask_b32_e32 v9, v11, v9 ; 0012130B v_cndmask_b32_e32 v10, v12, v10 ; 0014150C v_mul_f32_e32 v11, s34, v6 ; 10160C22 v_mac_f32_e32 v11, s35, v7 ; 3E160E23 v_mul_f32_e32 v12, s37, v6 ; 10180C25 v_mac_f32_e32 v12, s38, v7 ; 3E180E26 v_mul_f32_e32 v6, s40, v6 ; 100C0C28 v_mac_f32_e32 v6, s44, v7 ; 3E0C0E2C v_mac_f32_e32 v11, s36, v8 ; 3E161024 v_mac_f32_e32 v12, s39, v8 ; 3E181027 v_mac_f32_e32 v6, s45, v8 ; 3E0C102D v_mul_f32_e32 v7, s18, v2 ; 100E0412 v_mac_f32_e32 v7, s26, v3 ; 3E0E061A v_mac_f32_e32 v7, s23, v4 ; 3E0E0817 v_mac_f32_e32 v7, s27, v5 ; 3E0E0A1B v_mul_f32_e32 v8, s17, v2 ; 10100411 v_mac_f32_e32 v8, s25, v3 ; 3E100619 v_mac_f32_e32 v8, s24, v4 ; 3E100818 v_mac_f32_e32 v8, s32, v5 ; 3E100A20 v_mul_f32_e32 v2, s20, v2 ; 10040414 v_mac_f32_e32 v2, s21, v3 ; 3E040615 v_mac_f32_e32 v2, s22, v4 ; 3E040816 v_mac_f32_e32 v2, s33, v5 ; 3E040A21 v_mac_f32_e32 v18, s52, v9 ; 3E241234 v_mac_f32_e32 v19, s53, v10 ; 3E261435 exp 15, 32, 0, 0, 0, v0, v17, v18, v19 ; F800020F 13121100 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s18, v13 ; 10001A12 v_mac_f32_e32 v0, s26, v14 ; 3E001C1A v_mul_f32_e32 v3, s17, v13 ; 10061A11 v_mac_f32_e32 v3, s25, v14 ; 3E061C19 v_mul_f32_e32 v4, s20, v13 ; 10081A14 v_mac_f32_e32 v4, s21, v14 ; 3E081C15 v_mac_f32_e32 v0, s23, v15 ; 3E001E17 v_mac_f32_e32 v3, s24, v15 ; 3E061E18 v_mac_f32_e32 v4, s22, v15 ; 3E081E16 v_mul_f32_e32 v5, v11, v11 ; 100A170B v_mac_f32_e32 v5, v12, v12 ; 3E0A190C v_mac_f32_e32 v5, v6, v6 ; 3E0A0D06 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mul_f32_e32 v9, v0, v0 ; 10120100 v_mac_f32_e32 v9, v3, v3 ; 3E120703 v_mac_f32_e32 v9, v4, v4 ; 3E120904 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mul_f32_e32 v10, v5, v11 ; 10141705 v_mul_f32_e32 v11, v5, v12 ; 10161905 v_mul_f32_e32 v5, v5, v6 ; 100A0D05 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v3, v9, v3 ; 10060709 v_mul_f32_e32 v4, v9, v4 ; 10080909 v_mul_f32_e32 v6, v3, v5 ; 100C0B03 v_mad_f32 v6, v11, v4, -v6 ; D2820006 841A090B v_mul_f32_e32 v9, v4, v10 ; 10121504 v_mad_f32 v9, v5, v0, -v9 ; D2820009 84260105 v_mul_f32_e32 v12, v0, v11 ; 10181700 v_mad_f32 v12, v10, v3, -v12 ; D282000C 8432070A v_mul_f32_e32 v6, v16, v6 ; 100C0D10 v_mul_f32_e32 v9, v16, v9 ; 10121310 v_mul_f32_e32 v12, v16, v12 ; 10181910 exp 15, 33, 0, 0, 0, v0, v3, v4, v1 ; F800021F 01040300 exp 15, 34, 0, 0, 0, v6, v9, v12, v1 ; F800022F 010C0906 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v5, v11 ; 10001705 v_mul_f32_e32 v3, s14, v0 ; 1006000E v_mul_f32_e32 v4, s16, v0 ; 10080010 v_mul_f32_e32 v0, s19, v0 ; 10000013 v_mul_f32_e32 v6, v11, v10 ; 100C150B v_mac_f32_e32 v3, s9, v6 ; 3E060C09 v_mac_f32_e32 v4, s12, v6 ; 3E080C0C v_mac_f32_e32 v0, s15, v6 ; 3E000C0F v_mul_f32_e32 v6, v5, v5 ; 100C0B05 v_mac_f32_e32 v3, s7, v6 ; 3E060C07 v_mac_f32_e32 v4, s10, v6 ; 3E080C0A v_mac_f32_e32 v0, s13, v6 ; 3E000C0D v_mul_f32_e32 v6, v10, v5 ; 100C0B0A v_mac_f32_e32 v3, s6, v6 ; 3E060C06 v_mac_f32_e32 v4, s8, v6 ; 3E080C08 v_mac_f32_e32 v0, s11, v6 ; 3E000C0B v_mul_f32_e32 v6, v11, v11 ; 100C170B v_mad_f32 v6, v10, v10, -v6 ; D2820006 841A150A v_mac_f32_e32 v3, s1, v6 ; 3E060C01 v_mac_f32_e32 v4, s2, v6 ; 3E080C02 v_mac_f32_e32 v0, s0, v6 ; 3E000C00 v_subrev_f32_e32 v6, s3, v7 ; 0A0C0E03 v_subrev_f32_e32 v9, s4, v8 ; 0A121004 v_subrev_f32_e32 v12, s5, v2 ; 0A180405 exp 15, 35, 0, 0, 0, v10, v11, v5, v1 ; F800023F 01050B0A exp 15, 36, 0, 0, 0, v3, v4, v0, v1 ; F800024F 01000403 exp 15, 37, 0, 0, 0, v23, v6, v9, v12 ; F800025F 0C090617 exp 15, 38, 0, 0, 0, v7, v8, v2, v20 ; F800026F 14020807 exp 15, 12, 0, 1, 0, v21, v22, v23, v24 ; F80008CF 18171615 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 28 Code Size: 892 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL IN[6], GENERIC[6], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SVIEW[0], CUBE, FLOAT DCL SVIEW[1], CUBE, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL CONST[0..5] DCL CONST[8..20] DCL CONST[23..25] DCL CONST[27] DCL TEMP[0..17], LOCAL IMM[0] FLT32 { 1.0000, 2.0000, -1.0000, 0.0000} IMM[1] FLT32 { 0.5000, 0.7500, 7.0000, 1.0000} IMM[2] FLT32 { 10.0000, 0.9680, 0.0300, 0.0001} 0: MOV TEMP[0].x, IN[1].xxxx 1: MOV TEMP[0].y, IN[2].xxxx 2: MOV TEMP[0].z, IN[3].xxxx 3: MOV TEMP[1].x, IN[1].yyyy 4: MOV TEMP[1].y, IN[2].yyyy 5: MOV TEMP[1].z, IN[3].yyyy 6: MOV TEMP[2].x, IN[1].zzzz 7: MOV TEMP[2].y, IN[2].zzzz 8: MOV TEMP[2].z, IN[3].zzzz 9: MOV TEMP[3].xy, IN[0].xyyy 10: TEX TEMP[3], TEMP[3], SAMP[2], 2D 11: MUL TEMP[4].x, TEMP[3].wwww, CONST[19].wwww 12: FSLT TEMP[4].x, TEMP[4].xxxx, CONST[20].xxxx 13: AND TEMP[4].x, TEMP[4].xxxx, IMM[0].xxxx 14: KILL_IF -TEMP[4].xxxx 15: MOV TEMP[4].xy, IN[0].xyyy 16: TEX TEMP[4].yw, TEMP[4], SAMP[3], 2D 17: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[0].yyyy, IMM[0].zzzz 18: MUL TEMP[4].xy, TEMP[4].xyyy, CONST[23].xxxx 19: DP2 TEMP[5].x, TEMP[4].xyyy, TEMP[4].xyyy 20: MOV_SAT TEMP[5].x, TEMP[5].xxxx 21: ADD TEMP[5].x, IMM[0].xxxx, -TEMP[5].xxxx 22: SQRT TEMP[5].x, TEMP[5].xxxx 23: MOV TEMP[4].z, TEMP[5].xxxx 24: DP3 TEMP[0].x, TEMP[4].xyzz, TEMP[0].xyzz 25: DP3 TEMP[1].x, TEMP[4].xyzz, TEMP[1].xyzz 26: MOV TEMP[0].y, TEMP[1].xxxx 27: DP3 TEMP[1].x, TEMP[4].xyzz, TEMP[2].xyzz 28: MOV TEMP[0].z, TEMP[1].xxxx 29: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 30: RSQ TEMP[1].x, TEMP[1].xxxx 31: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 32: DP3 TEMP[1].x, IN[5].yzww, IN[5].yzww 33: RSQ TEMP[1].x, TEMP[1].xxxx 34: MUL TEMP[1].xyz, IN[5].yzww, TEMP[1].xxxx 35: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[3].xyzz 36: LRP TEMP[3].xyz, CONST[24].xxxx, TEMP[2].xyzz, CONST[16].xyzz 37: MUL TEMP[4].x, CONST[24].xxxx, CONST[16].wwww 38: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx 39: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx 40: MOV TEMP[5].xy, IN[0].xyyy 41: TEX TEMP[5].y, TEMP[5], SAMP[4], 2D 42: ADD TEMP[6].x, IMM[0].xxxx, -CONST[27].xxxx 43: MAD TEMP[5].x, TEMP[5].yyyy, CONST[27].xxxx, TEMP[6].xxxx 44: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz 45: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx 46: MOV TEMP[7].xyz, IMM[0].wwww 47: MOV TEMP[8].w, IMM[0].xxxx 48: MOV TEMP[8].xyz, TEMP[0].xyzx 49: DP4 TEMP[9].x, CONST[1], TEMP[8] 50: DP4 TEMP[10].x, CONST[2], TEMP[8] 51: MOV TEMP[9].y, TEMP[10].xxxx 52: DP4 TEMP[8].x, CONST[3], TEMP[8] 53: MOV TEMP[9].z, TEMP[8].xxxx 54: ADD TEMP[8].xyz, IN[4].xyzz, TEMP[9].xyzz 55: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx 56: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[1].xyzz 57: MUL TEMP[9].xyz, TEMP[9].xxxx, TEMP[0].xyzz 58: MUL TEMP[9].xyz, IMM[0].yyyy, TEMP[9].xyzz 59: ADD TEMP[9].xyz, TEMP[1].xyzz, -TEMP[9].xyzz 60: MOV TEMP[10].xyz, TEMP[9].xyzx 61: FSLT TEMP[11].x, IMM[0].wwww, CONST[10].wwww 62: UIF TEMP[11].xxxx :0 63: DP3 TEMP[11].x, TEMP[9].xyzz, TEMP[9].xyzz 64: RSQ TEMP[11].x, TEMP[11].xxxx 65: MUL TEMP[11].xyz, TEMP[9].xyzz, TEMP[11].xxxx 66: MOV TEMP[12].xyz, -IN[6].xyzx 67: ADD TEMP[13].xyz, CONST[8].xyzz, TEMP[12].xyzz 68: RCP TEMP[14].x, TEMP[11].xxxx 69: RCP TEMP[14].y, TEMP[11].yyyy 70: RCP TEMP[14].z, TEMP[11].zzzz 71: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz 72: ADD TEMP[12].xyz, CONST[9].xyzz, TEMP[12].xyzz 73: RCP TEMP[14].x, TEMP[11].xxxx 74: RCP TEMP[14].y, TEMP[11].yyyy 75: RCP TEMP[14].z, TEMP[11].zzzz 76: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz 77: FSLT TEMP[14].xyz, IMM[0].wwww, TEMP[11].xyzz 78: UIF TEMP[14].xxxx :0 79: MOV TEMP[15].x, TEMP[13].xxxx 80: ELSE :0 81: MOV TEMP[15].x, TEMP[12].xxxx 82: ENDIF 83: UIF TEMP[14].yyyy :0 84: MOV TEMP[16].x, TEMP[13].yyyy 85: ELSE :0 86: MOV TEMP[16].x, TEMP[12].yyyy 87: ENDIF 88: UIF TEMP[14].zzzz :0 89: MOV TEMP[13].x, TEMP[13].zzzz 90: ELSE :0 91: MOV TEMP[13].x, TEMP[12].zzzz 92: ENDIF 93: ADD TEMP[12].xyz, CONST[8].xyzz, CONST[9].xyzz 94: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[1].xxxx 95: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx 96: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx 97: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[10].xyzz 98: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[6].xyzz 99: MAD TEMP[11].xyz, TEMP[11].xyzz, TEMP[13].xxxx, TEMP[14].xyzz 100: ADD TEMP[10].xyz, TEMP[11].xyzz, -TEMP[12].xyzz 101: ENDIF 102: ADD TEMP[11].x, IMM[0].xxxx, -CONST[25].xxxx 103: POW TEMP[11].x, TEMP[11].xxxx, IMM[1].yyyy 104: MUL TEMP[11].x, TEMP[11].xxxx, IMM[1].zzzz 105: MOV TEMP[10].xyz, TEMP[10].xyzz 106: MOV TEMP[10].w, TEMP[11].xxxx 107: TXL TEMP[10], TEMP[10], SAMP[0], CUBE 108: POW TEMP[11].x, TEMP[10].wwww, CONST[11].yyyy 109: MUL TEMP[11].x, CONST[11].xxxx, TEMP[11].xxxx 110: MUL TEMP[10].xyz, TEMP[11].xxxx, TEMP[10].xyzz 111: FSLT TEMP[11].x, CONST[9].wwww, IMM[1].wwww 112: UIF TEMP[11].xxxx :0 113: MOV TEMP[11].xyz, TEMP[9].xyzx 114: FSLT TEMP[12].x, IMM[0].wwww, CONST[14].wwww 115: UIF TEMP[12].xxxx :0 116: DP3 TEMP[12].x, TEMP[9].xyzz, TEMP[9].xyzz 117: RSQ TEMP[12].x, TEMP[12].xxxx 118: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[12].xxxx 119: MOV TEMP[12].xyz, -IN[6].xyzx 120: ADD TEMP[13].xyz, CONST[12].xyzz, TEMP[12].xyzz 121: RCP TEMP[14].x, TEMP[9].xxxx 122: RCP TEMP[14].y, TEMP[9].yyyy 123: RCP TEMP[14].z, TEMP[9].zzzz 124: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz 125: ADD TEMP[12].xyz, CONST[13].xyzz, TEMP[12].xyzz 126: RCP TEMP[14].x, TEMP[9].xxxx 127: RCP TEMP[14].y, TEMP[9].yyyy 128: RCP TEMP[14].z, TEMP[9].zzzz 129: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz 130: FSLT TEMP[14].xyz, IMM[0].wwww, TEMP[9].xyzz 131: UIF TEMP[14].xxxx :0 132: MOV TEMP[15].x, TEMP[13].xxxx 133: ELSE :0 134: MOV TEMP[15].x, TEMP[12].xxxx 135: ENDIF 136: UIF TEMP[14].yyyy :0 137: MOV TEMP[16].x, TEMP[13].yyyy 138: ELSE :0 139: MOV TEMP[16].x, TEMP[12].yyyy 140: ENDIF 141: UIF TEMP[14].zzzz :0 142: MOV TEMP[13].x, TEMP[13].zzzz 143: ELSE :0 144: MOV TEMP[13].x, TEMP[12].zzzz 145: ENDIF 146: ADD TEMP[12].xyz, CONST[12].xyzz, CONST[13].xyzz 147: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[1].xxxx 148: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx 149: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx 150: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[14].xyzz 151: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[6].xyzz 152: MAD TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xxxx, TEMP[14].xyzz 153: ADD TEMP[11].xyz, TEMP[9].xyzz, -TEMP[12].xyzz 154: ENDIF 155: ADD TEMP[9].x, IMM[0].xxxx, -CONST[25].xxxx 156: POW TEMP[9].x, TEMP[9].xxxx, IMM[1].yyyy 157: MUL TEMP[9].x, TEMP[9].xxxx, IMM[1].zzzz 158: MOV TEMP[11].xyz, TEMP[11].xyzz 159: MOV TEMP[11].w, TEMP[9].xxxx 160: TXL TEMP[9], TEMP[11], SAMP[1], CUBE 161: POW TEMP[11].x, TEMP[9].wwww, CONST[15].yyyy 162: MUL TEMP[11].x, CONST[15].xxxx, TEMP[11].xxxx 163: MUL TEMP[9].xyz, TEMP[11].xxxx, TEMP[9].xyzz 164: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[10].xyzz, TEMP[9].xyzz 165: ELSE :0 166: MOV TEMP[7].xyz, TEMP[10].xyzx 167: ENDIF 168: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx 169: MOV TEMP[1].xyz, -TEMP[1].xyzx 170: ADD TEMP[5].x, IMM[0].xxxx, -CONST[25].xxxx 171: ADD TEMP[9].xyz, CONST[0].xyzz, TEMP[1].xyzz 172: DP3 TEMP[10].x, TEMP[9].xyzz, TEMP[9].xyzz 173: RSQ TEMP[10].x, TEMP[10].xxxx 174: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xxxx 175: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz 176: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx 177: DP3 TEMP[10].x, CONST[0].xyzz, TEMP[9].xyzz 178: MAX TEMP[10].x, IMM[0].wwww, TEMP[10].xxxx 179: MUL TEMP[11].x, TEMP[5].xxxx, TEMP[5].xxxx 180: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].wwww 181: ADD TEMP[12].x, IMM[0].xxxx, -TEMP[5].xxxx 182: MAD TEMP[12].x, TEMP[12].xxxx, IMM[2].yyyy, IMM[2].zzzz 183: LG2 TEMP[12].x, TEMP[12].xxxx 184: RCP TEMP[12].x, TEMP[12].xxxx 185: MUL TEMP[12].x, IMM[2].xxxx, TEMP[12].xxxx 186: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[12].xxxx 187: ADD TEMP[13].x, IMM[0].xxxx, -TEMP[6].xxxx 188: ADD TEMP[14].x, IMM[0].xxxx, -TEMP[1].xxxx 189: MUL TEMP[15].x, IMM[0].yyyy, TEMP[10].xxxx 190: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx 191: MAD TEMP[5].x, TEMP[15].xxxx, TEMP[5].xxxx, IMM[1].xxxx 192: ADD TEMP[10].x, IMM[0].xxxx, -TEMP[10].xxxx 193: ADD TEMP[15].x, IMM[0].xxxx, -TEMP[1].xxxx 194: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx 195: ADD TEMP[4].x, CONST[25].xxxx, TEMP[4].xxxx 196: MOV_SAT TEMP[4].x, TEMP[4].xxxx 197: MUL TEMP[16].x, TEMP[15].xxxx, TEMP[15].xxxx 198: MUL TEMP[17].x, TEMP[15].xxxx, TEMP[15].xxxx 199: MUL TEMP[15].x, TEMP[17].xxxx, TEMP[15].xxxx 200: MUL TEMP[15].x, TEMP[16].xxxx, TEMP[15].xxxx 201: LRP TEMP[4].xyz, TEMP[15].xxxx, TEMP[4].xxxx, TEMP[3].xyzz 202: LRP TEMP[15].x, TEMP[6].xxxx, IMM[0].xxxx, TEMP[11].xxxx 203: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx, TEMP[11].xxxx 204: MAD TEMP[1].x, TEMP[15].xxxx, TEMP[1].xxxx, IMM[2].wwww 205: RCP TEMP[1].x, TEMP[1].xxxx 206: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[9].xyzz 207: MAX TEMP[9].x, IMM[0].wwww, TEMP[9].xxxx 208: POW TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx 209: ADD TEMP[11].x, TEMP[12].xxxx, IMM[0].xxxx 210: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].yyyy 211: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx 212: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[9].xxxx 213: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx 214: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx 215: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx 216: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[17].xyzz 217: ADD TEMP[9].xyz, IMM[0].xxxx, -TEMP[3].xyzz 218: MUL TEMP[11].x, TEMP[10].xxxx, TEMP[10].xxxx 219: MUL TEMP[12].x, TEMP[10].xxxx, TEMP[10].xxxx 220: MUL TEMP[10].x, TEMP[12].xxxx, TEMP[10].xxxx 221: MUL TEMP[10].x, TEMP[11].xxxx, TEMP[10].xxxx 222: MAD TEMP[3].xyz, TEMP[9].xyzz, TEMP[10].xxxx, TEMP[3].xyzz 223: ADD TEMP[9].x, TEMP[5].xxxx, IMM[0].zzzz 224: MUL TEMP[10].x, TEMP[13].xxxx, TEMP[13].xxxx 225: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[13].xxxx 226: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[13].xxxx 227: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 228: MAD TEMP[9].x, TEMP[9].xxxx, TEMP[10].xxxx, IMM[0].xxxx 229: ADD TEMP[5].x, TEMP[5].xxxx, IMM[0].zzzz 230: MUL TEMP[10].x, TEMP[14].xxxx, TEMP[14].xxxx 231: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx 232: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx 233: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 234: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[10].xxxx, IMM[0].xxxx 235: MUL TEMP[5].x, TEMP[9].xxxx, TEMP[5].xxxx 236: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 237: MAD TEMP[5].xyz, CONST[17].xyzz, TEMP[5].xxxx, TEMP[8].xyzz 238: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz 239: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz 240: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz 241: MOV TEMP[0].xyz, TEMP[0].xyzx 242: MAD TEMP[1].x, IN[5].xxxx, CONST[5].zzzz, CONST[5].wwww 243: MOV_SAT TEMP[1].x, TEMP[1].xxxx 244: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz 245: MOV TEMP[0].xyz, TEMP[0].xyzx 246: MOV TEMP[0].w, IMM[0].xxxx 247: MOV OUT[0], TEMP[0] 248: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300) %76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312) %79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 316) %80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320) %81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368) %82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384) %83 = call float @llvm.SI.load.const(<16 x i8> %23, i32 400) %84 = call float @llvm.SI.load.const(<16 x i8> %23, i32 432) %85 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %86 = load <32 x i8>, <32 x i8> addrspace(2)* %85, align 32, !tbaa !0 %87 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 %89 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %90 = bitcast <8 x i32> addrspace(2)* %89 to <32 x i8> addrspace(2)* %91 = load <32 x i8>, <32 x i8> addrspace(2)* %90, align 32, !tbaa !0 %92 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %93 = bitcast <4 x i32> addrspace(2)* %92 to <16 x i8> addrspace(2)* %94 = load <16 x i8>, <16 x i8> addrspace(2)* %93, align 16, !tbaa !0 %95 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %96 = bitcast <8 x i32> addrspace(2)* %95 to <32 x i8> addrspace(2)* %97 = load <32 x i8>, <32 x i8> addrspace(2)* %96, align 32, !tbaa !0 %98 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %99 = bitcast <4 x i32> addrspace(2)* %98 to <16 x i8> addrspace(2)* %100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !tbaa !0 %101 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %102 = bitcast <8 x i32> addrspace(2)* %101 to <32 x i8> addrspace(2)* %103 = load <32 x i8>, <32 x i8> addrspace(2)* %102, align 32, !tbaa !0 %104 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %105 = bitcast <4 x i32> addrspace(2)* %104 to <16 x i8> addrspace(2)* %106 = load <16 x i8>, <16 x i8> addrspace(2)* %105, align 16, !tbaa !0 %107 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %108 = bitcast <8 x i32> addrspace(2)* %107 to <32 x i8> addrspace(2)* %109 = load <32 x i8>, <32 x i8> addrspace(2)* %108, align 32, !tbaa !0 %110 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %111 = bitcast <4 x i32> addrspace(2)* %110 to <16 x i8> addrspace(2)* %112 = load <16 x i8>, <16 x i8> addrspace(2)* %111, align 16, !tbaa !0 %113 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %114 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %115 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %116 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %117 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %119 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %123 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %124 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %125 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %126 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %127 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %128 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %129 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %130 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7) %131 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %132 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %133 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %134 = bitcast float %113 to i32 %135 = bitcast float %114 to i32 %136 = insertelement <2 x i32> undef, i32 %134, i32 0 %137 = insertelement <2 x i32> %136, i32 %135, i32 1 %138 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %137, <32 x i8> %97, <16 x i8> %100, i32 2) %139 = extractelement <4 x float> %138, i32 0 %140 = extractelement <4 x float> %138, i32 1 %141 = extractelement <4 x float> %138, i32 2 %142 = extractelement <4 x float> %138, i32 3 %143 = fmul float %142, %79 %144 = fcmp olt float %143, %80 %145 = select i1 %144, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %145) %146 = bitcast float %113 to i32 %147 = bitcast float %114 to i32 %148 = insertelement <2 x i32> undef, i32 %146, i32 0 %149 = insertelement <2 x i32> %148, i32 %147, i32 1 %150 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %149, <32 x i8> %103, <16 x i8> %106, i32 2) %151 = extractelement <4 x float> %150, i32 1 %152 = extractelement <4 x float> %150, i32 3 %153 = fmul float %152, 2.000000e+00 %154 = fadd float %153, -1.000000e+00 %155 = fmul float %151, 2.000000e+00 %156 = fadd float %155, -1.000000e+00 %157 = fmul float %154, %81 %158 = fmul float %156, %81 %159 = fmul float %157, %157 %160 = fmul float %158, %158 %161 = fadd float %159, %160 %162 = call float @llvm.AMDIL.clamp.(float %161, float 0.000000e+00, float 1.000000e+00) %163 = fsub float 1.000000e+00, %162 %164 = call float @llvm.sqrt.f32(float %163) %165 = fmul float %157, %115 %166 = fmul float %158, %118 %167 = fadd float %166, %165 %168 = fmul float %164, %121 %169 = fadd float %167, %168 %170 = fmul float %157, %116 %171 = fmul float %158, %119 %172 = fadd float %171, %170 %173 = fmul float %164, %122 %174 = fadd float %172, %173 %175 = fmul float %157, %117 %176 = fmul float %158, %120 %177 = fadd float %176, %175 %178 = fmul float %164, %123 %179 = fadd float %177, %178 %180 = fmul float %169, %169 %181 = fmul float %174, %174 %182 = fadd float %181, %180 %183 = fmul float %179, %179 %184 = fadd float %182, %183 %185 = call float @llvm.AMDGPU.rsq.clamped.f32(float %184) %186 = fmul float %169, %185 %187 = fmul float %174, %185 %188 = fmul float %179, %185 %189 = fmul float %128, %128 %190 = fmul float %129, %129 %191 = fadd float %190, %189 %192 = fmul float %130, %130 %193 = fadd float %191, %192 %194 = call float @llvm.AMDGPU.rsq.clamped.f32(float %193) %195 = fmul float %128, %194 %196 = fmul float %129, %194 %197 = fmul float %130, %194 %198 = fmul float %76, %139 %199 = fmul float %77, %140 %200 = fmul float %78, %141 %201 = call float @llvm.AMDGPU.lrp(float %82, float %198, float %66) %202 = call float @llvm.AMDGPU.lrp(float %82, float %199, float %67) %203 = call float @llvm.AMDGPU.lrp(float %82, float %200, float %68) %204 = fmul float %82, %69 %205 = fsub float %69, %204 %206 = fmul float %198, %205 %207 = fmul float %199, %205 %208 = fmul float %200, %205 %209 = bitcast float %113 to i32 %210 = bitcast float %114 to i32 %211 = insertelement <2 x i32> undef, i32 %209, i32 0 %212 = insertelement <2 x i32> %211, i32 %210, i32 1 %213 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %212, <32 x i8> %109, <16 x i8> %112, i32 2) %214 = extractelement <4 x float> %213, i32 1 %215 = fsub float 1.000000e+00, %84 %216 = fmul float %214, %84 %217 = fadd float %216, %215 %218 = fmul float %186, %24 %219 = fmul float %187, %25 %220 = fadd float %219, %218 %221 = fmul float %188, %26 %222 = fadd float %220, %221 %223 = call float @llvm.maxnum.f32(float %222, float 0.000000e+00) %224 = fmul float %27, %186 %225 = fmul float %28, %187 %226 = fadd float %224, %225 %227 = fmul float %29, %188 %228 = fadd float %226, %227 %229 = fadd float %228, %30 %230 = fmul float %31, %186 %231 = fmul float %32, %187 %232 = fadd float %230, %231 %233 = fmul float %33, %188 %234 = fadd float %232, %233 %235 = fadd float %234, %34 %236 = fmul float %35, %186 %237 = fmul float %36, %187 %238 = fadd float %236, %237 %239 = fmul float %37, %188 %240 = fadd float %238, %239 %241 = fadd float %240, %38 %242 = fadd float %124, %229 %243 = fadd float %125, %235 %244 = fadd float %126, %241 %245 = fmul float %242, %217 %246 = fmul float %243, %217 %247 = fmul float %244, %217 %248 = fmul float %186, %195 %249 = fmul float %187, %196 %250 = fadd float %249, %248 %251 = fmul float %188, %197 %252 = fadd float %250, %251 %253 = fmul float %252, %186 %254 = fmul float %252, %187 %255 = fmul float %252, %188 %256 = fmul float %253, 2.000000e+00 %257 = fmul float %254, 2.000000e+00 %258 = fmul float %255, 2.000000e+00 %259 = fsub float %195, %256 %260 = fsub float %196, %257 %261 = fsub float %197, %258 %262 = fcmp ogt float %51, 0.000000e+00 br i1 %262, label %IF, label %ENDIF IF: ; preds = %main_body %263 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %264 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %265 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %266 = fmul float %259, %259 %267 = fmul float %260, %260 %268 = fadd float %267, %266 %269 = fmul float %261, %261 %270 = fadd float %268, %269 %271 = call float @llvm.AMDGPU.rsq.clamped.f32(float %270) %272 = fmul float %259, %271 %273 = fmul float %260, %271 %274 = fmul float %261, %271 %275 = fsub float %44, %131 %276 = fsub float %45, %132 %277 = fsub float %46, %133 %278 = fdiv float 1.000000e+00, %272 %279 = fdiv float 1.000000e+00, %273 %280 = fdiv float 1.000000e+00, %274 %281 = fmul float %275, %278 %282 = fmul float %276, %279 %283 = fmul float %277, %280 %284 = fsub float %47, %131 %285 = fsub float %48, %132 %286 = fsub float %49, %133 %287 = fdiv float 1.000000e+00, %272 %288 = fdiv float 1.000000e+00, %273 %289 = fdiv float 1.000000e+00, %274 %290 = fmul float %284, %287 %291 = fmul float %285, %288 %292 = fmul float %286, %289 %293 = fcmp ogt float %272, 0.000000e+00 %294 = fcmp ogt float %273, 0.000000e+00 %295 = fcmp ogt float %274, 0.000000e+00 %. = select i1 %293, float %281, float %290 %temp64.0 = select i1 %294, float %282, float %291 %.96 = select i1 %295, float %283, float %292 %296 = fadd float %44, %47 %297 = fadd float %45, %48 %298 = fadd float %46, %49 %299 = fmul float %296, 5.000000e-01 %300 = fmul float %297, 5.000000e-01 %301 = fmul float %298, 5.000000e-01 %302 = call float @llvm.minnum.f32(float %., float %temp64.0) %303 = call float @llvm.minnum.f32(float %302, float %.96) %304 = fsub float %299, %265 %305 = fsub float %300, %264 %306 = fsub float %301, %263 %307 = fadd float %304, %131 %308 = fadd float %305, %132 %309 = fadd float %306, %133 %310 = fmul float %272, %303 %311 = fadd float %310, %307 %312 = fmul float %273, %303 %313 = fadd float %312, %308 %314 = fmul float %274, %303 %315 = fadd float %314, %309 %316 = fsub float %311, %299 %317 = fsub float %313, %300 %318 = fsub float %315, %301 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp40.0 = phi float [ %316, %IF ], [ %259, %main_body ] %temp41.0 = phi float [ %317, %IF ], [ %260, %main_body ] %temp42.0 = phi float [ %318, %IF ], [ %261, %main_body ] %319 = fsub float 1.000000e+00, %83 %320 = call float @llvm.pow.f32(float %319, float 7.500000e-01) %321 = fmul float %320, 7.000000e+00 %322 = insertelement <4 x float> undef, float %temp40.0, i32 0 %323 = insertelement <4 x float> %322, float %temp41.0, i32 1 %324 = insertelement <4 x float> %323, float %temp42.0, i32 2 %325 = insertelement <4 x float> %324, float %321, i32 3 %326 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %325) %327 = extractelement <4 x float> %326, i32 0 %328 = extractelement <4 x float> %326, i32 1 %329 = extractelement <4 x float> %326, i32 2 %330 = extractelement <4 x float> %326, i32 3 %331 = call float @llvm.fabs.f32(float %329) %332 = fdiv float 1.000000e+00, %331 %333 = fmul float %327, %332 %334 = fadd float %333, 1.500000e+00 %335 = fmul float %328, %332 %336 = fadd float %335, 1.500000e+00 %337 = bitcast float %336 to i32 %338 = bitcast float %334 to i32 %339 = bitcast float %330 to i32 %340 = bitcast float %321 to i32 %341 = insertelement <4 x i32> undef, i32 %337, i32 0 %342 = insertelement <4 x i32> %341, i32 %338, i32 1 %343 = insertelement <4 x i32> %342, i32 %339, i32 2 %344 = insertelement <4 x i32> %343, i32 %340, i32 3 %345 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %344, <32 x i8> %86, <16 x i8> %88, i32 4) %346 = extractelement <4 x float> %345, i32 0 %347 = extractelement <4 x float> %345, i32 1 %348 = extractelement <4 x float> %345, i32 2 %349 = extractelement <4 x float> %345, i32 3 %350 = call float @llvm.pow.f32(float %349, float %53) %351 = fmul float %52, %350 %352 = fmul float %351, %346 %353 = fmul float %351, %347 %354 = fmul float %351, %348 %355 = fcmp olt float %50, 0x3FEFFFEB00000000 br i1 %355, label %IF82, label %ENDIF81 IF82: ; preds = %ENDIF %356 = fcmp ogt float %63, 0.000000e+00 br i1 %356, label %IF85, label %ENDIF84 ENDIF81: ; preds = %ENDIF, %ENDIF84 %temp28.0 = phi float [ %584, %ENDIF84 ], [ %352, %ENDIF ] %temp29.0 = phi float [ %585, %ENDIF84 ], [ %353, %ENDIF ] %temp30.0 = phi float [ %586, %ENDIF84 ], [ %354, %ENDIF ] %357 = fmul float %temp28.0, %217 %358 = fmul float %temp29.0, %217 %359 = fmul float %temp30.0, %217 %360 = fsub float 1.000000e+00, %83 %361 = fsub float %24, %195 %362 = fsub float %25, %196 %363 = fsub float %26, %197 %364 = fmul float %361, %361 %365 = fmul float %362, %362 %366 = fadd float %365, %364 %367 = fmul float %363, %363 %368 = fadd float %366, %367 %369 = call float @llvm.AMDGPU.rsq.clamped.f32(float %368) %370 = fmul float %361, %369 %371 = fmul float %362, %369 %372 = fmul float %363, %369 %373 = fmul float %195, %186 %374 = fsub float -0.000000e+00, %373 %375 = fmul float %196, %187 %376 = fsub float %374, %375 %377 = fmul float %197, %188 %378 = fsub float %376, %377 %379 = call float @llvm.maxnum.f32(float %378, float 0.000000e+00) %380 = fmul float %24, %370 %381 = fmul float %25, %371 %382 = fadd float %381, %380 %383 = fmul float %26, %372 %384 = fadd float %382, %383 %385 = call float @llvm.maxnum.f32(float %384, float 0.000000e+00) %386 = fmul float %360, %360 %387 = fmul float %386, %75 %388 = fsub float 1.000000e+00, %360 %389 = fmul float %388, 0x3FEEF9DB20000000 %390 = fadd float %389, 0x3F9EB851E0000000 %391 = call float @llvm.log2.f32(float %390) %392 = fdiv float 1.000000e+00, %391 %393 = fmul float %392, 1.000000e+01 %394 = fmul float %393, %393 %395 = fsub float 1.000000e+00, %223 %396 = fsub float 1.000000e+00, %379 %397 = fmul float %385, 2.000000e+00 %398 = fmul float %385, %360 %399 = fmul float %397, %398 %400 = fadd float %399, 5.000000e-01 %401 = fsub float 1.000000e+00, %385 %402 = fsub float 1.000000e+00, %379 %403 = fsub float 1.000000e+00, %205 %404 = fadd float %83, %403 %405 = call float @llvm.AMDIL.clamp.(float %404, float 0.000000e+00, float 1.000000e+00) %406 = fmul float %402, %402 %407 = fmul float %402, %402 %408 = fmul float %407, %402 %409 = fmul float %406, %408 %410 = call float @llvm.AMDGPU.lrp(float %409, float %405, float %201) %411 = call float @llvm.AMDGPU.lrp(float %409, float %405, float %202) %412 = call float @llvm.AMDGPU.lrp(float %409, float %405, float %203) %413 = call float @llvm.AMDGPU.lrp(float %223, float 1.000000e+00, float %387) %414 = call float @llvm.AMDGPU.lrp(float %379, float 1.000000e+00, float %387) %415 = fmul float %413, %414 %416 = fadd float %415, 0x3F1A36E2E0000000 %417 = fdiv float 1.000000e+00, %416 %418 = fmul float %186, %370 %419 = fmul float %187, %371 %420 = fadd float %419, %418 %421 = fmul float %188, %372 %422 = fadd float %420, %421 %423 = call float @llvm.maxnum.f32(float %422, float 0.000000e+00) %424 = call float @llvm.pow.f32(float %423, float %394) %425 = fadd float %394, 1.000000e+00 %426 = fmul float %425, %74 %427 = fmul float %424, %426 %428 = fmul float %417, %427 %429 = fmul float %428, %223 %430 = fmul float %429, %73 %431 = call float @llvm.maxnum.f32(float %430, float 0.000000e+00) %432 = fmul float %431, %70 %433 = fmul float %431, %71 %434 = fmul float %431, %72 %435 = fsub float 1.000000e+00, %201 %436 = fsub float 1.000000e+00, %202 %437 = fsub float 1.000000e+00, %203 %438 = fmul float %401, %401 %439 = fmul float %401, %401 %440 = fmul float %439, %401 %441 = fmul float %438, %440 %442 = fmul float %435, %441 %443 = fadd float %442, %201 %444 = fmul float %436, %441 %445 = fadd float %444, %202 %446 = fmul float %437, %441 %447 = fadd float %446, %203 %448 = fadd float %400, -1.000000e+00 %449 = fmul float %395, %395 %450 = fmul float %395, %395 %451 = fmul float %450, %395 %452 = fmul float %449, %451 %453 = fmul float %448, %452 %454 = fadd float %453, 1.000000e+00 %455 = fadd float %400, -1.000000e+00 %456 = fmul float %396, %396 %457 = fmul float %396, %396 %458 = fmul float %457, %396 %459 = fmul float %456, %458 %460 = fmul float %455, %459 %461 = fadd float %460, 1.000000e+00 %462 = fmul float %454, %461 %463 = fmul float %462, %223 %464 = fmul float %70, %463 %465 = fadd float %464, %245 %466 = fmul float %71, %463 %467 = fadd float %466, %246 %468 = fmul float %72, %463 %469 = fadd float %468, %247 %470 = fmul float %206, %465 %471 = fmul float %207, %467 %472 = fmul float %208, %469 %473 = fmul float %432, %443 %474 = fadd float %473, %470 %475 = fmul float %433, %445 %476 = fadd float %475, %471 %477 = fmul float %434, %447 %478 = fadd float %477, %472 %479 = fmul float %357, %410 %480 = fadd float %479, %474 %481 = fmul float %358, %411 %482 = fadd float %481, %476 %483 = fmul float %359, %412 %484 = fadd float %483, %478 %485 = fmul float %127, %42 %486 = fadd float %485, %43 %487 = call float @llvm.AMDIL.clamp.(float %486, float 0.000000e+00, float 1.000000e+00) %488 = call float @llvm.AMDGPU.lrp(float %487, float %480, float %39) %489 = call float @llvm.AMDGPU.lrp(float %487, float %482, float %40) %490 = call float @llvm.AMDGPU.lrp(float %487, float %484, float %41) %491 = call i32 @llvm.SI.packf16(float %488, float %489) %492 = bitcast i32 %491 to float %493 = call i32 @llvm.SI.packf16(float %490, float 1.000000e+00) %494 = bitcast i32 %493 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %492, float %494, float %492, float %494) ret void IF85: ; preds = %IF82 %495 = fmul float %259, %259 %496 = fmul float %260, %260 %497 = fadd float %496, %495 %498 = fmul float %261, %261 %499 = fadd float %497, %498 %500 = call float @llvm.AMDGPU.rsq.clamped.f32(float %499) %501 = fmul float %259, %500 %502 = fmul float %260, %500 %503 = fmul float %261, %500 %504 = fsub float %54, %131 %505 = fsub float %55, %132 %506 = fsub float %56, %133 %507 = fdiv float 1.000000e+00, %501 %508 = fdiv float 1.000000e+00, %502 %509 = fdiv float 1.000000e+00, %503 %510 = fmul float %504, %507 %511 = fmul float %505, %508 %512 = fmul float %506, %509 %513 = fsub float %57, %131 %514 = fsub float %58, %132 %515 = fsub float %59, %133 %516 = fdiv float 1.000000e+00, %501 %517 = fdiv float 1.000000e+00, %502 %518 = fdiv float 1.000000e+00, %503 %519 = fmul float %513, %516 %520 = fmul float %514, %517 %521 = fmul float %515, %518 %522 = fcmp ogt float %501, 0.000000e+00 %523 = fcmp ogt float %502, 0.000000e+00 %524 = fcmp ogt float %503, 0.000000e+00 %.97 = select i1 %522, float %510, float %519 %temp64.1 = select i1 %523, float %511, float %520 %.98 = select i1 %524, float %512, float %521 %525 = fadd float %54, %57 %526 = fadd float %55, %58 %527 = fadd float %56, %59 %528 = fmul float %525, 5.000000e-01 %529 = fmul float %526, 5.000000e-01 %530 = fmul float %527, 5.000000e-01 %531 = call float @llvm.minnum.f32(float %.97, float %temp64.1) %532 = call float @llvm.minnum.f32(float %531, float %.98) %533 = fsub float %528, %60 %534 = fsub float %529, %61 %535 = fsub float %530, %62 %536 = fadd float %533, %131 %537 = fadd float %534, %132 %538 = fadd float %535, %133 %539 = fmul float %501, %532 %540 = fadd float %539, %536 %541 = fmul float %502, %532 %542 = fadd float %541, %537 %543 = fmul float %503, %532 %544 = fadd float %543, %538 %545 = fsub float %540, %528 %546 = fsub float %542, %529 %547 = fsub float %544, %530 br label %ENDIF84 ENDIF84: ; preds = %IF82, %IF85 %temp44.0 = phi float [ %545, %IF85 ], [ %259, %IF82 ] %temp45.0 = phi float [ %546, %IF85 ], [ %260, %IF82 ] %temp46.0 = phi float [ %547, %IF85 ], [ %261, %IF82 ] %548 = fsub float 1.000000e+00, %83 %549 = call float @llvm.pow.f32(float %548, float 7.500000e-01) %550 = fmul float %549, 7.000000e+00 %551 = insertelement <4 x float> undef, float %temp44.0, i32 0 %552 = insertelement <4 x float> %551, float %temp45.0, i32 1 %553 = insertelement <4 x float> %552, float %temp46.0, i32 2 %554 = insertelement <4 x float> %553, float %550, i32 3 %555 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %554) %556 = extractelement <4 x float> %555, i32 0 %557 = extractelement <4 x float> %555, i32 1 %558 = extractelement <4 x float> %555, i32 2 %559 = extractelement <4 x float> %555, i32 3 %560 = call float @llvm.fabs.f32(float %558) %561 = fdiv float 1.000000e+00, %560 %562 = fmul float %556, %561 %563 = fadd float %562, 1.500000e+00 %564 = fmul float %557, %561 %565 = fadd float %564, 1.500000e+00 %566 = bitcast float %565 to i32 %567 = bitcast float %563 to i32 %568 = bitcast float %559 to i32 %569 = bitcast float %550 to i32 %570 = insertelement <4 x i32> undef, i32 %566, i32 0 %571 = insertelement <4 x i32> %570, i32 %567, i32 1 %572 = insertelement <4 x i32> %571, i32 %568, i32 2 %573 = insertelement <4 x i32> %572, i32 %569, i32 3 %574 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %573, <32 x i8> %91, <16 x i8> %94, i32 4) %575 = extractelement <4 x float> %574, i32 0 %576 = extractelement <4 x float> %574, i32 1 %577 = extractelement <4 x float> %574, i32 2 %578 = extractelement <4 x float> %574, i32 3 %579 = call float @llvm.pow.f32(float %578, float %65) %580 = fmul float %64, %579 %581 = fmul float %580, %575 %582 = fmul float %580, %576 %583 = fmul float %580, %577 %584 = call float @llvm.AMDGPU.lrp(float %50, float %352, float %581) %585 = call float @llvm.AMDGPU.lrp(float %50, float %353, float %582) %586 = call float @llvm.AMDGPU.lrp(float %50, float %354, float %583) br label %ENDIF81 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v17, v0, 0, 0, [m0] ; C8440000 v_interp_p2_f32 v17, [v17], v1, 0, 0, [m0] ; C8450001 v_interp_p1_f32 v18, v0, 1, 0, [m0] ; C8480100 v_interp_p2_f32 v18, [v18], v1, 1, 0, [m0] ; C8490101 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800 v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801 v_interp_p1_f32 v10, v0, 1, 2, [m0] ; C8280900 v_interp_p2_f32 v10, [v10], v1, 1, 2, [m0] ; C8290901 v_interp_p1_f32 v11, v0, 2, 2, [m0] ; C82C0A00 v_interp_p2_f32 v11, [v11], v1, 2, 2, [m0] ; C82D0A01 v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00 v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01 v_interp_p1_f32 v19, v0, 1, 3, [m0] ; C84C0D00 v_interp_p2_f32 v19, [v19], v1, 1, 3, [m0] ; C84D0D01 v_interp_p1_f32 v20, v0, 2, 3, [m0] ; C8500E00 v_interp_p2_f32 v20, [v20], v1, 2, 3, [m0] ; C8510E01 v_interp_p1_f32 v7, v0, 0, 4, [m0] ; C81C1000 v_interp_p2_f32 v7, [v7], v1, 0, 4, [m0] ; C81D1001 v_interp_p1_f32 v8, v0, 1, 4, [m0] ; C8201100 v_interp_p2_f32 v8, [v8], v1, 1, 4, [m0] ; C8211101 v_interp_p1_f32 v9, v0, 2, 4, [m0] ; C8241200 v_interp_p2_f32 v9, [v9], v1, 2, 4, [m0] ; C8251201 v_interp_p1_f32 v3, v0, 0, 5, [m0] ; C80C1400 v_interp_p2_f32 v3, [v3], v1, 0, 5, [m0] ; C80D1401 v_interp_p1_f32 v23, v0, 1, 5, [m0] ; C85C1500 v_interp_p2_f32 v23, [v23], v1, 1, 5, [m0] ; C85D1501 v_interp_p1_f32 v24, v0, 2, 5, [m0] ; C8601600 v_interp_p2_f32 v24, [v24], v1, 2, 5, [m0] ; C8611601 v_interp_p1_f32 v25, v0, 3, 5, [m0] ; C8641700 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p2_f32 v25, [v25], v1, 3, 5, [m0] ; C8651701 v_interp_p1_f32 v21, v0, 0, 6, [m0] ; C8541800 v_interp_p2_f32 v21, [v21], v1, 0, 6, [m0] ; C8551801 v_interp_p1_f32 v16, v0, 1, 6, [m0] ; C8401900 s_load_dwordx4 s[0:3], s[4:5], 0x8 ; C0800508 s_load_dwordx8 s[40:47], s[6:7], 0x10 ; C0D40710 v_interp_p2_f32 v16, [v16], v1, 1, 6, [m0] ; C8411901 v_interp_p1_f32 v22, v0, 2, 6, [m0] ; C8581A00 v_interp_p2_f32 v22, [v22], v1, 2, 6, [m0] ; C8591A01 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[8:11], 0x4f ; C206094F s_buffer_load_dword s13, s[8:11], 0x50 ; C2068950 s_load_dwordx4 s[28:31], s[4:5], 0xc ; C08E050C s_load_dwordx8 s[32:39], s[6:7], 0x18 ; C0D00718 s_load_dwordx8 s[16:23], s[6:7], 0x20 ; C0C80720 s_load_dwordx4 s[24:27], s[4:5], 0x10 ; C08C0510 image_sample v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[40:47], s[0:3] ; F0800F00 000A1A11 s_buffer_load_dword s1, s[8:11], 0x4c ; C200894C s_buffer_load_dword s2, s[8:11], 0x4d ; C201094D s_buffer_load_dword s3, s[8:11], 0x4e ; C201894E s_buffer_load_dword s15, s[8:11], 0x5c ; C207895C s_buffer_load_dword s14, s[8:11], 0x60 ; C2070960 s_buffer_load_dword s0, s[8:11], 0x64 ; C2000964 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s12, v29 ; 10003A0C v_cmp_gt_f32_e32 vcc, s13, v0 ; 7C08000D v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_mul_f32_e32 v13, s1, v26 ; 101A3401 v_mul_f32_e32 v14, s2, v27 ; 101C3602 v_mul_f32_e32 v15, s3, v28 ; 101E3803 image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[32:39], s[28:31] ; F0800A00 00E80011 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4 v_mul_f32_e32 v1, s15, v1 ; 1002020F v_mul_f32_e32 v0, s15, v0 ; 1000000F v_mul_f32_e32 v2, v2, v1 ; 10040302 v_mac_f32_e32 v2, v6, v0 ; 3E040106 v_mul_f32_e32 v4, v4, v1 ; 10080304 v_mac_f32_e32 v4, v10, v0 ; 3E08010A v_mul_f32_e32 v10, v5, v1 ; 10140305 v_mac_f32_e32 v10, v11, v0 ; 3E14010B v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mac_f32_e32 v0, v1, v1 ; 3E000301 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v0, 1.0, v0 ; 080000F2 v_sqrt_f32_e32 v0, v0 ; 7E006700 v_mac_f32_e32 v2, v12, v0 ; 3E04010C v_mac_f32_e32 v4, v19, v0 ; 3E080113 v_mac_f32_e32 v10, v20, v0 ; 3E140114 v_mul_f32_e32 v0, v2, v2 ; 10000502 v_mac_f32_e32 v0, v4, v4 ; 3E000904 v_mac_f32_e32 v0, v10, v10 ; 3E00150A v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_mul_f32_e32 v1, v23, v23 ; 10022F17 v_mac_f32_e32 v1, v24, v24 ; 3E023118 v_mac_f32_e32 v1, v25, v25 ; 3E023319 v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 v_mul_f32_e32 v6, v0, v2 ; 100C0500 v_mul_f32_e32 v5, v0, v4 ; 100A0900 v_mul_f32_e32 v4, v0, v10 ; 10081500 v_mul_f32_e32 v12, v1, v23 ; 10182F01 v_mul_f32_e32 v11, v1, v24 ; 10163101 v_mul_f32_e32 v0, v12, v6 ; 10000D0C v_mac_f32_e32 v0, v11, v5 ; 3E000B0B v_mul_f32_e32 v10, v1, v25 ; 10143301 v_mac_f32_e32 v0, v10, v4 ; 3E00090A v_mul_f32_e32 v2, v6, v0 ; 10040106 v_mac_f32_e32 v2, v6, v0 ; 3E040106 v_mul_f32_e32 v19, v5, v0 ; 10260105 v_mac_f32_e32 v19, v5, v0 ; 3E260105 v_mad_f32 v23, v23, v1, -v2 ; D2820017 840A0317 v_mad_f32 v24, v24, v1, -v19 ; D2820018 844E0318 s_buffer_load_dword s1, s[8:11], 0x40 ; C2008940 s_buffer_load_dword s2, s[8:11], 0x41 ; C2010941 s_buffer_load_dword s3, s[8:11], 0x42 ; C2018942 v_mul_f32_e32 v2, v4, v0 ; 10040104 v_mac_f32_e32 v2, v4, v0 ; 3E040104 v_mad_f32 v25, v25, v1, -v2 ; D2820019 840A0319 s_buffer_load_dword s12, s[8:11], 0x27 ; C2060927 s_buffer_load_dword s13, s[8:11], 0x2b ; C206892B s_buffer_load_dword s29, s[8:11], 0x2c ; C20E892C s_buffer_load_dword s30, s[8:11], 0x2d ; C20F092D v_sub_f32_e64 v0, 1.0, s14 ; D2080000 00001CF2 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s1, v0 ; 10040001 v_mul_f32_e32 v1, s2, v0 ; 10020002 v_mul_f32_e32 v0, s3, v0 ; 10000003 v_mac_f32_e32 v2, s14, v13 ; 3E041A0E v_mov_b32_e32 v26, v23 ; 7E340317 v_mac_f32_e32 v1, s14, v14 ; 3E021C0E v_mov_b32_e32 v27, v24 ; 7E360318 v_mac_f32_e32 v0, s14, v15 ; 3E001E0E v_mov_b32_e32 v28, v25 ; 7E380319 v_cmp_lt_f32_e64 s[2:3], 0, s13 ; D0020002 00001A80 image_sample v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[16:23], s[24:27] ; F0800F00 00C41111 s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[20:21], s[2:3] ; BE942402 s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E s_cbranch_execz BB0_2 ; BF880000 s_buffer_load_dword s1, s[8:11], 0x20 ; C2008920 s_buffer_load_dword s2, s[8:11], 0x21 ; C2010921 s_buffer_load_dword s3, s[8:11], 0x22 ; C2018922 s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924 s_buffer_load_dword s15, s[8:11], 0x25 ; C2078925 v_mul_f32_e32 v17, v23, v23 ; 10222F17 v_mac_f32_e32 v17, v24, v24 ; 3E223118 v_mac_f32_e32 v17, v25, v25 ; 3E223319 v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 s_buffer_load_dword s16, s[8:11], 0x26 ; C2080926 s_buffer_load_dword s17, s[8:11], 0x28 ; C2088928 s_buffer_load_dword s18, s[8:11], 0x29 ; C2090929 s_buffer_load_dword s19, s[8:11], 0x2a ; C209892A v_mul_f32_e32 v19, v17, v23 ; 10262F11 v_mul_f32_e32 v20, v17, v24 ; 10283111 v_mul_f32_e32 v17, v17, v25 ; 10223311 v_rcp_f32_e32 v26, v19 ; 7E345513 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v27, s1, v21 ; 08362A01 v_sub_f32_e32 v28, s2, v16 ; 08382002 v_rcp_f32_e32 v29, v20 ; 7E3A5514 v_mul_f32_e32 v27, v26, v27 ; 1036371A v_sub_f32_e32 v30, s13, v21 ; 083C2A0D v_mul_f32_e32 v26, v26, v30 ; 10343D1A v_cmp_lt_f32_e32 vcc, 0, v19 ; 7C022680 v_cndmask_b32_e32 v26, v26, v27 ; 0034371A v_rcp_f32_e32 v27, v17 ; 7E365511 v_mul_f32_e32 v28, v29, v28 ; 1038391D v_sub_f32_e32 v30, s15, v16 ; 083C200F v_mul_f32_e32 v29, v29, v30 ; 103A3D1D v_cmp_lt_f32_e32 vcc, 0, v20 ; 7C022880 v_cndmask_b32_e32 v28, v29, v28 ; 0038391D v_sub_f32_e32 v29, s3, v22 ; 083A2C03 v_mul_f32_e32 v29, v27, v29 ; 103A3B1B v_sub_f32_e32 v30, s16, v22 ; 083C2C10 v_mul_f32_e32 v27, v27, v30 ; 10363D1B v_cmp_lt_f32_e32 vcc, 0, v17 ; 7C022280 v_cndmask_b32_e32 v27, v27, v29 ; 00363B1B v_min3_f32 v26, v26, v28, v27 ; D2A2001A 046E391A v_mov_b32_e32 v27, s13 ; 7E36020D v_add_f32_e32 v27, s1, v27 ; 06363601 v_mov_b32_e32 v28, s15 ; 7E38020F v_add_f32_e32 v28, s2, v28 ; 06383802 v_mov_b32_e32 v29, s16 ; 7E3A0210 v_add_f32_e32 v29, s3, v29 ; 063A3A03 v_mad_f32 v30, 0.5, v27, -s17 ; D282001E 804636F0 v_add_f32_e32 v30, v21, v30 ; 063C3D15 v_mac_f32_e32 v30, v26, v19 ; 3E3C271A v_mad_f32 v19, 0.5, v28, -s18 ; D2820013 804A38F0 v_add_f32_e32 v19, v16, v19 ; 06262710 v_mac_f32_e32 v19, v26, v20 ; 3E26291A v_mad_f32 v20, 0.5, v29, -s19 ; D2820014 804E3AF0 v_add_f32_e32 v20, v22, v20 ; 06282916 v_mac_f32_e32 v20, v26, v17 ; 3E28231A v_mad_f32 v26, 0.5, -v27, v30 ; D282001A 447A36F0 v_mad_f32 v27, 0.5, -v28, v19 ; D282001B 444E38F0 v_mad_f32 v28, 0.5, -v29, v20 ; D282001C 44523AF0 s_or_b64 exec, exec, s[20:21] ; 88FE147E s_buffer_load_dword s15, s[8:11], 0x17 ; C2078917 s_buffer_load_dword s16, s[8:11], 0x43 ; C2080943 s_buffer_load_dword s13, s[8:11], 0x6c ; C206896C s_buffer_load_dword s1, s[8:11], 0x0 ; C2008900 s_buffer_load_dword s2, s[8:11], 0x1 ; C2010901 s_buffer_load_dword s3, s[8:11], 0x2 ; C2018902 s_buffer_load_dword s17, s[8:11], 0x4 ; C2088904 s_buffer_load_dword s18, s[8:11], 0x5 ; C2090905 s_buffer_load_dword s19, s[8:11], 0x6 ; C2098906 s_buffer_load_dword s21, s[8:11], 0x7 ; C20A8907 s_buffer_load_dword s20, s[8:11], 0x8 ; C20A0908 s_buffer_load_dword s22, s[8:11], 0x9 ; C20B0909 s_buffer_load_dword s23, s[8:11], 0xa ; C20B890A s_buffer_load_dword s24, s[8:11], 0xb ; C20C090B s_buffer_load_dword s25, s[8:11], 0xc ; C20C890C s_buffer_load_dword s26, s[8:11], 0xd ; C20D090D s_buffer_load_dword s27, s[8:11], 0xe ; C20D890E s_buffer_load_dword s28, s[8:11], 0xf ; C20E090F v_sub_f32_e64 v17, 1.0, s0 ; D2080011 000000F2 v_log_f32_e32 v17, v17 ; 7E224F11 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 v_mul_legacy_f32_e32 v17, 0x3f400000, v17 ; 0E2222FF 3F400000 v_exp_f32_e32 v17, v17 ; 7E224B11 v_mul_f32_e32 v29, 0x40e00000, v17 ; 103A22FF 40E00000 v_cubeid_f32 v33, v26, v27, v28 ; D2880021 0472371A v_cubema_f32 v32, v26, v27, v28 ; D28E0020 0472371A v_cubesc_f32 v31, v26, v27, v28 ; D28A001F 0472371A v_cubetc_f32 v30, v26, v27, v28 ; D28C001E 0472371A v_mov_b32_e32 v26, 0x3fc00000 ; 7E3402FF 3FC00000 v_rcp_f32_e64 v17, |v32| ; D3540111 00000120 v_mad_f32 v27, v17, v30, v26 ; D282001B 046A3D11 v_mac_f32_e32 v26, v17, v31 ; 3E343F11 v_mov_b32_e32 v28, v33 ; 7E380321 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[36:43], s[32:35] ; F0900F00 01091A1A s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v17, v29 ; 7E224F1D v_mul_legacy_f32_e32 v17, s30, v17 ; 0E22221E v_exp_f32_e32 v17, v17 ; 7E224B11 v_mul_f32_e32 v17, s29, v17 ; 1022221D v_mul_f32_e32 v20, v26, v17 ; 1028231A v_mul_f32_e32 v19, v27, v17 ; 1026231B v_mul_f32_e32 v17, v28, v17 ; 1022231C v_mov_b32_e32 v27, s14 ; 7E36020E v_mov_b32_e32 v26, 0x3f7fff58 ; 7E3402FF 3F7FFF58 v_cmp_lt_f32_e32 vcc, s12, v26 ; 7C02340C s_and_saveexec_b64 s[30:31], vcc ; BE9E246A s_xor_b64 s[30:31], exec, s[30:31] ; 899E1E7E s_cbranch_execz BB0_6 ; BF880000 s_buffer_load_dword s32, s[8:11], 0x3b ; C210093B s_buffer_load_dword s14, s[8:11], 0x3c ; C207093C s_buffer_load_dword s29, s[8:11], 0x3d ; C20E893D s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[32:33], 0, s32 ; D0020020 00004080 s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420 s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E s_cbranch_execz BB0_7 ; BF880000 s_buffer_load_dword s34, s[8:11], 0x36 ; C2110936 s_buffer_load_dword s35, s[8:11], 0x38 ; C2118938 s_buffer_load_dword s36, s[8:11], 0x39 ; C2120939 s_buffer_load_dword s37, s[8:11], 0x3a ; C212893A s_buffer_load_dword s38, s[8:11], 0x30 ; C2130930 s_buffer_load_dword s39, s[8:11], 0x31 ; C2138931 s_buffer_load_dword s40, s[8:11], 0x32 ; C2140932 s_buffer_load_dword s41, s[8:11], 0x34 ; C2148934 s_buffer_load_dword s42, s[8:11], 0x35 ; C2150935 v_mul_f32_e32 v26, v23, v23 ; 10342F17 v_mac_f32_e32 v26, v24, v24 ; 3E343118 v_mac_f32_e32 v26, v25, v25 ; 3E343319 v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v28, s34, v22 ; 08382C22 v_mov_b32_e32 v29, s34 ; 7E3A0222 v_sub_f32_e32 v30, s38, v21 ; 083C2A26 v_sub_f32_e32 v31, s39, v16 ; 083E2027 v_add_f32_e32 v29, s40, v29 ; 063A3A28 v_sub_f32_e32 v32, s40, v22 ; 08402C28 v_mad_f32 v33, 0.5, v29, -s37 ; D2820021 80963AF0 v_add_f32_e32 v22, v22, v33 ; 062C4316 v_mul_f32_e32 v23, v26, v23 ; 102E2F1A v_mul_f32_e32 v24, v26, v24 ; 1030311A v_mul_f32_e32 v25, v26, v25 ; 1032331A v_rcp_f32_e32 v26, v23 ; 7E345517 v_rcp_f32_e32 v33, v24 ; 7E425518 v_rcp_f32_e32 v34, v25 ; 7E445519 v_sub_f32_e32 v35, s41, v21 ; 08462A29 v_mov_b32_e32 v36, s41 ; 7E480229 v_add_f32_e32 v36, s38, v36 ; 06484826 v_mul_f32_e32 v30, v26, v30 ; 103C3D1A v_mul_f32_e32 v26, v26, v35 ; 1034471A v_mul_f32_e32 v31, v33, v31 ; 103E3F21 v_mul_f32_e32 v32, v34, v32 ; 10404122 v_mul_f32_e32 v28, v34, v28 ; 10383922 v_mad_f32 v34, 0.5, v36, -s35 ; D2820022 808E48F0 v_add_f32_e32 v21, v21, v34 ; 062A4515 v_sub_f32_e32 v34, s42, v16 ; 0844202A v_mov_b32_e32 v35, s42 ; 7E46022A v_mul_f32_e32 v33, v33, v34 ; 10424521 v_add_f32_e32 v34, s39, v35 ; 06444627 v_cmp_lt_f32_e32 vcc, 0, v23 ; 7C022E80 v_cndmask_b32_e32 v26, v26, v30 ; 00343D1A v_cmp_lt_f32_e32 vcc, 0, v24 ; 7C023080 v_cndmask_b32_e32 v30, v33, v31 ; 003C3F21 v_cmp_lt_f32_e32 vcc, 0, v25 ; 7C023280 v_cndmask_b32_e32 v28, v28, v32 ; 0038411C v_min3_f32 v26, v26, v30, v28 ; D2A2001A 04723D1A v_mad_f32 v28, 0.5, v34, -s36 ; D282001C 809244F0 v_add_f32_e32 v16, v16, v28 ; 06203910 v_mac_f32_e32 v21, v26, v23 ; 3E2A2F1A v_mac_f32_e32 v16, v26, v24 ; 3E20311A v_mac_f32_e32 v22, v26, v25 ; 3E2C331A v_mad_f32 v23, 0.5, -v36, v21 ; D2820017 445648F0 v_mad_f32 v24, 0.5, -v34, v16 ; D2820018 444244F0 v_mad_f32 v25, 0.5, -v29, v22 ; D2820019 445A3AF0 s_or_b64 exec, exec, s[32:33] ; 88FE207E v_sub_f32_e64 v16, 1.0, s0 ; D2080010 000000F2 v_log_f32_e32 v16, v16 ; 7E204F10 s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504 v_mul_legacy_f32_e32 v16, 0x3f400000, v16 ; 0E2020FF 3F400000 v_exp_f32_e32 v16, v16 ; 7E204B10 v_mul_f32_e32 v26, 0x40e00000, v16 ; 103420FF 40E00000 v_cubeid_f32 v31, v23, v24, v25 ; D288001F 04663117 v_cubema_f32 v30, v23, v24, v25 ; D28E001E 04663117 s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708 v_cubesc_f32 v29, v23, v24, v25 ; D28A001D 04663117 v_cubetc_f32 v28, v23, v24, v25 ; D28C001C 04663117 v_rcp_f32_e64 v16, |v30| ; D3540110 0000011E v_mov_b32_e32 v23, 0x3fc00000 ; 7E2E02FF 3FC00000 v_mad_f32 v24, v16, v28, v23 ; D2820018 045E3910 v_mac_f32_e32 v23, v16, v29 ; 3E2E3B10 v_mov_b32_e32 v25, v31 ; 7E32031F s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[36:43], s[32:35] ; F0900F00 01091517 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v16, v24 ; 7E204F18 v_sub_f32_e64 v24, 1.0, s12 ; D2080018 000018F2 v_mul_legacy_f32_e32 v16, s29, v16 ; 0E20201D v_exp_f32_e32 v16, v16 ; 7E204B10 v_mul_f32_e32 v16, s14, v16 ; 1020200E v_mul_f32_e32 v21, v21, v16 ; 102A2115 v_mul_f32_e32 v22, v22, v16 ; 102C2116 v_mul_f32_e32 v16, v23, v16 ; 10202117 v_mul_f32_e32 v21, v21, v24 ; 102A3115 v_mul_f32_e32 v22, v22, v24 ; 102C3116 v_mul_f32_e32 v16, v16, v24 ; 10203110 v_mac_f32_e32 v21, s12, v20 ; 3E2A280C v_mac_f32_e32 v22, s12, v19 ; 3E2C260C v_mac_f32_e32 v16, s12, v17 ; 3E20220C v_mov_b32_e32 v17, v16 ; 7E220310 v_mov_b32_e32 v19, v22 ; 7E260316 v_mov_b32_e32 v20, v21 ; 7E280315 s_or_b64 exec, exec, s[30:31] ; 88FE1E7E v_mad_f32 v22, -v27, s16, s16 ; D2820016 2040211B v_mov_b32_e32 v16, s15 ; 7E20020F v_mul_f32_e32 v21, v22, v13 ; 102A1B16 v_mul_f32_e32 v14, v22, v14 ; 101C1D16 v_mul_f32_e32 v13, v22, v15 ; 101A1F16 v_mul_f32_e32 v15, s18, v5 ; 101E0A12 v_mac_f32_e32 v15, s17, v6 ; 3E1E0C11 v_mac_f32_e32 v15, s19, v4 ; 3E1E0813 v_add_f32_e32 v15, s21, v15 ; 061E1E15 v_add_f32_e32 v15, v15, v7 ; 061E0F0F v_mul_f32_e32 v7, s22, v5 ; 100E0A16 v_mac_f32_e32 v7, s20, v6 ; 3E0E0C14 v_mac_f32_e32 v7, s23, v4 ; 3E0E0817 v_add_f32_e32 v7, s24, v7 ; 060E0E18 v_add_f32_e32 v8, v7, v8 ; 06101107 v_mul_f32_e32 v7, s26, v5 ; 100E0A1A v_mac_f32_e32 v7, s25, v6 ; 3E0E0C19 v_mac_f32_e32 v7, s27, v4 ; 3E0E081B v_add_f32_e32 v7, s28, v7 ; 060E0E1C v_add_f32_e32 v9, v7, v9 ; 06121307 s_buffer_load_dword s6, s[8:11], 0x10 ; C2030910 s_buffer_load_dword s5, s[8:11], 0x11 ; C2028911 s_buffer_load_dword s4, s[8:11], 0x12 ; C2020912 s_buffer_load_dword s17, s[8:11], 0x16 ; C2088916 s_buffer_load_dword s14, s[8:11], 0x44 ; C2070944 s_buffer_load_dword s7, s[8:11], 0x45 ; C2038945 s_buffer_load_dword s12, s[8:11], 0x46 ; C2060946 s_buffer_load_dword s15, s[8:11], 0x48 ; C2078948 s_buffer_load_dword s16, s[8:11], 0x49 ; C2080949 s_buffer_load_dword s8, s[8:11], 0x4b ; C204094B v_sub_f32_e64 v23, 1.0, s13 ; D2080017 00001AF2 v_mac_f32_e32 v23, s13, v18 ; 3E2E240D v_mul_f32_e32 v7, s1, v6 ; 100E0C01 v_mac_f32_e32 v7, s2, v5 ; 3E0E0A02 v_mac_f32_e32 v7, s3, v4 ; 3E0E0803 v_max_f32_e32 v7, 0, v7 ; 200E0E80 v_mul_f32_e32 v15, v23, v15 ; 101E1F17 v_mul_f32_e32 v8, v23, v8 ; 10101117 v_mul_f32_e32 v9, v23, v9 ; 10121317 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v16, s17, v3 ; 3E200611 v_mul_f32_e32 v3, v23, v20 ; 10062917 v_mul_f32_e32 v18, v23, v19 ; 10242717 v_mul_f32_e32 v17, v23, v17 ; 10222317 v_sub_f32_e32 v19, 1.0, v22 ; 08262CF2 v_add_f32_e32 v19, s0, v19 ; 06262600 v_sub_f32_e64 v20, 1.0, s0 ; D2080014 000000F2 v_add_f32_e64 v19, 0, v19 clamp ; D2060813 00022680 v_sub_f32_e32 v22, s1, v12 ; 082C1801 v_sub_f32_e32 v23, s2, v11 ; 082E1602 v_mul_f32_e32 v24, v22, v22 ; 10302D16 v_mac_f32_e32 v24, v23, v23 ; 3E302F17 v_sub_f32_e32 v25, s3, v10 ; 08321403 v_mac_f32_e32 v24, v25, v25 ; 3E303319 v_rsq_clamp_f32_e32 v24, v24 ; 7E305918 v_mul_f32_e32 v22, v24, v22 ; 102C2D18 v_mul_f32_e32 v23, v24, v23 ; 102E2F18 v_mul_f32_e32 v24, v24, v25 ; 10303318 v_mul_f32_e32 v12, v12, v6 ; 10180D0C v_mad_f32 v11, -v11, v5, -v12 ; D282000B A4320B0B v_mul_f32_e32 v6, v22, v6 ; 100C0D16 v_mac_f32_e32 v6, v23, v5 ; 3E0C0B17 v_mul_f32_e32 v5, s1, v22 ; 100A2C01 v_mac_f32_e32 v5, s2, v23 ; 3E0A2E02 v_mad_f32 v10, -v10, v4, v11 ; D282000A 242E090A v_mac_f32_e32 v5, s3, v24 ; 3E0A3003 v_mac_f32_e32 v6, v24, v4 ; 3E0C0918 v_max_f32_e32 v4, 0, v5 ; 20080A80 v_sub_f32_e32 v5, 1.0, v4 ; 080A08F2 v_mul_f32_e32 v11, v5, v5 ; 10160B05 v_mul_f32_e32 v5, v5, v11 ; 100A1705 v_mul_f32_e32 v5, v5, v11 ; 100A1705 v_max_f32_e32 v10, 0, v10 ; 20141480 v_sub_f32_e32 v11, 1.0, v10 ; 081614F2 v_mul_f32_e32 v12, v11, v11 ; 1018170B v_mul_f32_e32 v22, v11, v12 ; 102C190B v_mad_f32 v23, -v12, v22, 1.0 ; D2820017 23CA2D0C v_mul_f32_e32 v24, v2, v23 ; 10302F02 v_sub_f32_e32 v25, 1.0, v2 ; 083204F2 v_mac_f32_e32 v2, v5, v25 ; 3E043305 v_mul_f32_e32 v25, v1, v23 ; 10322F01 v_sub_f32_e32 v26, 1.0, v1 ; 083402F2 v_mac_f32_e32 v1, v5, v26 ; 3E023505 v_mul_f32_e32 v23, v0, v23 ; 102E2F00 v_sub_f32_e32 v26, 1.0, v0 ; 083400F2 v_mac_f32_e32 v0, v5, v26 ; 3E003505 v_sub_f32_e32 v5, 1.0, v20 ; 080A28F2 v_mov_b32_e32 v26, 0x3cf5c28f ; 7E3402FF 3CF5C28F v_madmk_f32_e32 v5, v5, v26, 0x3f77ced9 ; 400A3505 3F77CED9 v_add_f32_e32 v26, v4, v4 ; 06340904 v_mul_f32_e32 v4, v20, v4 ; 10080914 v_mad_f32 v4, v26, v4, 0.5 ; D2820004 03C2091A v_mul_f32_e32 v12, v22, v12 ; 10181916 v_mac_f32_e32 v24, v19, v12 ; 3E301913 v_mac_f32_e32 v25, v19, v12 ; 3E321913 v_mac_f32_e32 v23, v19, v12 ; 3E2E1913 v_mul_f32_e32 v19, v20, v20 ; 10262914 v_log_f32_e32 v5, v5 ; 7E0A4F05 v_mul_f32_e32 v19, s8, v19 ; 10262608 v_mul_f32_e32 v11, v19, v11 ; 10161713 v_mac_f32_e32 v11, 1.0, v10 ; 3E1614F2 v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_sub_f32_e32 v10, 1.0, v7 ; 08140EF2 v_mul_f32_e32 v19, v19, v10 ; 10261513 v_mac_f32_e32 v19, 1.0, v7 ; 3E260EF2 v_max_f32_e32 v6, 0, v6 ; 200C0C80 v_log_f32_e32 v6, v6 ; 7E0C4F06 v_madak_f32_e32 v11, v19, v11, 0x38d1b717 ; 42161713 38D1B717 v_mul_f32_e32 v5, 0x41200000, v5 ; 100A0AFF 41200000 v_mul_f32_e32 v19, v5, v5 ; 10260B05 v_mul_legacy_f32_e32 v6, v19, v6 ; 0E0C0D13 v_rcp_f32_e32 v11, v11 ; 7E16550B v_mad_f32 v5, v5, v5, 1.0 ; D2820005 03CA0B05 v_mul_f32_e32 v5, s16, v5 ; 100A0A10 v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_mul_f32_e32 v5, v5, v6 ; 100A0D05 v_mul_f32_e32 v5, v5, v11 ; 100A1705 v_mul_f32_e32 v5, v7, v5 ; 100A0B07 v_mul_f32_e32 v5, s15, v5 ; 100A0A0F v_mul_f32_e32 v6, v10, v10 ; 100C150A v_mul_f32_e32 v10, v10, v6 ; 10140D0A v_mul_f32_e32 v6, v10, v6 ; 100C0D0A v_add_f32_e32 v4, -1.0, v4 ; 060808F3 v_mad_f32 v6, v4, v6, 1.0 ; D2820006 03CA0D04 v_mad_f32 v4, v4, v12, 1.0 ; D2820004 03CA1904 v_mul_f32_e32 v4, v4, v6 ; 10080D04 v_mul_f32_e32 v4, v7, v4 ; 10080907 v_mac_f32_e32 v15, s14, v4 ; 3E1E080E v_mul_f32_e32 v6, v15, v21 ; 100C2B0F v_max_f32_e32 v5, 0, v5 ; 200A0A80 v_mul_f32_e32 v7, s14, v5 ; 100E0A0E v_mac_f32_e32 v6, v2, v7 ; 3E0C0F02 v_mac_f32_e32 v8, s7, v4 ; 3E100807 v_mac_f32_e32 v9, s12, v4 ; 3E12080C v_mul_f32_e32 v2, s7, v5 ; 10040A07 v_mul_f32_e32 v4, s12, v5 ; 10080A0C v_mul_f32_e32 v5, v8, v14 ; 100A1D08 v_mul_f32_e32 v7, v9, v13 ; 100E1B09 v_mac_f32_e32 v5, v1, v2 ; 3E0A0501 v_mac_f32_e32 v7, v0, v4 ; 3E0E0900 v_mac_f32_e32 v6, v24, v3 ; 3E0C0718 v_mac_f32_e32 v5, v25, v18 ; 3E0A2519 v_mac_f32_e32 v7, v23, v17 ; 3E0E2317 v_add_f32_e64 v0, 0, v16 clamp ; D2060800 00022080 v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 v_mul_f32_e32 v2, s6, v1 ; 10040206 v_mac_f32_e32 v2, v6, v0 ; 3E040106 v_mul_f32_e32 v3, s5, v1 ; 10060205 v_mac_f32_e32 v3, v5, v0 ; 3E060105 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mac_f32_e32 v1, v7, v0 ; 3E020107 v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702 v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 40 Code Size: 2312 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL OUT[6], GENERIC[5] DCL CONST[0..20] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[2], IN[0].xxxx 1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0].xyz, CONST[5], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[17], IN[0].xxxx 5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1] 8: MAD TEMP[2].xy, IN[2].xyyy, CONST[10].xyyy, CONST[10].zwww 9: FSEQ TEMP[3].x, CONST[12].xxxx, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].xy, IN[2].xyxx 12: ELSE :0 13: MOV TEMP[3].xy, IN[3].xyxx 14: ENDIF 15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[11].xyyy, CONST[11].zwww 16: MOV TEMP[2].zw, TEMP[3].yyxy 17: MOV TEMP[3].x, CONST[6].xxxx 18: MOV TEMP[3].y, CONST[7].xxxx 19: MOV TEMP[3].z, CONST[8].xxxx 20: MOV TEMP[4].x, CONST[6].yyyy 21: MOV TEMP[4].y, CONST[7].yyyy 22: MOV TEMP[4].z, CONST[8].yyyy 23: MOV TEMP[5].x, CONST[6].zzzz 24: MOV TEMP[5].y, CONST[7].zzzz 25: MOV TEMP[5].z, CONST[8].zzzz 26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz 29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 30: RSQ TEMP[4].x, TEMP[4].xxxx 31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 32: MUL TEMP[4].xyz, CONST[2].xyzz, IN[4].xxxx 33: MAD TEMP[4].xyz, CONST[3].xyzz, IN[4].yyyy, TEMP[4].xyzz 34: MAD TEMP[4].xyz, CONST[4].xyzz, IN[4].zzzz, TEMP[4].xyzz 35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 36: RSQ TEMP[5].x, TEMP[5].xxxx 37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx 39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz 40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww 41: MOV TEMP[4].xyz, TEMP[4].xyzx 42: MOV TEMP[5].xyz, TEMP[5].xyzx 43: MOV TEMP[3].xyz, TEMP[3].xyzx 44: MUL TEMP[6].xyz, TEMP[0].xyzz, CONST[1].wwww 45: ADD TEMP[6].xyz, CONST[1].xyzz, -TEMP[6].xyzz 46: MOV TEMP[4].w, TEMP[6].xxxx 47: MOV TEMP[5].w, TEMP[6].yyyy 48: MOV TEMP[3].w, TEMP[6].zzzz 49: MUL TEMP[6], CONST[2], IN[0].xxxx 50: MAD TEMP[6], CONST[3], IN[0].yyyy, TEMP[6] 51: MAD TEMP[6], CONST[4], IN[0].zzzz, TEMP[6] 52: MAD TEMP[6], CONST[5], IN[0].wwww, TEMP[6] 53: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[0].xyzz 54: MOV TEMP[0].yzw, TEMP[0].yxyz 55: MUL TEMP[7], CONST[13], TEMP[6].xxxx 56: MAD TEMP[7], CONST[14], TEMP[6].yyyy, TEMP[7] 57: MAD TEMP[7], CONST[15], TEMP[6].zzzz, TEMP[7] 58: MAD TEMP[6].xyz, CONST[16], TEMP[6].wwww, TEMP[7] 59: MOV TEMP[6].xyz, TEMP[6].xyzx 60: MOV TEMP[0].x, TEMP[1].zzzz 61: MOV OUT[1], TEMP[2] 62: MOV OUT[3], TEMP[5] 63: MOV OUT[2], TEMP[4] 64: MOV OUT[4], TEMP[3] 65: MOV OUT[0], TEMP[1] 66: MOV OUT[6], TEMP[6] 67: MOV OUT[5], TEMP[0] 68: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332) %82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0 %84 = add i32 %5, %7 %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %84) %86 = extractelement <4 x float> %85, i32 0 %87 = extractelement <4 x float> %85, i32 1 %88 = extractelement <4 x float> %85, i32 2 %89 = extractelement <4 x float> %85, i32 3 %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 %92 = add i32 %5, %7 %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %92) %94 = extractelement <4 x float> %93, i32 0 %95 = extractelement <4 x float> %93, i32 1 %96 = extractelement <4 x float> %93, i32 2 %97 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0 %99 = add i32 %5, %7 %100 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %98, i32 0, i32 %99) %101 = extractelement <4 x float> %100, i32 0 %102 = extractelement <4 x float> %100, i32 1 %103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0 %105 = add i32 %5, %7 %106 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %104, i32 0, i32 %105) %107 = extractelement <4 x float> %106, i32 0 %108 = extractelement <4 x float> %106, i32 1 %109 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0 %111 = add i32 %5, %7 %112 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %110, i32 0, i32 %111) %113 = extractelement <4 x float> %112, i32 0 %114 = extractelement <4 x float> %112, i32 1 %115 = extractelement <4 x float> %112, i32 2 %116 = extractelement <4 x float> %112, i32 3 %117 = fmul float %20, %86 %118 = fmul float %21, %86 %119 = fmul float %22, %86 %120 = fmul float %24, %87 %121 = fadd float %120, %117 %122 = fmul float %25, %87 %123 = fadd float %122, %118 %124 = fmul float %26, %87 %125 = fadd float %124, %119 %126 = fmul float %28, %88 %127 = fadd float %126, %121 %128 = fmul float %29, %88 %129 = fadd float %128, %123 %130 = fmul float %30, %88 %131 = fadd float %130, %125 %132 = fmul float %32, %89 %133 = fadd float %132, %127 %134 = fmul float %33, %89 %135 = fadd float %134, %129 %136 = fmul float %34, %89 %137 = fadd float %136, %131 %138 = fmul float %66, %86 %139 = fmul float %67, %86 %140 = fmul float %68, %86 %141 = fmul float %69, %86 %142 = fmul float %70, %87 %143 = fadd float %142, %138 %144 = fmul float %71, %87 %145 = fadd float %144, %139 %146 = fmul float %72, %87 %147 = fadd float %146, %140 %148 = fmul float %73, %87 %149 = fadd float %148, %141 %150 = fmul float %74, %88 %151 = fadd float %150, %143 %152 = fmul float %75, %88 %153 = fadd float %152, %145 %154 = fmul float %76, %88 %155 = fadd float %154, %147 %156 = fmul float %77, %88 %157 = fadd float %156, %149 %158 = fmul float %78, %89 %159 = fadd float %158, %151 %160 = fmul float %79, %89 %161 = fadd float %160, %153 %162 = fmul float %80, %89 %163 = fadd float %162, %155 %164 = fmul float %81, %89 %165 = fadd float %164, %157 %166 = fmul float %101, %45 %167 = fadd float %166, %47 %168 = fmul float %102, %46 %169 = fadd float %168, %48 %170 = fcmp oeq float %53, 0.000000e+00 %. = select i1 %170, float %101, float %107 %.32 = select i1 %170, float %102, float %108 %171 = fmul float %., %49 %172 = fadd float %171, %51 %173 = fmul float %.32, %50 %174 = fadd float %173, %52 %175 = fmul float %36, %94 %176 = fmul float %39, %94 %177 = fmul float %42, %94 %178 = fmul float %37, %95 %179 = fadd float %178, %175 %180 = fmul float %40, %95 %181 = fadd float %180, %176 %182 = fmul float %43, %95 %183 = fadd float %182, %177 %184 = fmul float %38, %96 %185 = fadd float %184, %179 %186 = fmul float %41, %96 %187 = fadd float %186, %181 %188 = fmul float %44, %96 %189 = fadd float %188, %183 %190 = fmul float %185, %185 %191 = fmul float %187, %187 %192 = fadd float %191, %190 %193 = fmul float %189, %189 %194 = fadd float %192, %193 %195 = call float @llvm.AMDGPU.rsq.clamped.f32(float %194) %196 = fmul float %185, %195 %197 = fmul float %187, %195 %198 = fmul float %189, %195 %199 = fmul float %20, %113 %200 = fmul float %21, %113 %201 = fmul float %22, %113 %202 = fmul float %24, %114 %203 = fadd float %202, %199 %204 = fmul float %25, %114 %205 = fadd float %204, %200 %206 = fmul float %26, %114 %207 = fadd float %206, %201 %208 = fmul float %28, %115 %209 = fadd float %208, %203 %210 = fmul float %29, %115 %211 = fadd float %210, %205 %212 = fmul float %30, %115 %213 = fadd float %212, %207 %214 = fmul float %209, %209 %215 = fmul float %211, %211 %216 = fadd float %215, %214 %217 = fmul float %213, %213 %218 = fadd float %216, %217 %219 = call float @llvm.AMDGPU.rsq.clamped.f32(float %218) %220 = fmul float %209, %219 %221 = fmul float %211, %219 %222 = fmul float %213, %219 %223 = fmul float %198, %221 %224 = fmul float %196, %222 %225 = fmul float %197, %220 %226 = fmul float %197, %222 %227 = fsub float %226, %223 %228 = fmul float %198, %220 %229 = fsub float %228, %224 %230 = fmul float %196, %221 %231 = fsub float %230, %225 %232 = fmul float %227, %116 %233 = fmul float %229, %116 %234 = fmul float %231, %116 %235 = fmul float %133, %19 %236 = fmul float %135, %19 %237 = fmul float %137, %19 %238 = fsub float %16, %235 %239 = fsub float %17, %236 %240 = fsub float %18, %237 %241 = fmul float %20, %86 %242 = fmul float %21, %86 %243 = fmul float %22, %86 %244 = fmul float %23, %86 %245 = fmul float %24, %87 %246 = fadd float %245, %241 %247 = fmul float %25, %87 %248 = fadd float %247, %242 %249 = fmul float %26, %87 %250 = fadd float %249, %243 %251 = fmul float %27, %87 %252 = fadd float %251, %244 %253 = fmul float %28, %88 %254 = fadd float %253, %246 %255 = fmul float %29, %88 %256 = fadd float %255, %248 %257 = fmul float %30, %88 %258 = fadd float %257, %250 %259 = fmul float %31, %88 %260 = fadd float %259, %252 %261 = fmul float %32, %89 %262 = fadd float %261, %254 %263 = fmul float %33, %89 %264 = fadd float %263, %256 %265 = fmul float %34, %89 %266 = fadd float %265, %258 %267 = fmul float %35, %89 %268 = fadd float %267, %260 %269 = fsub float %133, %13 %270 = fsub float %135, %14 %271 = fsub float %137, %15 %272 = fmul float %54, %262 %273 = fmul float %55, %262 %274 = fmul float %56, %262 %275 = fmul float %57, %264 %276 = fadd float %275, %272 %277 = fmul float %58, %264 %278 = fadd float %277, %273 %279 = fmul float %59, %264 %280 = fadd float %279, %274 %281 = fmul float %60, %266 %282 = fadd float %281, %276 %283 = fmul float %61, %266 %284 = fadd float %283, %278 %285 = fmul float %62, %266 %286 = fadd float %285, %280 %287 = fmul float %63, %268 %288 = fadd float %287, %282 %289 = fmul float %64, %268 %290 = fadd float %289, %284 %291 = fmul float %65, %268 %292 = fadd float %291, %286 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %167, float %169, float %172, float %174) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %220, float %221, float %222, float %238) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %232, float %233, float %234, float %239) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %196, float %197, float %198, float %240) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %163, float %269, float %270, float %271) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %288, float %290, float %292, float %268) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %159, float %161, float %163, float %165) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[10:13], v0, s[20:23], 0 idxen ; E00C2000 80050A00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[12:15], v0, s[8:11], 0 idxen ; E00C2000 80020C00 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x30 ; C2020130 s_buffer_load_dword s5, s[0:3], 0x2a ; C202812A s_buffer_load_dword s6, s[0:3], 0x28 ; C2030128 s_buffer_load_dword s7, s[0:3], 0x34 ; C2038134 s_buffer_load_dword s8, s[0:3], 0x35 ; C2040135 s_buffer_load_dword s9, s[0:3], 0x36 ; C2048136 s_buffer_load_dword s10, s[0:3], 0x38 ; C2050138 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880 v_cndmask_b32_e32 v0, v10, v8 ; 0000110A v_cndmask_b32_e32 v10, v11, v9 ; 0014130B v_mov_b32_e32 v11, s5 ; 7E160205 s_buffer_load_dword s4, s[0:3], 0x29 ; C2020129 s_buffer_load_dword s5, s[0:3], 0x2b ; C202812B v_mac_f32_e32 v11, s6, v8 ; 3E161006 s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118 s_buffer_load_dword s11, s[0:3], 0x19 ; C2058119 s_buffer_load_dword s12, s[0:3], 0x1c ; C206011C s_buffer_load_dword s13, s[0:3], 0x1d ; C206811D s_buffer_load_dword s14, s[0:3], 0x20 ; C2070120 s_buffer_load_dword s15, s[0:3], 0x2c ; C207812C s_buffer_load_dword s16, s[0:3], 0x2d ; C208012D s_buffer_load_dword s17, s[0:3], 0x2e ; C208812E s_buffer_load_dword s18, s[0:3], 0x2f ; C209012F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s5 ; 7E100205 s_buffer_load_dword s5, s[0:3], 0x21 ; C2028121 v_mac_f32_e32 v8, s4, v9 ; 3E101204 v_mul_f32_e32 v9, s6, v5 ; 10120A06 v_mac_f32_e32 v9, s11, v6 ; 3E120C0B s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_buffer_load_dword s6, s[0:3], 0x1e ; C203011E v_mul_f32_e32 v16, s12, v5 ; 10200A0C v_mac_f32_e32 v16, s13, v6 ; 3E200C0D v_mul_f32_e32 v5, s14, v5 ; 100A0A0E s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v5, s5, v6 ; 3E0A0C05 s_buffer_load_dword s5, s[0:3], 0x44 ; C2028144 s_buffer_load_dword s12, s[0:3], 0x48 ; C2060148 s_buffer_load_dword s13, s[0:3], 0x45 ; C2068145 s_buffer_load_dword s14, s[0:3], 0x49 ; C2070149 v_mac_f32_e32 v9, s4, v7 ; 3E120E04 s_buffer_load_dword s4, s[0:3], 0x46 ; C2020146 s_buffer_load_dword s19, s[0:3], 0x4a ; C209814A s_buffer_load_dword s20, s[0:3], 0x47 ; C20A0147 v_mac_f32_e32 v16, s6, v7 ; 3E200E06 v_mac_f32_e32 v5, s11, v7 ; 3E0A0E0B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s5, v1 ; 100C0205 v_mac_f32_e32 v6, s12, v2 ; 3E0C040C v_mul_f32_e32 v7, s13, v1 ; 100E020D v_mac_f32_e32 v7, s14, v2 ; 3E0E040E s_buffer_load_dword s5, s[0:3], 0x4b ; C202814B v_mul_f32_e32 v17, s4, v1 ; 10220204 v_mac_f32_e32 v17, s19, v2 ; 3E220413 v_mul_f32_e32 v18, s20, v1 ; 10240214 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_buffer_load_dword s6, s[0:3], 0xf ; C203010F s_buffer_load_dword s11, s[0:3], 0x4c ; C205814C s_buffer_load_dword s12, s[0:3], 0x4d ; C206014D s_buffer_load_dword s13, s[0:3], 0x4e ; C206814E s_buffer_load_dword s14, s[0:3], 0x4f ; C207014F s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v18, s5, v2 ; 3E240405 s_buffer_load_dword s5, s[0:3], 0xc ; C202810C s_buffer_load_dword s19, s[0:3], 0xd ; C209810D s_buffer_load_dword s20, s[0:3], 0xe ; C20A010E v_mul_f32_e32 v19, s4, v1 ; 10260204 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 v_mac_f32_e32 v19, s6, v2 ; 3E260406 v_mac_f32_e32 v6, s11, v3 ; 3E0C060B s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108 v_mac_f32_e32 v7, s12, v3 ; 3E0E060C v_mac_f32_e32 v17, s13, v3 ; 3E22060D s_buffer_load_dword s11, s[0:3], 0x10 ; C2058110 s_buffer_load_dword s12, s[0:3], 0x14 ; C2060114 v_mac_f32_e32 v18, s14, v3 ; 3E24060E s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v19, s4, v3 ; 3E260604 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A v_mul_f32_e32 v20, s6, v1 ; 10280206 v_mac_f32_e32 v20, s5, v2 ; 3E280405 s_buffer_load_dword s21, s[0:3], 0x12 ; C20A8112 v_mac_f32_e32 v20, s11, v3 ; 3E28060B v_mac_f32_e32 v20, s12, v4 ; 3E28080C s_buffer_load_dword s12, s[0:3], 0x15 ; C2060115 v_mul_f32_e32 v21, s13, v1 ; 102A020D v_mac_f32_e32 v21, s19, v2 ; 3E2A0413 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v21, s4, v3 ; 3E2A0604 s_buffer_load_dword s22, s[0:3], 0x16 ; C20B0116 s_buffer_load_dword s23, s[0:3], 0x50 ; C20B8150 s_buffer_load_dword s24, s[0:3], 0x51 ; C20C0151 s_buffer_load_dword s25, s[0:3], 0x52 ; C20C8152 s_buffer_load_dword s26, s[0:3], 0x53 ; C20D0153 s_buffer_load_dword s27, s[0:3], 0x17 ; C20D8117 v_mac_f32_e32 v21, s12, v4 ; 3E2A080C v_mul_f32_e32 v1, s14, v1 ; 1002020E v_mac_f32_e32 v1, s20, v2 ; 3E020414 v_mac_f32_e32 v1, s21, v3 ; 3E020615 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v1, s22, v4 ; 3E020816 v_mac_f32_e32 v6, s23, v4 ; 3E0C0817 v_mac_f32_e32 v7, s24, v4 ; 3E0E0818 v_mac_f32_e32 v17, s25, v4 ; 3E220819 v_mac_f32_e32 v18, s26, v4 ; 3E24081A v_mac_f32_e32 v19, s27, v4 ; 3E26081B v_mov_b32_e32 v2, s17 ; 7E040211 v_mac_f32_e32 v2, s15, v0 ; 3E04000F v_mov_b32_e32 v0, s18 ; 7E000212 v_mac_f32_e32 v0, s16, v10 ; 3E001410 exp 15, 32, 0, 0, 0, v11, v8, v2, v0 ; F800020F 0002080B s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s6, v12 ; 10001806 v_mac_f32_e32 v0, s5, v13 ; 3E001A05 v_mul_f32_e32 v2, s13, v12 ; 1004180D v_mac_f32_e32 v2, s19, v13 ; 3E041A13 v_mul_f32_e32 v3, s14, v12 ; 1006180E v_mac_f32_e32 v3, s20, v13 ; 3E061A14 v_mac_f32_e32 v0, s11, v14 ; 3E001C0B v_mac_f32_e32 v2, s4, v14 ; 3E041C04 v_mac_f32_e32 v3, s21, v14 ; 3E061C15 v_mul_f32_e32 v4, v9, v9 ; 10081309 v_mac_f32_e32 v4, v16, v16 ; 3E082110 v_mul_f32_e32 v8, v0, v0 ; 10100100 v_mac_f32_e32 v8, v2, v2 ; 3E100502 v_mac_f32_e32 v4, v5, v5 ; 3E080B05 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x7 ; C2028107 v_mac_f32_e32 v8, v3, v3 ; 3E100703 v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_mul_f32_e32 v9, v4, v9 ; 10121304 v_mul_f32_e32 v10, v4, v16 ; 10142104 v_mul_f32_e32 v4, v4, v5 ; 10080B04 v_mul_f32_e32 v0, v8, v0 ; 10000108 v_mul_f32_e32 v2, v8, v2 ; 10040508 v_mul_f32_e32 v3, v8, v3 ; 10060708 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v5, -v20, s5, v5 ; D2820005 24140B14 exp 15, 33, 0, 0, 0, v0, v2, v3, v5 ; F800021F 05030200 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v5, v2, v4 ; 100A0902 v_mad_f32 v5, v10, v3, -v5 ; D2820005 8416070A v_mul_f32_e32 v3, v3, v9 ; 10061303 v_mad_f32 v3, v4, v0, -v3 ; D2820003 840E0104 v_mul_f32_e32 v0, v0, v10 ; 10001500 v_mad_f32 v0, v9, v2, -v0 ; D2820000 84020509 v_mul_f32_e32 v2, v15, v5 ; 10040B0F v_mul_f32_e32 v3, v15, v3 ; 1006070F v_mul_f32_e32 v0, v15, v0 ; 1000010F v_mov_b32_e32 v5, s6 ; 7E0A0206 v_mad_f32 v5, -v21, s5, v5 ; D2820005 24140B15 exp 15, 34, 0, 0, 0, v2, v3, v0, v5 ; F800022F 05000302 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_buffer_load_dword s11, s[0:3], 0x1 ; C2058101 s_buffer_load_dword s12, s[0:3], 0x2 ; C2060102 s_buffer_load_dword s13, s[0:3], 0x40 ; C2068140 s_buffer_load_dword s14, s[0:3], 0x41 ; C2070141 s_buffer_load_dword s15, s[0:3], 0x42 ; C2078142 s_buffer_load_dword s16, s[0:3], 0x39 ; C2080139 s_buffer_load_dword s17, s[0:3], 0x3a ; C208813A s_buffer_load_dword s18, s[0:3], 0x3c ; C209013C s_buffer_load_dword s19, s[0:3], 0x3d ; C209813D s_buffer_load_dword s0, s[0:3], 0x3e ; C200013E s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v0, s4 ; 7E000204 v_mad_f32 v0, -v1, s5, v0 ; D2820000 24000B01 v_subrev_f32_e32 v2, s6, v20 ; 0A042806 v_mul_f32_e32 v3, s7, v20 ; 10062807 v_mul_f32_e32 v5, s8, v20 ; 100A2808 v_mul_f32_e32 v8, s9, v20 ; 10102809 v_mac_f32_e32 v3, s10, v21 ; 3E062A0A v_mac_f32_e32 v5, s16, v21 ; 3E0A2A10 v_mac_f32_e32 v8, s17, v21 ; 3E102A11 v_subrev_f32_e32 v11, s11, v21 ; 0A162A0B v_mac_f32_e32 v3, s18, v1 ; 3E060212 v_mac_f32_e32 v5, s19, v1 ; 3E0A0213 v_mac_f32_e32 v8, s0, v1 ; 3E100200 v_subrev_f32_e32 v1, s12, v1 ; 0A02020C v_mac_f32_e32 v3, s13, v19 ; 3E06260D v_mac_f32_e32 v5, s14, v19 ; 3E0A260E v_mac_f32_e32 v8, s15, v19 ; 3E10260F exp 15, 35, 0, 0, 0, v9, v10, v4, v0 ; F800023F 00040A09 exp 15, 36, 0, 0, 0, v17, v2, v11, v1 ; F800024F 010B0211 exp 15, 37, 0, 0, 0, v3, v5, v8, v19 ; F800025F 13080503 exp 15, 12, 0, 1, 0, v6, v7, v17, v18 ; F80008CF 12110706 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 24 Code Size: 920 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[0..5] DCL CONST[8..10] DCL TEMP[0..13], LOCAL IMM[0] FLT32 { 1.0000, 2.0000, -1.0000, 0.0000} IMM[1] FLT32 { 10.0000, 0.9680, 0.0300, 0.5000} IMM[2] FLT32 { 0.0001, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].x, IN[1].xxxx 1: MOV TEMP[0].y, IN[2].xxxx 2: MOV TEMP[0].z, IN[3].xxxx 3: MOV TEMP[1].x, IN[1].yyyy 4: MOV TEMP[1].y, IN[2].yyyy 5: MOV TEMP[1].z, IN[3].yyyy 6: MOV TEMP[2].x, IN[1].zzzz 7: MOV TEMP[2].y, IN[2].zzzz 8: MOV TEMP[2].z, IN[3].zzzz 9: MOV TEMP[3].xy, IN[0].xyyy 10: TEX TEMP[3], TEMP[3], SAMP[0], 2D 11: MUL TEMP[4].x, TEMP[3].wwww, CONST[4].wwww 12: FSLT TEMP[4].x, TEMP[4].xxxx, CONST[5].xxxx 13: AND TEMP[4].x, TEMP[4].xxxx, IMM[0].xxxx 14: KILL_IF -TEMP[4].xxxx 15: MOV TEMP[4].xy, IN[0].xyyy 16: TEX TEMP[4].yw, TEMP[4], SAMP[1], 2D 17: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[0].yyyy, IMM[0].zzzz 18: MUL TEMP[4].xy, TEMP[4].xyyy, CONST[8].xxxx 19: DP2 TEMP[5].x, TEMP[4].xyyy, TEMP[4].xyyy 20: MOV_SAT TEMP[5].x, TEMP[5].xxxx 21: ADD TEMP[5].x, IMM[0].xxxx, -TEMP[5].xxxx 22: SQRT TEMP[5].x, TEMP[5].xxxx 23: MOV TEMP[4].z, TEMP[5].xxxx 24: DP3 TEMP[0].x, TEMP[4].xyzz, TEMP[0].xyzz 25: DP3 TEMP[1].x, TEMP[4].xyzz, TEMP[1].xyzz 26: MOV TEMP[0].y, TEMP[1].xxxx 27: DP3 TEMP[1].x, TEMP[4].xyzz, TEMP[2].xyzz 28: MOV TEMP[0].z, TEMP[1].xxxx 29: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 30: RSQ TEMP[1].x, TEMP[1].xxxx 31: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 32: MUL TEMP[1].xyz, CONST[4].xyzz, TEMP[3].xyzz 33: LRP TEMP[2].xyz, CONST[9].xxxx, TEMP[1].xyzz, CONST[1].xyzz 34: MOV TEMP[3].x, IN[1].wwww 35: MOV TEMP[3].y, IN[2].wwww 36: MOV TEMP[3].z, IN[3].wwww 37: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 38: RSQ TEMP[4].x, TEMP[4].xxxx 39: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 40: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[3].xyzz 41: MAX TEMP[4].x, IMM[0].wwww, TEMP[4].xxxx 42: DP3 TEMP[5].x, IN[5].xyzz, IN[5].xyzz 43: MOV TEMP[5].xy, TEMP[5].xxxx 44: TEX TEMP[5].w, TEMP[5], SAMP[2], 2D 45: MUL TEMP[5].xyz, CONST[2].xyzz, TEMP[5].wwww 46: DP3 TEMP[6].x, IN[4].yzww, IN[4].yzww 47: RSQ TEMP[6].x, TEMP[6].xxxx 48: MUL TEMP[6].xyz, IN[4].yzww, TEMP[6].xxxx 49: MOV TEMP[6].xyz, -TEMP[6].xyzx 50: ADD TEMP[7].x, IMM[0].xxxx, -CONST[10].xxxx 51: ADD TEMP[8].xyz, TEMP[3].xyzz, TEMP[6].xyzz 52: DP3 TEMP[9].x, TEMP[8].xyzz, TEMP[8].xyzz 53: RSQ TEMP[9].x, TEMP[9].xxxx 54: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[9].xxxx 55: DP3 TEMP[6].x, TEMP[0].xyzz, TEMP[6].xyzz 56: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx 57: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[8].xyzz 58: MAX TEMP[3].x, IMM[0].wwww, TEMP[3].xxxx 59: MUL TEMP[9].x, TEMP[7].xxxx, TEMP[7].xxxx 60: MUL TEMP[9].x, TEMP[9].xxxx, CONST[3].wwww 61: ADD TEMP[10].x, IMM[0].xxxx, -TEMP[7].xxxx 62: MAD TEMP[10].x, TEMP[10].xxxx, IMM[1].yyyy, IMM[1].zzzz 63: LG2 TEMP[10].x, TEMP[10].xxxx 64: RCP TEMP[10].x, TEMP[10].xxxx 65: MUL TEMP[10].x, IMM[1].xxxx, TEMP[10].xxxx 66: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[10].xxxx 67: ADD TEMP[11].x, IMM[0].xxxx, -TEMP[4].xxxx 68: ADD TEMP[12].x, IMM[0].xxxx, -TEMP[6].xxxx 69: MUL TEMP[13].x, IMM[0].yyyy, TEMP[3].xxxx 70: MUL TEMP[7].x, TEMP[3].xxxx, TEMP[7].xxxx 71: MAD TEMP[7].x, TEMP[13].xxxx, TEMP[7].xxxx, IMM[1].wwww 72: ADD TEMP[3].x, IMM[0].xxxx, -TEMP[3].xxxx 73: LRP TEMP[13].x, TEMP[4].xxxx, IMM[0].xxxx, TEMP[9].xxxx 74: LRP TEMP[6].x, TEMP[6].xxxx, IMM[0].xxxx, TEMP[9].xxxx 75: MAD TEMP[6].x, TEMP[13].xxxx, TEMP[6].xxxx, IMM[2].xxxx 76: RCP TEMP[6].x, TEMP[6].xxxx 77: DP3 TEMP[8].x, TEMP[0].xyzz, TEMP[8].xyzz 78: MAX TEMP[8].x, IMM[0].wwww, TEMP[8].xxxx 79: POW TEMP[8].x, TEMP[8].xxxx, TEMP[10].xxxx 80: ADD TEMP[9].x, TEMP[10].xxxx, IMM[0].xxxx 81: MUL TEMP[9].x, TEMP[9].xxxx, CONST[3].yyyy 82: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx 83: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[8].xxxx 84: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[4].xxxx 85: MUL TEMP[6].x, TEMP[6].xxxx, CONST[3].xxxx 86: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx 87: MUL TEMP[6].xyz, TEMP[6].xxxx, TEMP[5].xyzz 88: ADD TEMP[8].xyz, IMM[0].xxxx, -TEMP[2].xyzz 89: MUL TEMP[9].x, TEMP[3].xxxx, TEMP[3].xxxx 90: MUL TEMP[10].x, TEMP[3].xxxx, TEMP[3].xxxx 91: MUL TEMP[3].x, TEMP[10].xxxx, TEMP[3].xxxx 92: MUL TEMP[3].x, TEMP[9].xxxx, TEMP[3].xxxx 93: MAD TEMP[2].xyz, TEMP[8].xyzz, TEMP[3].xxxx, TEMP[2].xyzz 94: MUL TEMP[3].x, CONST[9].xxxx, CONST[1].wwww 95: ADD TEMP[3].x, CONST[1].wwww, -TEMP[3].xxxx 96: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx 97: ADD TEMP[3].x, TEMP[7].xxxx, IMM[0].zzzz 98: MUL TEMP[8].x, TEMP[11].xxxx, TEMP[11].xxxx 99: MUL TEMP[9].x, TEMP[11].xxxx, TEMP[11].xxxx 100: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx 101: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx 102: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[8].xxxx, IMM[0].xxxx 103: ADD TEMP[7].x, TEMP[7].xxxx, IMM[0].zzzz 104: MUL TEMP[8].x, TEMP[12].xxxx, TEMP[12].xxxx 105: MUL TEMP[9].x, TEMP[12].xxxx, TEMP[12].xxxx 106: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx 107: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx 108: MAD TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx, IMM[0].xxxx 109: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[7].xxxx 110: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx 111: MUL TEMP[3].xyz, TEMP[5].xyzz, TEMP[3].xxxx 112: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz 113: MAD TEMP[0].xyz, TEMP[6].xyzz, TEMP[2].xyzz, TEMP[1].xyzz 114: MAD TEMP[1].x, IN[4].xxxx, CONST[0].zzzz, CONST[0].wwww 115: MOV_SAT TEMP[1].x, TEMP[1].xxxx 116: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 117: MOV TEMP[0].xyz, TEMP[0].xyzx 118: MOV TEMP[0].w, IMM[0].xxxx 119: MOV OUT[0], TEMP[0] 120: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %44 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %45 = load <32 x i8>, <32 x i8> addrspace(2)* %44, align 32, !tbaa !0 %46 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %49 = bitcast <8 x i32> addrspace(2)* %48 to <32 x i8> addrspace(2)* %50 = load <32 x i8>, <32 x i8> addrspace(2)* %49, align 32, !tbaa !0 %51 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %52 = bitcast <4 x i32> addrspace(2)* %51 to <16 x i8> addrspace(2)* %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 %54 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %55 = bitcast <8 x i32> addrspace(2)* %54 to <32 x i8> addrspace(2)* %56 = load <32 x i8>, <32 x i8> addrspace(2)* %55, align 32, !tbaa !0 %57 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %58 = bitcast <4 x i32> addrspace(2)* %57 to <16 x i8> addrspace(2)* %59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0 %60 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %61 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %62 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %63 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %64 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %65 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %66 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %67 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %68 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %69 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %70 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %71 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %81 = bitcast float %60 to i32 %82 = bitcast float %61 to i32 %83 = insertelement <2 x i32> undef, i32 %81, i32 0 %84 = insertelement <2 x i32> %83, i32 %82, i32 1 %85 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %84, <32 x i8> %45, <16 x i8> %47, i32 2) %86 = extractelement <4 x float> %85, i32 0 %87 = extractelement <4 x float> %85, i32 1 %88 = extractelement <4 x float> %85, i32 2 %89 = extractelement <4 x float> %85, i32 3 %90 = fmul float %89, %39 %91 = fcmp olt float %90, %40 %92 = select i1 %91, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %92) %93 = bitcast float %60 to i32 %94 = bitcast float %61 to i32 %95 = insertelement <2 x i32> undef, i32 %93, i32 0 %96 = insertelement <2 x i32> %95, i32 %94, i32 1 %97 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %50, <16 x i8> %53, i32 2) %98 = extractelement <4 x float> %97, i32 1 %99 = extractelement <4 x float> %97, i32 3 %100 = fmul float %99, 2.000000e+00 %101 = fadd float %100, -1.000000e+00 %102 = fmul float %98, 2.000000e+00 %103 = fadd float %102, -1.000000e+00 %104 = fmul float %101, %41 %105 = fmul float %103, %41 %106 = fmul float %104, %104 %107 = fmul float %105, %105 %108 = fadd float %106, %107 %109 = call float @llvm.AMDIL.clamp.(float %108, float 0.000000e+00, float 1.000000e+00) %110 = fsub float 1.000000e+00, %109 %111 = call float @llvm.sqrt.f32(float %110) %112 = fmul float %104, %62 %113 = fmul float %105, %66 %114 = fadd float %113, %112 %115 = fmul float %111, %70 %116 = fadd float %114, %115 %117 = fmul float %104, %63 %118 = fmul float %105, %67 %119 = fadd float %118, %117 %120 = fmul float %111, %71 %121 = fadd float %119, %120 %122 = fmul float %104, %64 %123 = fmul float %105, %68 %124 = fadd float %123, %122 %125 = fmul float %111, %72 %126 = fadd float %124, %125 %127 = fmul float %116, %116 %128 = fmul float %121, %121 %129 = fadd float %128, %127 %130 = fmul float %126, %126 %131 = fadd float %129, %130 %132 = call float @llvm.AMDGPU.rsq.clamped.f32(float %131) %133 = fmul float %116, %132 %134 = fmul float %121, %132 %135 = fmul float %126, %132 %136 = fmul float %36, %86 %137 = fmul float %37, %87 %138 = fmul float %38, %88 %139 = call float @llvm.AMDGPU.lrp(float %42, float %136, float %26) %140 = call float @llvm.AMDGPU.lrp(float %42, float %137, float %27) %141 = call float @llvm.AMDGPU.lrp(float %42, float %138, float %28) %142 = fmul float %65, %65 %143 = fmul float %69, %69 %144 = fadd float %143, %142 %145 = fmul float %73, %73 %146 = fadd float %144, %145 %147 = call float @llvm.AMDGPU.rsq.clamped.f32(float %146) %148 = fmul float %65, %147 %149 = fmul float %69, %147 %150 = fmul float %73, %147 %151 = fmul float %133, %148 %152 = fmul float %134, %149 %153 = fadd float %152, %151 %154 = fmul float %135, %150 %155 = fadd float %153, %154 %156 = call float @llvm.maxnum.f32(float %155, float 0.000000e+00) %157 = fmul float %78, %78 %158 = fmul float %79, %79 %159 = fadd float %158, %157 %160 = fmul float %80, %80 %161 = fadd float %159, %160 %162 = bitcast float %161 to i32 %163 = bitcast float %161 to i32 %164 = insertelement <2 x i32> undef, i32 %162, i32 0 %165 = insertelement <2 x i32> %164, i32 %163, i32 1 %166 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %165, <32 x i8> %56, <16 x i8> %59, i32 2) %167 = extractelement <4 x float> %166, i32 3 %168 = fmul float %30, %167 %169 = fmul float %31, %167 %170 = fmul float %32, %167 %171 = fmul float %75, %75 %172 = fmul float %76, %76 %173 = fadd float %172, %171 %174 = fmul float %77, %77 %175 = fadd float %173, %174 %176 = call float @llvm.AMDGPU.rsq.clamped.f32(float %175) %177 = fmul float %75, %176 %178 = fmul float %76, %176 %179 = fmul float %77, %176 %180 = fsub float 1.000000e+00, %43 %181 = fsub float %148, %177 %182 = fsub float %149, %178 %183 = fsub float %150, %179 %184 = fmul float %181, %181 %185 = fmul float %182, %182 %186 = fadd float %185, %184 %187 = fmul float %183, %183 %188 = fadd float %186, %187 %189 = call float @llvm.AMDGPU.rsq.clamped.f32(float %188) %190 = fmul float %181, %189 %191 = fmul float %182, %189 %192 = fmul float %183, %189 %193 = fmul float %177, %133 %194 = fsub float -0.000000e+00, %193 %195 = fmul float %178, %134 %196 = fsub float %194, %195 %197 = fmul float %179, %135 %198 = fsub float %196, %197 %199 = call float @llvm.maxnum.f32(float %198, float 0.000000e+00) %200 = fmul float %148, %190 %201 = fmul float %149, %191 %202 = fadd float %201, %200 %203 = fmul float %150, %192 %204 = fadd float %202, %203 %205 = call float @llvm.maxnum.f32(float %204, float 0.000000e+00) %206 = fmul float %180, %180 %207 = fmul float %206, %35 %208 = fsub float 1.000000e+00, %180 %209 = fmul float %208, 0x3FEEF9DB20000000 %210 = fadd float %209, 0x3F9EB851E0000000 %211 = call float @llvm.log2.f32(float %210) %212 = fdiv float 1.000000e+00, %211 %213 = fmul float %212, 1.000000e+01 %214 = fmul float %213, %213 %215 = fsub float 1.000000e+00, %156 %216 = fsub float 1.000000e+00, %199 %217 = fmul float %205, 2.000000e+00 %218 = fmul float %205, %180 %219 = fmul float %217, %218 %220 = fadd float %219, 5.000000e-01 %221 = fsub float 1.000000e+00, %205 %222 = call float @llvm.AMDGPU.lrp(float %156, float 1.000000e+00, float %207) %223 = call float @llvm.AMDGPU.lrp(float %199, float 1.000000e+00, float %207) %224 = fmul float %222, %223 %225 = fadd float %224, 0x3F1A36E2E0000000 %226 = fdiv float 1.000000e+00, %225 %227 = fmul float %133, %190 %228 = fmul float %134, %191 %229 = fadd float %228, %227 %230 = fmul float %135, %192 %231 = fadd float %229, %230 %232 = call float @llvm.maxnum.f32(float %231, float 0.000000e+00) %233 = call float @llvm.pow.f32(float %232, float %214) %234 = fadd float %214, 1.000000e+00 %235 = fmul float %234, %34 %236 = fmul float %233, %235 %237 = fmul float %226, %236 %238 = fmul float %237, %156 %239 = fmul float %238, %33 %240 = call float @llvm.maxnum.f32(float %239, float 0.000000e+00) %241 = fmul float %240, %168 %242 = fmul float %240, %169 %243 = fmul float %240, %170 %244 = fsub float 1.000000e+00, %139 %245 = fsub float 1.000000e+00, %140 %246 = fsub float 1.000000e+00, %141 %247 = fmul float %221, %221 %248 = fmul float %221, %221 %249 = fmul float %248, %221 %250 = fmul float %247, %249 %251 = fmul float %244, %250 %252 = fadd float %251, %139 %253 = fmul float %245, %250 %254 = fadd float %253, %140 %255 = fmul float %246, %250 %256 = fadd float %255, %141 %257 = fmul float %42, %29 %258 = fsub float %29, %257 %259 = fmul float %136, %258 %260 = fmul float %137, %258 %261 = fmul float %138, %258 %262 = fadd float %220, -1.000000e+00 %263 = fmul float %215, %215 %264 = fmul float %215, %215 %265 = fmul float %264, %215 %266 = fmul float %263, %265 %267 = fmul float %262, %266 %268 = fadd float %267, 1.000000e+00 %269 = fadd float %220, -1.000000e+00 %270 = fmul float %216, %216 %271 = fmul float %216, %216 %272 = fmul float %271, %216 %273 = fmul float %270, %272 %274 = fmul float %269, %273 %275 = fadd float %274, 1.000000e+00 %276 = fmul float %268, %275 %277 = fmul float %276, %156 %278 = fmul float %168, %277 %279 = fmul float %169, %277 %280 = fmul float %170, %277 %281 = fmul float %259, %278 %282 = fmul float %260, %279 %283 = fmul float %261, %280 %284 = fmul float %241, %252 %285 = fadd float %284, %281 %286 = fmul float %242, %254 %287 = fadd float %286, %282 %288 = fmul float %243, %256 %289 = fadd float %288, %283 %290 = fmul float %74, %24 %291 = fadd float %290, %25 %292 = call float @llvm.AMDIL.clamp.(float %291, float 0.000000e+00, float 1.000000e+00) %293 = fmul float %285, %292 %294 = fmul float %287, %292 %295 = fmul float %289, %292 %296 = call i32 @llvm.SI.packf16(float %293, float %294) %297 = bitcast i32 %296 to float %298 = call i32 @llvm.SI.packf16(float %295, float 1.000000e+00) %299 = bitcast i32 %298 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %297, float %299, float %297, float %299) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v7, v0, 3, 1, [m0] ; C81C0700 v_interp_p2_f32 v7, [v7], v1, 3, 1, [m0] ; C81D0701 v_interp_p1_f32 v8, v0, 0, 2, [m0] ; C8200800 v_interp_p2_f32 v8, [v8], v1, 0, 2, [m0] ; C8210801 v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900 v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901 v_interp_p1_f32 v10, v0, 2, 2, [m0] ; C8280A00 v_interp_p2_f32 v10, [v10], v1, 2, 2, [m0] ; C8290A01 v_interp_p1_f32 v11, v0, 3, 2, [m0] ; C82C0B00 v_interp_p2_f32 v11, [v11], v1, 3, 2, [m0] ; C82D0B01 v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00 v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01 v_interp_p1_f32 v13, v0, 1, 3, [m0] ; C8340D00 v_interp_p2_f32 v13, [v13], v1, 1, 3, [m0] ; C8350D01 v_interp_p1_f32 v14, v0, 2, 3, [m0] ; C8380E00 v_interp_p2_f32 v14, [v14], v1, 2, 3, [m0] ; C8390E01 v_interp_p1_f32 v15, v0, 3, 3, [m0] ; C83C0F00 v_interp_p2_f32 v15, [v15], v1, 3, 3, [m0] ; C83D0F01 v_interp_p1_f32 v16, v0, 0, 4, [m0] ; C8401000 v_interp_p2_f32 v16, [v16], v1, 0, 4, [m0] ; C8411001 v_interp_p1_f32 v17, v0, 1, 4, [m0] ; C8441100 v_interp_p2_f32 v17, [v17], v1, 1, 4, [m0] ; C8451101 v_interp_p1_f32 v18, v0, 2, 4, [m0] ; C8481200 v_interp_p2_f32 v18, [v18], v1, 2, 4, [m0] ; C8491201 v_interp_p1_f32 v19, v0, 3, 4, [m0] ; C84C1300 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p2_f32 v19, [v19], v1, 3, 4, [m0] ; C84D1301 v_interp_p1_f32 v20, v0, 0, 5, [m0] ; C8501400 v_interp_p2_f32 v20, [v20], v1, 0, 5, [m0] ; C8511401 v_interp_p1_f32 v21, v0, 1, 5, [m0] ; C8541500 v_interp_p2_f32 v21, [v21], v1, 1, 5, [m0] ; C8551501 v_interp_p1_f32 v0, v0, 2, 5, [m0] ; C8001600 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 v_interp_p2_f32 v0, [v0], v1, 2, 5, [m0] ; C8011601 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s44, s[8:11], 0x10 ; C2160910 s_buffer_load_dword s45, s[8:11], 0x11 ; C2168911 s_buffer_load_dword s46, s[8:11], 0x13 ; C2170913 s_buffer_load_dword s47, s[8:11], 0x14 ; C2178914 s_load_dwordx4 s[24:27], s[4:5], 0x4 ; C08C0504 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 s_load_dwordx8 s[16:23], s[6:7], 0x10 ; C0C80710 image_sample v[22:25], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[36:43], s[0:3] ; F0800F00 00091602 s_buffer_load_dword s3, s[8:11], 0x12 ; C2018912 s_buffer_load_dword s2, s[8:11], 0x20 ; C2010920 s_buffer_load_dword s0, s[8:11], 0x24 ; C2000924 s_buffer_load_dword s1, s[8:11], 0x28 ; C2008928 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v1, s46, v25 ; 1002322E v_cmp_gt_f32_e32 vcc, s47, v1 ; 7C08022F v_cndmask_b32_e64 v1, 0, -1.0, vcc ; D2000001 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v1 ; 7C260280 v_mul_f32_e32 v1, s44, v22 ; 10022C2C v_mul_f32_e32 v22, s45, v23 ; 102C2E2D v_mul_f32_e32 v23, s3, v24 ; 102E3003 s_buffer_load_dword s3, s[8:11], 0x2 ; C2018902 s_buffer_load_dword s4, s[8:11], 0x3 ; C2020903 v_mul_f32_e32 v24, v20, v20 ; 10302914 v_mac_f32_e32 v24, v21, v21 ; 3E302B15 v_mac_f32_e32 v24, v0, v0 ; 3E300100 image_sample v[2:3], 10, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[24:27] ; F0800A00 00C70202 v_mov_b32_e32 v25, v24 ; 7E320318 image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[16:23], s[12:15] ; F0800800 00640018 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mad_f32 v3, 2.0, v3, -1.0 ; D2820003 03CE06F4 v_mad_f32 v2, 2.0, v2, -1.0 ; D2820002 03CE04F4 s_buffer_load_dword s5, s[8:11], 0x4 ; C2028904 s_buffer_load_dword s6, s[8:11], 0x5 ; C2030905 s_buffer_load_dword s7, s[8:11], 0x6 ; C2038906 v_mov_b32_e32 v20, s4 ; 7E280204 v_mac_f32_e32 v20, s3, v16 ; 3E282003 s_buffer_load_dword s3, s[8:11], 0xd ; C201890D s_buffer_load_dword s4, s[8:11], 0xf ; C202090F s_buffer_load_dword s12, s[8:11], 0x7 ; C2060907 s_buffer_load_dword s13, s[8:11], 0x8 ; C2068908 s_buffer_load_dword s14, s[8:11], 0x9 ; C2070909 s_buffer_load_dword s15, s[8:11], 0xa ; C207890A s_buffer_load_dword s8, s[8:11], 0xc ; C204090C v_mul_f32_e32 v3, s2, v3 ; 10060602 v_mul_f32_e32 v2, s2, v2 ; 10040402 v_mul_f32_e32 v4, v4, v3 ; 10080704 v_mac_f32_e32 v4, v8, v2 ; 3E080508 v_mul_f32_e32 v5, v5, v3 ; 100A0705 v_mac_f32_e32 v5, v9, v2 ; 3E0A0509 v_mul_f32_e32 v6, v6, v3 ; 100C0706 v_mac_f32_e32 v6, v10, v2 ; 3E0C050A v_mul_f32_e32 v2, v2, v2 ; 10040502 v_mac_f32_e32 v2, v3, v3 ; 3E040703 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_sub_f32_e32 v2, 1.0, v2 ; 080404F2 v_sqrt_f32_e32 v2, v2 ; 7E046702 v_mac_f32_e32 v4, v12, v2 ; 3E08050C v_mac_f32_e32 v5, v13, v2 ; 3E0A050D v_mac_f32_e32 v6, v14, v2 ; 3E0C050E v_mul_f32_e32 v2, v4, v4 ; 10040904 v_mac_f32_e32 v2, v5, v5 ; 3E040B05 v_mac_f32_e32 v2, v6, v6 ; 3E040D06 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 v_mul_f32_e32 v3, v17, v17 ; 10062311 v_mac_f32_e32 v3, v18, v18 ; 3E062512 v_mac_f32_e32 v3, v19, v19 ; 3E062713 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_mul_f32_e32 v8, v7, v7 ; 10100F07 v_mac_f32_e32 v8, v11, v11 ; 3E10170B v_mac_f32_e32 v8, v15, v15 ; 3E101F0F v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_mul_f32_e32 v9, v3, v17 ; 10122303 v_mul_f32_e32 v10, v3, v18 ; 10142503 v_mul_f32_e32 v3, v3, v19 ; 10062703 v_mul_f32_e32 v4, v2, v4 ; 10080902 v_mul_f32_e32 v5, v2, v5 ; 100A0B02 v_mul_f32_e32 v12, v8, v7 ; 10180F08 v_mad_f32 v7, v7, v8, -v9 ; D2820007 84261107 v_mul_f32_e32 v9, v4, v9 ; 10121304 v_mad_f32 v9, -v10, v5, -v9 ; D2820009 A4260B0A v_mad_f32 v10, v11, v8, -v10 ; D282000A 842A110B v_mul_f32_e32 v13, v7, v7 ; 101A0F07 v_mac_f32_e32 v13, v10, v10 ; 3E1A150A v_mad_f32 v14, v15, v8, -v3 ; D282000E 840E110F v_mac_f32_e32 v13, v14, v14 ; 3E1A1D0E v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D v_mul_f32_e32 v11, v8, v11 ; 10161708 v_mul_f32_e32 v16, v12, v4 ; 1020090C v_mac_f32_e32 v16, v11, v5 ; 3E200B0B v_mul_f32_e32 v7, v13, v7 ; 100E0F0D v_mul_f32_e32 v10, v13, v10 ; 1014150D v_mul_f32_e32 v12, v7, v12 ; 10181907 v_mac_f32_e32 v12, v10, v11 ; 3E18170A v_mul_f32_e32 v4, v7, v4 ; 10080907 v_mac_f32_e32 v4, v10, v5 ; 3E080B0A v_mul_f32_e32 v2, v2, v6 ; 10040D02 v_mul_f32_e32 v5, v8, v15 ; 100A1F08 v_mul_f32_e32 v6, v13, v14 ; 100C1D0D v_mad_f32 v3, -v3, v2, v9 ; D2820003 24260503 v_mac_f32_e32 v16, v5, v2 ; 3E200505 v_mac_f32_e32 v12, v6, v5 ; 3E180B06 v_mac_f32_e32 v4, v6, v2 ; 3E080506 v_sub_f32_e64 v2, 1.0, s0 ; D2080002 000000F2 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v5, s5, v2 ; 100A0405 v_mad_f32 v6, -v2, s5, 1.0 ; D2820006 23C80B02 v_mul_f32_e32 v7, s6, v2 ; 100E0406 v_mad_f32 v8, -v2, s6, 1.0 ; D2820008 23C80D02 v_mul_f32_e32 v9, s7, v2 ; 10120407 v_mad_f32 v2, -v2, s7, 1.0 ; D2820002 23C80F02 v_sub_f32_e64 v10, 1.0, s1 ; D208000A 000002F2 v_sub_f32_e32 v11, 1.0, v10 ; 081614F2 v_mov_b32_e32 v13, 0x3cf5c28f ; 7E1A02FF 3CF5C28F v_madmk_f32_e32 v11, v11, v13, 0x3f77ced9 ; 40161B0B 3F77CED9 v_max_f32_e32 v12, 0, v12 ; 20181880 v_sub_f32_e32 v13, 1.0, v12 ; 081A18F2 v_mul_f32_e32 v14, v13, v13 ; 101C1B0D v_mul_f32_e32 v13, v13, v14 ; 101A1D0D v_mul_f32_e32 v13, v13, v14 ; 101A1D0D v_mac_f32_e32 v5, s0, v1 ; 3E0A0200 v_mad_f32 v6, -s0, v1, v6 ; D2820006 241A0200 v_mac_f32_e32 v5, v13, v6 ; 3E0A0D0D v_mac_f32_e32 v7, s0, v22 ; 3E0E2C00 v_mad_f32 v6, -s0, v22, v8 ; D2820006 24222C00 v_mac_f32_e32 v7, v13, v6 ; 3E0E0D0D v_mac_f32_e32 v9, s0, v23 ; 3E122E00 v_log_f32_e32 v6, v11 ; 7E0C4F0B v_mad_f32 v2, -s0, v23, v2 ; D2820002 240A2E00 v_mac_f32_e32 v9, v13, v2 ; 3E12050D v_mul_f32_e32 v2, s13, v0 ; 1004000D v_mul_f32_e32 v8, s14, v0 ; 1010000E v_rcp_f32_e32 v6, v6 ; 7E0C5506 v_mul_f32_e32 v0, s15, v0 ; 1000000F v_mul_f32_e32 v11, v10, v10 ; 1016150A v_mul_f32_e32 v11, s4, v11 ; 10161604 v_mul_f32_e32 v6, 0x41200000, v6 ; 100C0CFF 41200000 v_mad_f32 v13, v6, v6, 1.0 ; D282000D 03CA0D06 v_mul_f32_e32 v13, s3, v13 ; 101A1A03 v_max_f32_e32 v3, 0, v3 ; 20060680 v_sub_f32_e32 v14, 1.0, v3 ; 081C06F2 v_mul_f32_e32 v15, v11, v14 ; 101E1D0B v_mac_f32_e32 v15, 1.0, v3 ; 3E1E06F2 v_max_f32_e32 v3, 0, v16 ; 20062080 v_sub_f32_e32 v16, 1.0, v3 ; 082006F2 v_mul_f32_e32 v11, v11, v16 ; 1016210B v_mac_f32_e32 v11, 1.0, v3 ; 3E1606F2 v_max_f32_e32 v4, 0, v4 ; 20080880 v_log_f32_e32 v4, v4 ; 7E084F04 v_madak_f32_e32 v11, v11, v15, 0x38d1b717 ; 42161F0B 38D1B717 v_mul_f32_e32 v6, v6, v6 ; 100C0D06 v_rcp_f32_e32 v11, v11 ; 7E16550B v_mul_legacy_f32_e32 v4, v6, v4 ; 0E080906 v_exp_f32_e32 v4, v4 ; 7E084B04 v_mul_f32_e32 v4, v13, v4 ; 1008090D v_mul_f32_e32 v4, v4, v11 ; 10081704 v_mul_f32_e32 v4, v3, v4 ; 10080903 v_mul_f32_e32 v4, s8, v4 ; 10080808 v_mov_b32_e32 v6, s0 ; 7E0C0200 v_mad_f32 v6, -v6, s12, s12 ; D2820006 20301906 v_mul_f32_e32 v10, v10, v12 ; 1014190A v_add_f32_e32 v11, v12, v12 ; 0616190C v_mad_f32 v10, v11, v10, 0.5 ; D282000A 03C2150B v_mul_f32_e32 v11, v16, v16 ; 10162110 v_mul_f32_e32 v12, v16, v11 ; 10181710 v_mul_f32_e32 v11, v12, v11 ; 1016170C v_mul_f32_e32 v12, v14, v14 ; 10181D0E v_mul_f32_e32 v13, v14, v12 ; 101A190E v_mul_f32_e32 v12, v13, v12 ; 1018190D v_add_f32_e32 v10, -1.0, v10 ; 061414F3 v_mad_f32 v11, v10, v11, 1.0 ; D282000B 03CA170A v_mad_f32 v10, v10, v12, 1.0 ; D282000A 03CA190A v_mul_f32_e32 v10, v10, v11 ; 1014170A v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mul_f32_e32 v3, v3, v10 ; 10061503 v_mul_f32_e32 v10, v3, v2 ; 10140503 v_mul_f32_e32 v1, v10, v1 ; 1002030A v_max_f32_e32 v4, 0, v4 ; 20080880 v_mul_f32_e32 v2, v2, v4 ; 10040902 v_mac_f32_e32 v1, v5, v2 ; 3E020505 v_mul_f32_e32 v2, v6, v22 ; 10042D06 v_mul_f32_e32 v5, v3, v8 ; 100A1103 v_mul_f32_e32 v2, v5, v2 ; 10040505 v_mul_f32_e32 v5, v8, v4 ; 100A0908 v_mac_f32_e32 v2, v7, v5 ; 3E040B07 v_mul_f32_e32 v5, v6, v23 ; 100A2F06 v_mul_f32_e32 v4, v0, v4 ; 10080900 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_mac_f32_e32 v0, v9, v4 ; 3E000909 v_add_f32_e64 v3, 0, v20 clamp ; D2060803 00022880 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 28 Code Size: 1112 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[0..12] DCL CONST[16..27] DCL TEMP[0..14], LOCAL IMM[0] FLT32 { 1.0000, 0.5000, 0.0000, 0.4990} IMM[1] FLT32 { 1.4990, 2.0000, 0.8000, 1.5000} IMM[2] FLT32 { -0.5000, -1.0000, 0.2000, 8.0000} IMM[3] FLT32 { 0.1000, 0.0000, -4.0000, 0.0700} IMM[4] FLT32 { -0.0150, 100.0000, 180.0000, 0.7000} 0: MUL TEMP[0].x, IN[3].xxxx, CONST[16].xxxx 1: FLR TEMP[0].x, TEMP[0].xxxx 2: ADD TEMP[1].xy, IN[0].xyyy, IMM[0].xxxx 3: MUL TEMP[1].xy, TEMP[1].xyyy, IMM[0].yyyy 4: MOV TEMP[1].z, TEMP[0].xxxx 5: RCP TEMP[2].x, CONST[16].xxxx 6: ADD TEMP[3].x, TEMP[0].xxxx, IMM[0].wwww 7: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[2].xxxx 8: RCP TEMP[4].x, CONST[16].yyyy 9: MUL TEMP[5].x, IMM[0].wwww, TEMP[4].xxxx 10: MOV TEMP[3].y, TEMP[5].xxxx 11: MOV TEMP[3].xy, TEMP[3].xyyy 12: MOV TEMP[3].w, IMM[0].zzzz 13: TXL TEMP[3], TEMP[3], SAMP[0], 2D 14: ADD TEMP[5].x, TEMP[0].xxxx, IMM[0].wwww 15: MUL TEMP[0].x, TEMP[5].xxxx, TEMP[2].xxxx 16: MUL TEMP[2].x, IMM[1].xxxx, TEMP[4].xxxx 17: MOV TEMP[0].y, TEMP[2].xxxx 18: MOV TEMP[0].xy, TEMP[0].xyyy 19: MOV TEMP[0].w, IMM[0].zzzz 20: TXL TEMP[0].xy, TEMP[0], SAMP[0], 2D 21: MUL TEMP[2].xy, TEMP[3].xyyy, IMM[1].yyyy 22: MUL TEMP[3].xy, TEMP[3].zwww, IMM[1].yyyy 23: MOV TEMP[4].xy, IN[1].xzzz 24: MOV TEMP[4].w, IMM[0].zzzz 25: TXL TEMP[4].xy, TEMP[4], SAMP[1], 2D 26: LRP TEMP[3].xy, TEMP[4].xyyy, TEMP[3].xyyy, TEMP[2].xyyy 27: MOV TEMP[2].x, IN[2].xxxx 28: MUL TEMP[4].x, IN[2].xxxx, IN[2].xxxx 29: MAD TEMP[4].x, IN[2].yyyy, IN[2].yyyy, TEMP[4].xxxx 30: SQRT TEMP[4].x, TEMP[4].xxxx 31: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx 32: MOV TEMP[2].y, TEMP[4].xxxx 33: MOV TEMP[2].z, IN[2].yyyy 34: MOV TEMP[4].y, TEMP[4].xxxx 35: MOV TEMP[5].x, CONST[9].zzzz 36: MOV TEMP[5].y, CONST[10].zzzz 37: MOV TEMP[5].z, CONST[11].zzzz 38: MOV TEMP[5].xyz, -TEMP[5].xyzx 39: MUL TEMP[6].xyz, IMM[0].zzxx, TEMP[5].yzxx 40: MAD TEMP[6].xyz, IMM[0].xzzz, TEMP[5].zxyy, -TEMP[6].xyzz 41: DP3 TEMP[7].x, TEMP[6].xyzz, TEMP[6].xyzz 42: RSQ TEMP[7].x, TEMP[7].xxxx 43: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[7].xxxx 44: MUL TEMP[7].xyz, TEMP[5].zxyy, TEMP[6].yzxx 45: MAD TEMP[7].xyz, TEMP[5].yzxx, TEMP[6].zxyy, -TEMP[7].xyzz 46: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 47: RSQ TEMP[8].x, TEMP[8].xxxx 48: MUL TEMP[7].xz, TEMP[7].xyzz, TEMP[8].xxxx 49: MOV TEMP[7].xz, TEMP[7].xxzx 50: MOV TEMP[7].y, IMM[0].xxxx 51: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[2].xyzz 52: ABS TEMP[5].x, TEMP[5].xxxx 53: MAD TEMP[5].x, IMM[0].yyyy, TEMP[5].xxxx, IMM[0].yyyy 54: LRP TEMP[2].xyz, TEMP[5].xxxx, TEMP[7].xyzz, TEMP[2].xyzz 55: DP3 TEMP[5].x, TEMP[2].xyzz, TEMP[2].xyzz 56: RSQ TEMP[5].x, TEMP[5].xxxx 57: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx 58: MUL TEMP[5], CONST[19].xxyy, CONST[17].xxyy 59: MUL TEMP[5], TEMP[5], CONST[21] 60: MUL TEMP[7], CONST[19].zzww, CONST[17].zzww 61: MUL TEMP[7], TEMP[7], CONST[22] 62: DP2 TEMP[8].x, CONST[21].xyyy, IN[1].xzzz 63: DP2 TEMP[9].x, CONST[21].zwww, IN[1].xzzz 64: MOV TEMP[8].y, TEMP[9].xxxx 65: DP2 TEMP[9].x, CONST[22].xyyy, IN[1].xzzz 66: MOV TEMP[8].z, TEMP[9].xxxx 67: DP2 TEMP[9].x, CONST[22].zwww, IN[1].xzzz 68: MOV TEMP[8].w, TEMP[9].xxxx 69: MUL TEMP[8], CONST[18], TEMP[8] 70: MUL TEMP[9], CONST[0].yyyy, CONST[20] 71: ADD TEMP[8], TEMP[8], TEMP[9] 72: COS TEMP[10].x, TEMP[8].xxxx 73: COS TEMP[10].y, TEMP[8].yyyy 74: COS TEMP[10].z, TEMP[8].zzzz 75: COS TEMP[10].w, TEMP[8].wwww 76: MOV TEMP[8].xy, TEMP[5].xzxx 77: MOV TEMP[8].zw, TEMP[7].zzxz 78: DP4 TEMP[8].x, TEMP[10], TEMP[8] 79: MOV TEMP[5].xy, TEMP[5].ywyy 80: MOV TEMP[5].zw, TEMP[7].wwyw 81: DP4 TEMP[5].x, TEMP[10], TEMP[5] 82: MOV TEMP[8].z, TEMP[5].xxxx 83: ADD TEMP[5].xy, IN[1].xzzz, TEMP[8].xzzz 84: MOV TEMP[7].y, IMM[1].yyyy 85: MUL TEMP[8], CONST[18].xxyy, CONST[17].xxyy 86: MUL TEMP[8], TEMP[8], CONST[21] 87: MUL TEMP[10], CONST[18].zzww, CONST[17].zzww 88: MUL TEMP[10], TEMP[10], CONST[22] 89: DP2 TEMP[11].x, CONST[21].xyyy, TEMP[5].xyyy 90: DP2 TEMP[12].x, CONST[21].zwww, TEMP[5].xyyy 91: MOV TEMP[11].y, TEMP[12].xxxx 92: DP2 TEMP[12].x, CONST[22].xyyy, TEMP[5].xyyy 93: MOV TEMP[11].z, TEMP[12].xxxx 94: DP2 TEMP[5].x, CONST[22].zwww, TEMP[5].xyyy 95: MOV TEMP[11].w, TEMP[5].xxxx 96: MAD TEMP[5], CONST[18], TEMP[11], TEMP[9] 97: COS TEMP[9].x, TEMP[5].xxxx 98: COS TEMP[9].y, TEMP[5].yyyy 99: COS TEMP[9].z, TEMP[5].zzzz 100: COS TEMP[9].w, TEMP[5].wwww 101: MOV TEMP[5].xy, TEMP[8].xzxx 102: MOV TEMP[5].zw, TEMP[10].zzxz 103: DP4 TEMP[5].x, TEMP[9], TEMP[5] 104: MOV TEMP[7].x, -TEMP[5].xxxx 105: MOV TEMP[5].xy, TEMP[8].ywyy 106: MOV TEMP[5].zw, TEMP[10].wwyw 107: DP4 TEMP[5].x, TEMP[9], TEMP[5] 108: MOV TEMP[7].z, -TEMP[5].xxxx 109: DP3 TEMP[5].x, TEMP[7].xyzz, TEMP[7].xyzz 110: RSQ TEMP[5].x, TEMP[5].xxxx 111: MUL TEMP[5].xyz, TEMP[7].xyzz, TEMP[5].xxxx 112: MUL TEMP[7].x, TEMP[0].xxxx, IMM[0].yyyy 113: MUL TEMP[5].xz, TEMP[5].xyzz, IMM[1].yyyy 114: MOV TEMP[8].xz, TEMP[5].xxzx 115: MOV TEMP[8].y, IMM[0].zzzz 116: ADD TEMP[10].xy, IN[1].xzzz, -CONST[23].xyyy 117: MAD TEMP[10].xy, CONST[23].zwww, IMM[0].yyyy, TEMP[10].xyyy 118: RCP TEMP[11].x, CONST[23].zzzz 119: RCP TEMP[11].y, CONST[23].wwww 120: MUL TEMP[9].xy, TEMP[10].xyyy, TEMP[11].xyyy 121: MOV TEMP[10].w, IMM[0].zzzz 122: MOV TEMP[10].xyz, IN[1].xyzx 123: MOV TEMP[11].w, IMM[0].xxxx 124: ADD TEMP[12].x, IN[0].yyyy, IMM[1].zzzz 125: MUL TEMP[13].x, TEMP[12].xxxx, TEMP[3].yyyy 126: MUL TEMP[14].x, IN[0].xxxx, TEMP[3].xxxx 127: MUL TEMP[6].xyz, TEMP[14].xxxx, TEMP[6].xyzz 128: MAD TEMP[11].xyz, TEMP[13].xxxx, TEMP[2].xyzz, TEMP[6].xyzz 129: MOV TEMP[2].y, IMM[0].zzzz 130: MOV TEMP[6].xy, TEMP[9].xyyy 131: MOV TEMP[6].w, IMM[0].zzzz 132: TXL TEMP[6].xyz, TEMP[6], SAMP[2], 2D 133: ADD TEMP[6].xz, TEMP[6].xzyy, IMM[2].xyxx 134: MOV TEMP[2].xz, TEMP[6].xxzx 135: MOV TEMP[6].w, IMM[0].zzzz 136: ADD TEMP[2].xyz, TEMP[8].xyzz, TEMP[2].xyzz 137: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[7].xxxx 138: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].yyyy 139: MUL TEMP[6].xyz, TEMP[3].xyzz, TEMP[12].xxxx 140: MUL TEMP[3].xy, IN[2].xyyy, IMM[1].wwww 141: MOV TEMP[4].x, TEMP[3].xxyx 142: MOV TEMP[2].xy, TEMP[4].xyxx 143: MUL TEMP[4].xy, TEMP[5].xzzz, TEMP[7].xxxx 144: MUL TEMP[4].x, TEMP[4].xyyy, IMM[2].zzzz 145: ADD TEMP[3].x, TEMP[3].yyyy, TEMP[4].xxxx 146: MOV TEMP[2].z, TEMP[3].xxxx 147: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 148: RSQ TEMP[3].x, TEMP[3].xxxx 149: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xxxx 150: MUL TEMP[2].xyz, IMM[3].yyxx, TEMP[3].yzxx 151: MAD TEMP[2].xyz, IMM[3].xyyy, TEMP[3].zxyy, -TEMP[2].xyzz 152: MUL TEMP[4].xyz, TEMP[3].zxyy, TEMP[2].yzxx 153: MAD TEMP[4].xyz, TEMP[3].yzxx, TEMP[2].zxyy, -TEMP[4].xyzz 154: MOV TEMP[1].xyz, TEMP[1].xyzx 155: MAD TEMP[0].x, TEMP[0].yyyy, IMM[2].wwww, IMM[3].zzzz 156: MOV TEMP[1].w, TEMP[0].xxxx 157: MOV TEMP[0].x, CONST[5].xxxx 158: MOV TEMP[0].y, CONST[6].xxxx 159: MOV TEMP[0].z, CONST[7].xxxx 160: MOV TEMP[5].x, CONST[5].yyyy 161: MOV TEMP[5].y, CONST[6].yyyy 162: MOV TEMP[5].z, CONST[7].yyyy 163: MOV TEMP[7].x, CONST[5].zzzz 164: MOV TEMP[7].y, CONST[6].zzzz 165: MOV TEMP[7].z, CONST[7].zzzz 166: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xxxx 167: MAD TEMP[0].xyz, TEMP[5].xyzz, TEMP[3].yyyy, TEMP[0].xyzz 168: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[3].zzzz, TEMP[0].xyzz 169: DP3 TEMP[5].x, TEMP[0].xyzz, TEMP[0].xyzz 170: RSQ TEMP[5].x, TEMP[5].xxxx 171: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[5].xxxx 172: MUL TEMP[5].xyz, CONST[1].xyzz, TEMP[4].xxxx 173: MAD TEMP[5].xyz, CONST[2].xyzz, TEMP[4].yyyy, TEMP[5].xyzz 174: MAD TEMP[5].xyz, CONST[3].xyzz, TEMP[4].zzzz, TEMP[5].xyzz 175: DP3 TEMP[7].x, TEMP[5].xyzz, TEMP[5].xyzz 176: RSQ TEMP[7].x, TEMP[7].xxxx 177: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[7].xxxx 178: MUL TEMP[7].xyz, TEMP[0].zxyy, TEMP[5].yzxx 179: MAD TEMP[7].xyz, TEMP[0].yzxx, TEMP[5].zxyy, -TEMP[7].xyzz 180: MUL TEMP[8].xyz, TEMP[3].zxyy, TEMP[4].yzxx 181: MAD TEMP[3].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[8].xyzz 182: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[2].xyzz 183: SSG TEMP[3].x, TEMP[3].xxxx 184: MUL TEMP[3].xyz, TEMP[7].xyzz, TEMP[3].xxxx 185: MOV TEMP[2].x, TEMP[5].xxxx 186: MOV TEMP[2].y, TEMP[3].xxxx 187: MOV TEMP[2].z, TEMP[0].xxxx 188: MOV TEMP[4].y, TEMP[3].yyyy 189: MOV TEMP[4].z, TEMP[0].yyyy 190: MOV TEMP[7].x, TEMP[5].zzzz 191: MOV TEMP[7].y, TEMP[3].zzzz 192: ADD TEMP[3], TEMP[10], TEMP[11] 193: SSG TEMP[8], TEMP[6] 194: ABS TEMP[9], TEMP[6] 195: MAD TEMP[9], TEMP[9], IMM[3].wwww, IMM[4].xxxx 196: MAX TEMP[9], TEMP[9], IMM[0].zzzz 197: MUL TEMP[8], TEMP[8], TEMP[9] 198: DP2 TEMP[9].x, IN[2].xyyy, IN[2].xyyy 199: SQRT TEMP[9].x, TEMP[9].xxxx 200: ADD TEMP[9].x, TEMP[9].xxxx, IMM[0].xxxx 201: ADD TEMP[10].x, CONST[0].xxxx, IMM[4].yyyy 202: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[10].xxxx 203: MUL TEMP[9].x, TEMP[9].xxxx, IMM[4].zzzz 204: SIN TEMP[9].x, TEMP[9].xxxx 205: MUL TEMP[9].x, TEMP[9].xxxx, IMM[4].wwww 206: MAD TEMP[6], TEMP[8], TEMP[9].xxxx, TEMP[6] 207: ADD TEMP[3], TEMP[3], TEMP[6] 208: MUL TEMP[6], CONST[5], TEMP[3].xxxx 209: MAD TEMP[6], CONST[6], TEMP[3].yyyy, TEMP[6] 210: MAD TEMP[6], CONST[7], TEMP[3].zzzz, TEMP[6] 211: MAD TEMP[3], CONST[8], TEMP[3].wwww, TEMP[6] 212: MUL TEMP[6], CONST[24], TEMP[3].xxxx 213: MAD TEMP[6], CONST[25], TEMP[3].yyyy, TEMP[6] 214: MAD TEMP[6], CONST[26], TEMP[3].zzzz, TEMP[6] 215: MAD TEMP[3], CONST[27], TEMP[3].wwww, TEMP[6] 216: MOV TEMP[2].xyz, TEMP[2].xyzx 217: MOV TEMP[2].w, TEMP[5].yyyy 218: MOV TEMP[4].xy, TEMP[4].yzyy 219: MOV TEMP[4].zw, TEMP[7].yyxy 220: MOV TEMP[0].x, TEMP[0].zzzz 221: MOV OUT[4], TEMP[0] 222: MOV OUT[3], TEMP[4] 223: MOV OUT[1], TEMP[1] 224: MOV OUT[2], TEMP[2] 225: MOV OUT[0], TEMP[3] 226: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 348) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 392) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 396) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 400) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 404) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 408) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 412) %81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 416) %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 420) %83 = call float @llvm.SI.load.const(<16 x i8> %12, i32 424) %84 = call float @llvm.SI.load.const(<16 x i8> %12, i32 428) %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 432) %86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 436) %87 = call float @llvm.SI.load.const(<16 x i8> %12, i32 440) %88 = call float @llvm.SI.load.const(<16 x i8> %12, i32 444) %89 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %90 = load <8 x i32>, <8 x i32> addrspace(2)* %89, align 32, !tbaa !0 %91 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %92 = load <4 x i32>, <4 x i32> addrspace(2)* %91, align 16, !tbaa !0 %93 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %94 = bitcast <8 x i32> addrspace(2)* %93 to <32 x i8> addrspace(2)* %95 = load <32 x i8>, <32 x i8> addrspace(2)* %94, align 32, !tbaa !0 %96 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %97 = bitcast <4 x i32> addrspace(2)* %96 to <16 x i8> addrspace(2)* %98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0 %99 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %100 = bitcast <8 x i32> addrspace(2)* %99 to <32 x i8> addrspace(2)* %101 = load <32 x i8>, <32 x i8> addrspace(2)* %100, align 32, !tbaa !0 %102 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %103 = bitcast <4 x i32> addrspace(2)* %102 to <16 x i8> addrspace(2)* %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0 %105 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %106 = load <16 x i8>, <16 x i8> addrspace(2)* %105, align 16, !tbaa !0 %107 = add i32 %5, %7 %108 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %106, i32 0, i32 %107) %109 = extractelement <4 x float> %108, i32 0 %110 = extractelement <4 x float> %108, i32 1 %111 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %112 = load <16 x i8>, <16 x i8> addrspace(2)* %111, align 16, !tbaa !0 %113 = add i32 %5, %7 %114 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %112, i32 0, i32 %113) %115 = extractelement <4 x float> %114, i32 0 %116 = extractelement <4 x float> %114, i32 1 %117 = extractelement <4 x float> %114, i32 2 %118 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %119 = load <16 x i8>, <16 x i8> addrspace(2)* %118, align 16, !tbaa !0 %120 = add i32 %5, %7 %121 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %119, i32 0, i32 %120) %122 = extractelement <4 x float> %121, i32 0 %123 = extractelement <4 x float> %121, i32 1 %124 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %125 = load <16 x i8>, <16 x i8> addrspace(2)* %124, align 16, !tbaa !0 %126 = add i32 %5, %7 %127 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %125, i32 0, i32 %126) %128 = extractelement <4 x float> %127, i32 0 %129 = fmul float %128, %43 %130 = call float @llvm.floor.f32(float %129) %131 = fadd float %109, 1.000000e+00 %132 = fadd float %110, 1.000000e+00 %133 = fmul float %131, 5.000000e-01 %134 = fmul float %132, 5.000000e-01 %135 = fdiv float 1.000000e+00, %43 %136 = fadd float %130, 0x3FDFEF9DC0000000 %137 = fmul float %136, %135 %138 = fdiv float 1.000000e+00, %44 %139 = fmul float %138, 0x3FDFEF9DC0000000 %140 = bitcast float %137 to i32 %141 = bitcast float %139 to i32 %142 = insertelement <4 x i32> undef, i32 %140, i32 0 %143 = insertelement <4 x i32> %142, i32 %141, i32 1 %144 = insertelement <4 x i32> %143, i32 0, i32 2 %145 = bitcast <8 x i32> %90 to <32 x i8> %146 = bitcast <4 x i32> %92 to <16 x i8> %147 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %144, <32 x i8> %145, <16 x i8> %146, i32 2) %148 = extractelement <4 x float> %147, i32 0 %149 = extractelement <4 x float> %147, i32 1 %150 = extractelement <4 x float> %147, i32 2 %151 = extractelement <4 x float> %147, i32 3 %152 = fadd float %130, 0x3FDFEF9DC0000000 %153 = fmul float %152, %135 %154 = fmul float %138, 0x3FF7FBE760000000 %155 = bitcast float %153 to i32 %156 = bitcast float %154 to i32 %157 = insertelement <4 x i32> undef, i32 %155, i32 0 %158 = insertelement <4 x i32> %157, i32 %156, i32 1 %159 = insertelement <4 x i32> %158, i32 0, i32 2 %160 = bitcast <8 x i32> %90 to <32 x i8> %161 = bitcast <4 x i32> %92 to <16 x i8> %162 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %159, <32 x i8> %160, <16 x i8> %161, i32 2) %163 = extractelement <4 x float> %162, i32 0 %164 = extractelement <4 x float> %162, i32 1 %165 = fmul float %148, 2.000000e+00 %166 = fmul float %149, 2.000000e+00 %167 = fmul float %150, 2.000000e+00 %168 = fmul float %151, 2.000000e+00 %169 = bitcast float %115 to i32 %170 = bitcast float %117 to i32 %171 = insertelement <4 x i32> undef, i32 %169, i32 0 %172 = insertelement <4 x i32> %171, i32 %170, i32 1 %173 = insertelement <4 x i32> %172, i32 0, i32 2 %174 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %173, <32 x i8> %95, <16 x i8> %98, i32 2) %175 = extractelement <4 x float> %174, i32 0 %176 = extractelement <4 x float> %174, i32 1 %177 = call float @llvm.AMDGPU.lrp(float %175, float %167, float %165) %178 = call float @llvm.AMDGPU.lrp(float %176, float %168, float %166) %179 = fmul float %122, %122 %180 = fmul float %123, %123 %181 = fadd float %180, %179 %182 = call float @llvm.sqrt.f32(float %181) %183 = fsub float 1.000000e+00, %182 %184 = fsub float -0.000000e+00, %40 %185 = fsub float -0.000000e+00, %41 %186 = fsub float -0.000000e+00, %42 %187 = fmul float %41, -0.000000e+00 %188 = fmul float %42, -0.000000e+00 %189 = fsub float -0.000000e+00, %187 %190 = fsub float %189, %42 %191 = fmul float %40, -0.000000e+00 %192 = fsub float %191, %188 %193 = fmul float %41, -0.000000e+00 %194 = fadd float %193, %40 %195 = fmul float %190, %190 %196 = fmul float %192, %192 %197 = fadd float %196, %195 %198 = fmul float %194, %194 %199 = fadd float %197, %198 %200 = call float @llvm.AMDGPU.rsq.clamped.f32(float %199) %201 = fmul float %190, %200 %202 = fmul float %192, %200 %203 = fmul float %194, %200 %204 = fmul float %202, %186 %205 = fmul float %203, %184 %206 = fmul float %201, %185 %207 = fmul float %203, %185 %208 = fsub float %207, %204 %209 = fmul float %201, %186 %210 = fsub float %209, %205 %211 = fmul float %202, %184 %212 = fsub float %211, %206 %213 = fmul float %208, %208 %214 = fmul float %210, %210 %215 = fadd float %214, %213 %216 = fmul float %212, %212 %217 = fadd float %215, %216 %218 = call float @llvm.AMDGPU.rsq.clamped.f32(float %217) %219 = fmul float %208, %218 %220 = fmul float %212, %218 %221 = fmul float %122, %184 %222 = fmul float %183, %185 %223 = fadd float %222, %221 %224 = fmul float %123, %186 %225 = fadd float %223, %224 %226 = call float @llvm.fabs.f32(float %225) %227 = fmul float %226, 5.000000e-01 %228 = fadd float %227, 5.000000e-01 %229 = call float @llvm.AMDGPU.lrp(float %228, float %219, float %122) %230 = call float @llvm.AMDGPU.lrp(float %228, float 1.000000e+00, float %183) %231 = call float @llvm.AMDGPU.lrp(float %228, float %220, float %123) %232 = fmul float %229, %229 %233 = fmul float %230, %230 %234 = fadd float %233, %232 %235 = fmul float %231, %231 %236 = fadd float %234, %235 %237 = call float @llvm.AMDGPU.rsq.clamped.f32(float %236) %238 = fmul float %229, %237 %239 = fmul float %230, %237 %240 = fmul float %231, %237 %241 = fmul float %53, %45 %242 = fmul float %53, %45 %243 = fmul float %54, %46 %244 = fmul float %54, %46 %245 = fmul float %241, %61 %246 = fmul float %242, %62 %247 = fmul float %243, %63 %248 = fmul float %244, %64 %249 = fmul float %55, %47 %250 = fmul float %55, %47 %251 = fmul float %56, %48 %252 = fmul float %56, %48 %253 = fmul float %249, %65 %254 = fmul float %250, %66 %255 = fmul float %251, %67 %256 = fmul float %252, %68 %257 = fmul float %61, %115 %258 = fmul float %62, %117 %259 = fadd float %257, %258 %260 = fmul float %63, %115 %261 = fmul float %64, %117 %262 = fadd float %260, %261 %263 = fmul float %65, %115 %264 = fmul float %66, %117 %265 = fadd float %263, %264 %266 = fmul float %67, %115 %267 = fmul float %68, %117 %268 = fadd float %266, %267 %269 = fmul float %49, %259 %270 = fmul float %50, %262 %271 = fmul float %51, %265 %272 = fmul float %52, %268 %273 = fmul float %14, %57 %274 = fmul float %14, %58 %275 = fmul float %14, %59 %276 = fmul float %14, %60 %277 = fadd float %269, %273 %278 = fadd float %270, %274 %279 = fadd float %271, %275 %280 = fadd float %272, %276 %281 = call float @llvm.cos.f32(float %277) %282 = call float @llvm.cos.f32(float %278) %283 = call float @llvm.cos.f32(float %279) %284 = call float @llvm.cos.f32(float %280) %285 = fmul float %281, %245 %286 = fmul float %282, %247 %287 = fadd float %285, %286 %288 = fmul float %283, %253 %289 = fadd float %287, %288 %290 = fmul float %284, %255 %291 = fadd float %289, %290 %292 = fmul float %281, %246 %293 = fmul float %282, %248 %294 = fadd float %292, %293 %295 = fmul float %283, %254 %296 = fadd float %294, %295 %297 = fmul float %284, %256 %298 = fadd float %296, %297 %299 = fadd float %115, %291 %300 = fadd float %117, %298 %301 = fmul float %49, %45 %302 = fmul float %49, %45 %303 = fmul float %50, %46 %304 = fmul float %50, %46 %305 = fmul float %301, %61 %306 = fmul float %302, %62 %307 = fmul float %303, %63 %308 = fmul float %304, %64 %309 = fmul float %51, %47 %310 = fmul float %51, %47 %311 = fmul float %52, %48 %312 = fmul float %52, %48 %313 = fmul float %309, %65 %314 = fmul float %310, %66 %315 = fmul float %311, %67 %316 = fmul float %312, %68 %317 = fmul float %61, %299 %318 = fmul float %62, %300 %319 = fadd float %317, %318 %320 = fmul float %63, %299 %321 = fmul float %64, %300 %322 = fadd float %320, %321 %323 = fmul float %65, %299 %324 = fmul float %66, %300 %325 = fadd float %323, %324 %326 = fmul float %67, %299 %327 = fmul float %68, %300 %328 = fadd float %326, %327 %329 = fmul float %49, %319 %330 = fadd float %329, %273 %331 = fmul float %50, %322 %332 = fadd float %331, %274 %333 = fmul float %51, %325 %334 = fadd float %333, %275 %335 = fmul float %52, %328 %336 = fadd float %335, %276 %337 = call float @llvm.cos.f32(float %330) %338 = call float @llvm.cos.f32(float %332) %339 = call float @llvm.cos.f32(float %334) %340 = call float @llvm.cos.f32(float %336) %341 = fmul float %337, %305 %342 = fmul float %338, %307 %343 = fadd float %341, %342 %344 = fmul float %339, %313 %345 = fadd float %343, %344 %346 = fmul float %340, %315 %347 = fadd float %345, %346 %348 = fmul float %337, %306 %349 = fmul float %338, %308 %350 = fadd float %348, %349 %351 = fmul float %339, %314 %352 = fadd float %350, %351 %353 = fmul float %340, %316 %354 = fadd float %352, %353 %355 = fmul float %347, %347 %356 = fadd float %355, 4.000000e+00 %357 = fmul float %354, %354 %358 = fadd float %356, %357 %359 = call float @llvm.AMDGPU.rsq.clamped.f32(float %358) %360 = fmul float %347, %359 %361 = fmul float %354, %359 %362 = fmul float %163, 5.000000e-01 %363 = fmul float %360, -2.000000e+00 %364 = fmul float %361, -2.000000e+00 %365 = fsub float %115, %69 %366 = fsub float %117, %70 %367 = fmul float %71, 5.000000e-01 %368 = fadd float %367, %365 %369 = fmul float %72, 5.000000e-01 %370 = fadd float %369, %366 %371 = fdiv float 1.000000e+00, %71 %372 = fdiv float 1.000000e+00, %72 %373 = fmul float %368, %371 %374 = fmul float %370, %372 %375 = fadd float %110, 0x3FE99999A0000000 %376 = fmul float %375, %178 %377 = fmul float %109, %177 %378 = fmul float %377, %201 %379 = fmul float %377, %202 %380 = fmul float %377, %203 %381 = fmul float %376, %238 %382 = fadd float %381, %378 %383 = fmul float %376, %239 %384 = fadd float %383, %379 %385 = fmul float %376, %240 %386 = fadd float %385, %380 %387 = bitcast float %373 to i32 %388 = bitcast float %374 to i32 %389 = insertelement <4 x i32> undef, i32 %387, i32 0 %390 = insertelement <4 x i32> %389, i32 %388, i32 1 %391 = insertelement <4 x i32> %390, i32 0, i32 2 %392 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %391, <32 x i8> %101, <16 x i8> %104, i32 2) %393 = extractelement <4 x float> %392, i32 0 %394 = extractelement <4 x float> %392, i32 1 %395 = fadd float %393, -5.000000e-01 %396 = fadd float %394, -5.000000e-01 %397 = fadd float %363, %395 %398 = fadd float %364, %396 %399 = fmul float %397, %362 %400 = fmul float %362, 0.000000e+00 %401 = fmul float %398, %362 %402 = fmul float %399, %178 %403 = fmul float %400, %178 %404 = fmul float %401, %178 %405 = fmul float %402, %375 %406 = fmul float %403, %375 %407 = fmul float %404, %375 %408 = fmul float %122, 1.500000e+00 %409 = fmul float %123, 1.500000e+00 %410 = fmul float %363, %362 %411 = fmul float %410, 0x3FC99999A0000000 %412 = fadd float %409, %411 %413 = fmul float %408, %408 %414 = fmul float %183, %183 %415 = fadd float %414, %413 %416 = fmul float %412, %412 %417 = fadd float %415, %416 %418 = call float @llvm.AMDGPU.rsq.clamped.f32(float %417) %419 = fmul float %408, %418 %420 = fmul float %183, %418 %421 = fmul float %412, %418 %422 = fmul float %420, 0.000000e+00 %423 = fmul float %421, 0.000000e+00 %424 = fmul float %419, 0x3FB99999A0000000 %425 = fmul float %421, 0x3FB99999A0000000 %426 = fsub float %425, %422 %427 = fmul float %419, 0.000000e+00 %428 = fsub float %427, %423 %429 = fmul float %420, 0.000000e+00 %430 = fsub float %429, %424 %431 = fmul float %421, %428 %432 = fmul float %419, %430 %433 = fmul float %420, %426 %434 = fmul float %420, %430 %435 = fsub float %434, %431 %436 = fmul float %421, %426 %437 = fsub float %436, %432 %438 = fmul float %419, %428 %439 = fsub float %438, %433 %440 = fmul float %164, 8.000000e+00 %441 = fadd float %440, -4.000000e+00 %442 = fmul float %24, %419 %443 = fmul float %28, %419 %444 = fmul float %32, %419 %445 = fmul float %25, %420 %446 = fadd float %445, %442 %447 = fmul float %29, %420 %448 = fadd float %447, %443 %449 = fmul float %33, %420 %450 = fadd float %449, %444 %451 = fmul float %26, %421 %452 = fadd float %451, %446 %453 = fmul float %30, %421 %454 = fadd float %453, %448 %455 = fmul float %34, %421 %456 = fadd float %455, %450 %457 = fmul float %452, %452 %458 = fmul float %454, %454 %459 = fadd float %458, %457 %460 = fmul float %456, %456 %461 = fadd float %459, %460 %462 = call float @llvm.AMDGPU.rsq.clamped.f32(float %461) %463 = fmul float %452, %462 %464 = fmul float %454, %462 %465 = fmul float %456, %462 %466 = fmul float %15, %435 %467 = fmul float %16, %435 %468 = fmul float %17, %435 %469 = fmul float %18, %437 %470 = fadd float %469, %466 %471 = fmul float %19, %437 %472 = fadd float %471, %467 %473 = fmul float %20, %437 %474 = fadd float %473, %468 %475 = fmul float %21, %439 %476 = fadd float %475, %470 %477 = fmul float %22, %439 %478 = fadd float %477, %472 %479 = fmul float %23, %439 %480 = fadd float %479, %474 %481 = fmul float %476, %476 %482 = fmul float %478, %478 %483 = fadd float %482, %481 %484 = fmul float %480, %480 %485 = fadd float %483, %484 %486 = call float @llvm.AMDGPU.rsq.clamped.f32(float %485) %487 = fmul float %476, %486 %488 = fmul float %478, %486 %489 = fmul float %480, %486 %490 = fmul float %465, %488 %491 = fmul float %463, %489 %492 = fmul float %464, %487 %493 = fmul float %464, %489 %494 = fsub float %493, %490 %495 = fmul float %465, %487 %496 = fsub float %495, %491 %497 = fmul float %463, %488 %498 = fsub float %497, %492 %499 = fmul float %421, %437 %500 = fmul float %419, %439 %501 = fmul float %420, %435 %502 = fmul float %420, %439 %503 = fsub float %502, %499 %504 = fmul float %421, %435 %505 = fsub float %504, %500 %506 = fmul float %419, %437 %507 = fsub float %506, %501 %508 = fmul float %503, %426 %509 = fmul float %505, %428 %510 = fadd float %509, %508 %511 = fmul float %507, %430 %512 = fadd float %510, %511 %513 = fcmp ogt float %512, 0.000000e+00 %514 = select i1 %513, float 1.000000e+00, float %512 %515 = fcmp oge float %514, 0.000000e+00 %516 = select i1 %515, float %514, float -1.000000e+00 %517 = fmul float %494, %516 %518 = fmul float %496, %516 %519 = fmul float %498, %516 %520 = fadd float %115, %382 %521 = fadd float %116, %384 %522 = fadd float %117, %386 %523 = fcmp ogt float %405, 0.000000e+00 %524 = select i1 %523, float 1.000000e+00, float %405 %525 = fcmp oge float %524, 0.000000e+00 %526 = select i1 %525, float %524, float -1.000000e+00 %527 = fcmp ogt float %406, 0.000000e+00 %528 = select i1 %527, float 1.000000e+00, float %406 %529 = fcmp oge float %528, 0.000000e+00 %530 = select i1 %529, float %528, float -1.000000e+00 %531 = fcmp ogt float %407, 0.000000e+00 %532 = select i1 %531, float 1.000000e+00, float %407 %533 = fcmp oge float %532, 0.000000e+00 %534 = select i1 %533, float %532, float -1.000000e+00 %535 = call float @llvm.fabs.f32(float %405) %536 = call float @llvm.fabs.f32(float %406) %537 = call float @llvm.fabs.f32(float %407) %538 = fmul float %535, 0x3FB1EB8520000000 %539 = fadd float %538, 0xBF8EB851E0000000 %540 = fmul float %536, 0x3FB1EB8520000000 %541 = fadd float %540, 0xBF8EB851E0000000 %542 = fmul float %537, 0x3FB1EB8520000000 %543 = fadd float %542, 0xBF8EB851E0000000 %544 = call float @llvm.maxnum.f32(float %539, float 0.000000e+00) %545 = call float @llvm.maxnum.f32(float %541, float 0.000000e+00) %546 = call float @llvm.maxnum.f32(float %543, float 0.000000e+00) %547 = fmul float %526, %544 %548 = fmul float %530, %545 %549 = fmul float %534, %546 %550 = fmul float %122, %122 %551 = fmul float %123, %123 %552 = fadd float %550, %551 %553 = call float @llvm.sqrt.f32(float %552) %554 = fadd float %553, 1.000000e+00 %555 = fadd float %13, 1.000000e+02 %556 = fmul float %554, %555 %557 = fmul float %556, 1.800000e+02 %558 = call float @llvm.sin.f32(float %557) %559 = fmul float %558, 0x3FE6666660000000 %560 = fmul float %547, %559 %561 = fadd float %560, %405 %562 = fmul float %548, %559 %563 = fadd float %562, %406 %564 = fmul float %549, %559 %565 = fadd float %564, %407 %566 = fmul float %559, 0.000000e+00 %567 = fadd float %566, 0.000000e+00 %568 = fadd float %520, %561 %569 = fadd float %521, %563 %570 = fadd float %522, %565 %571 = fadd float %567, 1.000000e+00 %572 = fmul float %24, %568 %573 = fmul float %25, %568 %574 = fmul float %26, %568 %575 = fmul float %27, %568 %576 = fmul float %28, %569 %577 = fadd float %576, %572 %578 = fmul float %29, %569 %579 = fadd float %578, %573 %580 = fmul float %30, %569 %581 = fadd float %580, %574 %582 = fmul float %31, %569 %583 = fadd float %582, %575 %584 = fmul float %32, %570 %585 = fadd float %584, %577 %586 = fmul float %33, %570 %587 = fadd float %586, %579 %588 = fmul float %34, %570 %589 = fadd float %588, %581 %590 = fmul float %35, %570 %591 = fadd float %590, %583 %592 = fmul float %36, %571 %593 = fadd float %592, %585 %594 = fmul float %37, %571 %595 = fadd float %594, %587 %596 = fmul float %38, %571 %597 = fadd float %596, %589 %598 = fmul float %39, %571 %599 = fadd float %598, %591 %600 = fmul float %73, %593 %601 = fmul float %74, %593 %602 = fmul float %75, %593 %603 = fmul float %76, %593 %604 = fmul float %77, %595 %605 = fadd float %604, %600 %606 = fmul float %78, %595 %607 = fadd float %606, %601 %608 = fmul float %79, %595 %609 = fadd float %608, %602 %610 = fmul float %80, %595 %611 = fadd float %610, %603 %612 = fmul float %81, %597 %613 = fadd float %612, %605 %614 = fmul float %82, %597 %615 = fadd float %614, %607 %616 = fmul float %83, %597 %617 = fadd float %616, %609 %618 = fmul float %84, %597 %619 = fadd float %618, %611 %620 = fmul float %85, %599 %621 = fadd float %620, %613 %622 = fmul float %86, %599 %623 = fadd float %622, %615 %624 = fmul float %87, %599 %625 = fadd float %624, %617 %626 = fmul float %88, %599 %627 = fadd float %626, %619 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %133, float %134, float %130, float %441) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %487, float %517, float %463, float %488) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %518, float %464, float %489, float %519) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %465, float %464, float %465, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %621, float %623, float %625, float %627) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.cos.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[36:39], s[8:9], 0x0 ; C0920900 v_add_i32_e32 v15, s10, v0 ; 4A1E000A v_mov_b32_e32 v0, 0x3e22f983 ; 7E0002FF 3E22F983 v_mov_b32_e32 v5, 0x80000000 ; 7E0A02FF 80000000 v_mov_b32_e32 v16, 0x3eff7cee ; 7E2002FF 3EFF7CEE v_mov_b32_e32 v22, 0x3fc00000 ; 7E2C02FF 3FC00000 v_mov_b32_e32 v14, 0xbdcccccd ; 7E1C02FF BDCCCCCD v_mov_b32_e32 v23, 0x41000000 ; 7E2E02FF 41000000 s_load_dwordx4 s[44:47], s[8:9], 0x4 ; C0960904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s43, s[0:3], 0x56 ; C2158156 s_buffer_load_dword s52, s[0:3], 0x57 ; C21A0157 s_buffer_load_dword s35, s[0:3], 0x58 ; C2118158 s_buffer_load_dword s42, s[0:3], 0x59 ; C2150159 s_buffer_load_dword s33, s[0:3], 0x5a ; C210815A s_buffer_load_dword s10, s[0:3], 0x51 ; C2050151 s_buffer_load_dword s11, s[0:3], 0x52 ; C2058152 s_buffer_load_dword s12, s[0:3], 0x53 ; C2060153 s_buffer_load_dword s55, s[0:3], 0x54 ; C21B8154 s_buffer_load_dword s56, s[0:3], 0x55 ; C21C0155 s_buffer_load_dword s13, s[0:3], 0x47 ; C2068147 s_buffer_load_dword s54, s[0:3], 0x48 ; C21B0148 s_buffer_load_dword s53, s[0:3], 0x49 ; C21A8149 s_buffer_load_dword s40, s[0:3], 0x4a ; C214014A s_buffer_load_dword s34, s[0:3], 0x4b ; C211014B s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s10 ; 7E0C020A s_buffer_load_dword s10, s[0:3], 0x4c ; C205014C v_mov_b32_e32 v7, s11 ; 7E0E020B s_buffer_load_dword s11, s[0:3], 0x4d ; C205814D v_mov_b32_e32 v8, s12 ; 7E10020C s_buffer_load_dword s16, s[0:3], 0x4e ; C208014E s_buffer_load_dword s12, s[0:3], 0x4f ; C206014F s_buffer_load_dword s14, s[0:3], 0x50 ; C2070150 v_mov_b32_e32 v1, s13 ; 7E02020D v_mov_b32_e32 v2, s13 ; 7E04020D s_buffer_load_dword s41, s[0:3], 0x5b ; C214815B s_buffer_load_dword s57, s[0:3], 0x5c ; C21C815C s_buffer_load_dword s58, s[0:3], 0x5d ; C21D015D s_buffer_load_dword s59, s[0:3], 0x5e ; C21D815E v_mul_f32_e32 v1, s34, v1 ; 10020222 v_mul_f32_e32 v24, s33, v1 ; 10300221 s_buffer_load_dword s76, s[0:3], 0x5f ; C226015F s_buffer_load_dword s77, s[0:3], 0x40 ; C2268140 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s12, v2 ; 1004040C v_mov_b32_e32 v9, s14 ; 7E12020E s_buffer_load_dword s12, s[0:3], 0x41 ; C2060141 v_mul_f32_e32 v25, s41, v1 ; 10320229 s_buffer_load_dword s13, s[0:3], 0x44 ; C2068144 v_mul_f32_e32 v26, s41, v2 ; 10340429 v_mul_f32_e32 v27, s33, v2 ; 10360421 v_rcp_f32_e32 v28, s59 ; 7E38543B s_buffer_load_dword s17, s[0:3], 0x45 ; C2088145 v_rcp_f32_e32 v29, s76 ; 7E3A544C s_buffer_load_dword s18, s[0:3], 0x46 ; C2090146 v_rcp_f32_e32 v17, s77 ; 7E22544D s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v30, s12 ; 7E3C540C s_buffer_load_dword s24, s[0:3], 0xe ; C20C010E v_mov_b32_e32 v1, s13 ; 7E02020D v_mov_b32_e32 v2, s13 ; 7E04020D v_mul_f32_e32 v2, s10, v2 ; 1004040A v_mul_f32_e32 v1, s54, v1 ; 10020236 s_buffer_load_dword s13, s[0:3], 0x14 ; C2068114 s_buffer_load_dword s12, s[0:3], 0x15 ; C2060115 v_mov_b32_e32 v3, s17 ; 7E060211 v_mul_f32_e32 v3, s53, v3 ; 10060635 s_buffer_load_dword s15, s[0:3], 0x16 ; C2078116 s_buffer_load_dword s14, s[0:3], 0x17 ; C2070117 v_mov_b32_e32 v4, s17 ; 7E080211 v_mul_f32_e32 v4, s11, v4 ; 1008080B s_buffer_load_dword s26, s[0:3], 0x8 ; C20D0108 s_buffer_load_dword s25, s[0:3], 0x9 ; C20C8109 v_mul_f32_e32 v31, s52, v4 ; 103E0834 v_mul_f32_e32 v32, s43, v4 ; 1040082B v_mul_f32_e32 v33, s56, v2 ; 10420438 v_mul_f32_e32 v34, s55, v2 ; 10440437 v_mov_b32_e32 v2, s18 ; 7E040212 v_mul_f32_e32 v2, s40, v2 ; 10040428 v_mov_b32_e32 v4, s18 ; 7E080212 v_mul_f32_e32 v4, s16, v4 ; 10080810 s_buffer_load_dword s28, s[0:3], 0xa ; C20E010A s_buffer_load_dword s27, s[0:3], 0xc ; C20D810C v_mul_f32_e32 v35, s42, v4 ; 1046082A v_mul_f32_e32 v36, s35, v4 ; 10480823 v_mul_f32_e32 v37, s43, v3 ; 104A062B v_mul_f32_e32 v38, s52, v3 ; 104C0634 v_mul_f32_e32 v39, s55, v1 ; 104E0237 v_mul_f32_e32 v40, s56, v1 ; 10500238 v_mul_f32_e32 v41, s35, v2 ; 10520423 v_mul_f32_e32 v42, s42, v2 ; 1054042A s_buffer_load_dword s29, s[0:3], 0xd ; C20E810D s_buffer_load_dword s22, s[0:3], 0x18 ; C20B0118 s_buffer_load_dword s23, s[0:3], 0x19 ; C20B8119 s_buffer_load_dword s20, s[0:3], 0x1a ; C20A011A s_buffer_load_dword s21, s[0:3], 0x1b ; C20A811B s_buffer_load_dword s18, s[0:3], 0x1c ; C209011C s_buffer_load_dword s19, s[0:3], 0x1d ; C209811D s_buffer_load_dword s16, s[0:3], 0x1e ; C208011E s_buffer_load_dword s17, s[0:3], 0x1f ; C208811F s_buffer_load_dword s10, s[0:3], 0x20 ; C2050120 s_buffer_load_dword s11, s[0:3], 0x21 ; C2058121 s_buffer_load_dword s48, s[0:3], 0x1 ; C2180101 s_buffer_load_dword s30, s[0:3], 0x4 ; C20F0104 s_buffer_load_dword s31, s[0:3], 0x5 ; C20F8105 s_buffer_load_dword s32, s[0:3], 0x6 ; C2100106 s_load_dwordx4 s[68:71], s[8:9], 0x8 ; C0A20908 s_load_dwordx4 s[72:75], s[8:9], 0xc ; C0A4090C buffer_load_format_xyzw v[10:13], v15, s[36:39], 0 idxen ; E00C2000 80090A0F buffer_load_format_xyzw v[1:4], v15, s[44:47], 0 idxen ; E00C2000 800B010F s_load_dwordx4 s[64:67], s[4:5], 0x0 ; C0A00500 s_load_dwordx4 s[60:63], s[4:5], 0x4 ; C09E0504 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v12, s48, v6 ; 10180C30 v_mul_f32_e32 v13, s48, v9 ; 101A1230 v_mul_f32_e32 v43, s48, v7 ; 10560E30 v_mul_f32_e32 v44, s48, v8 ; 10581030 buffer_load_format_xyzw v[6:9], v15, s[68:71], 0 idxen ; E00C2000 8011060F buffer_load_format_xyzw v[18:21], v15, s[72:75], 0 idxen ; E00C2000 8012120F s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508 s_load_dwordx8 s[80:87], s[6:7], 0x0 ; C0E80700 s_load_dwordx8 s[68:75], s[6:7], 0x8 ; C0E20708 s_load_dwordx8 s[44:51], s[6:7], 0x10 ; C0D60710 s_waitcnt vmcnt(2) ; BF8C0772 v_add_f32_e32 v4, 1.0, v10 ; 060814F2 v_subrev_f32_e32 v45, s57, v1 ; 0A5A0239 v_mac_f32_e64 v45, 0.5, s59 ; D23E002D 000076F0 v_subrev_f32_e32 v46, s58, v3 ; 0A5C063A v_mac_f32_e64 v46, 0.5, s76 ; D23E002E 000098F0 v_mul_f32_e32 v4, 0.5, v4 ; 100808F0 v_mul_f32_e32 v47, s56, v3 ; 105E0638 v_mac_f32_e32 v47, s55, v1 ; 3E5E0237 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v8, s77, v18 ; 1010244D v_floor_f32_e32 v48, v8 ; 7E604908 v_add_f32_e32 v8, v16, v48 ; 06106110 v_mul_f32_e32 v15, v17, v8 ; 101E1111 v_mul_f32_e32 v16, v16, v30 ; 10203D10 v_mov_b32_e32 v17, 0 ; 7E220280 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[80:87], s[64:67] ; F0900F00 0214120F v_mul_f32_e32 v16, 0x3fbfdf3b, v30 ; 10203CFF 3FBFDF3B s_waitcnt vmcnt(0) ; BF8C0770 image_sample_l v[49:50], 3, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[80:87], s[64:67] ; F0900300 0214310F v_mov_b32_e32 v15, v1 ; 7E1E0301 v_mov_b32_e32 v16, v3 ; 7E200303 image_sample_l v[8:9], 3, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[68:75], s[60:63] ; F0900300 01F1080F v_mul_f32_e32 v15, s52, v3 ; 101E0634 v_mac_f32_e32 v15, s43, v1 ; 3E1E022B v_mad_f32 v16, s54, v47, v13 ; D2820010 04365E36 v_mad_f32 v15, s53, v15, v12 ; D282000F 04321E35 v_mul_f32_e32 v16, v0, v16 ; 10202100 v_mul_f32_e32 v15, v0, v15 ; 101E1F00 v_fract_f32_e32 v16, v16 ; 7E204110 v_fract_f32_e32 v15, v15 ; 7E1E410F v_cos_f32_e32 v16, v16 ; 7E206D10 v_cos_f32_e32 v15, v15 ; 7E1E6D0F v_mul_f32_e32 v30, v31, v15 ; 103C1F1F v_mac_f32_e32 v30, v33, v16 ; 3E3C2121 v_mul_f32_e32 v31, s42, v3 ; 103E062A v_mac_f32_e32 v31, s35, v1 ; 3E3E0223 v_mad_f32 v31, s40, v31, v43 ; D282001F 04AE3E28 v_mul_f32_e32 v31, v0, v31 ; 103E3F00 v_fract_f32_e32 v31, v31 ; 7E3E411F v_cos_f32_e32 v31, v31 ; 7E3E6D1F v_mac_f32_e32 v30, v35, v31 ; 3E3C3F23 v_mul_f32_e32 v33, s41, v3 ; 10420629 v_mac_f32_e32 v33, s33, v1 ; 3E420221 v_mad_f32 v33, s34, v33, v44 ; D2820021 04B24222 v_mul_f32_e32 v33, v0, v33 ; 10424300 v_fract_f32_e32 v33, v33 ; 7E424121 v_cos_f32_e32 v33, v33 ; 7E426D21 v_mac_f32_e32 v30, v26, v33 ; 3E3C431A v_add_f32_e32 v26, v30, v3 ; 0634071E v_mul_f32_e32 v30, s56, v26 ; 103C3438 v_mul_f32_e32 v15, v32, v15 ; 101E1F20 v_mac_f32_e32 v15, v34, v16 ; 3E1E2122 v_mac_f32_e32 v15, v36, v31 ; 3E1E3F24 v_mac_f32_e32 v15, v27, v33 ; 3E1E431B v_add_f32_e32 v16, v15, v1 ; 0620030F v_mac_f32_e32 v30, s55, v16 ; 3E3C2037 v_mac_f32_e32 v13, s54, v30 ; 3E1A3C36 v_mul_f32_e32 v15, s52, v26 ; 101E3434 v_mac_f32_e32 v15, s43, v16 ; 3E1E202B v_mac_f32_e32 v12, s53, v15 ; 3E181E35 v_mul_f32_e32 v27, s42, v26 ; 1036342A v_mul_f32_e32 v15, v28, v45 ; 101E5B1C v_mul_f32_e32 v12, v0, v12 ; 10181900 v_fract_f32_e32 v12, v12 ; 7E18410C v_cos_f32_e32 v12, v12 ; 7E186D0C v_mul_f32_e32 v28, v37, v12 ; 10381925 v_mul_f32_e32 v12, v38, v12 ; 10181926 v_mul_f32_e32 v13, v0, v13 ; 101A1B00 v_fract_f32_e32 v13, v13 ; 7E1A410D v_cos_f32_e32 v13, v13 ; 7E1A6D0D v_mac_f32_e32 v28, v39, v13 ; 3E381B27 v_mac_f32_e32 v12, v40, v13 ; 3E181B28 v_mac_f32_e32 v27, s35, v16 ; 3E362023 v_mac_f32_e32 v43, s40, v27 ; 3E563628 v_mul_f32_e32 v13, v0, v43 ; 101A5700 v_fract_f32_e32 v13, v13 ; 7E1A410D v_cos_f32_e32 v13, v13 ; 7E1A6D0D v_mac_f32_e32 v28, v41, v13 ; 3E381B29 v_mac_f32_e32 v12, v42, v13 ; 3E181B2A v_mul_f32_e32 v13, s41, v26 ; 101A3429 v_mac_f32_e32 v13, s33, v16 ; 3E1A2021 v_mac_f32_e32 v44, s34, v13 ; 3E581A22 v_mul_f32_e32 v13, v0, v44 ; 101A5900 v_fract_f32_e32 v13, v13 ; 7E1A410D v_cos_f32_e32 v13, v13 ; 7E1A6D0D v_mac_f32_e32 v28, v24, v13 ; 3E381B18 v_mac_f32_e32 v12, v25, v13 ; 3E181B19 v_mad_f32 v13, v28, v28, 4.0 ; D282000D 03DA391C v_mac_f32_e32 v13, v12, v12 ; 3E1A190C v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D v_mul_f32_e32 v16, v29, v46 ; 10205D1D s_waitcnt vmcnt(1) ; BF8C0771 v_mad_f32 v23, v23, v50, -4.0 ; D2820017 03DE6517 v_mul_f32_e32 v24, 0.5, v49 ; 103062F0 v_mul_f32_e32 v25, v13, v28 ; 1032390D v_mul_f32_e32 v26, -2.0, v25 ; 103432F5 v_mul_f32_e32 v26, v24, v26 ; 10343518 v_mul_f32_e32 v26, 0x3e4ccccd, v26 ; 103434FF 3E4CCCCD v_mac_f32_e32 v26, v22, v7 ; 3E340F16 v_mul_f32_e32 v22, v22, v6 ; 102C0D16 v_mul_f32_e32 v27, v6, v6 ; 10360D06 v_mac_f32_e32 v27, v7, v7 ; 3E360F07 v_sqrt_f32_e32 v27, v27 ; 7E36671B v_mul_f32_e32 v28, v22, v22 ; 10382D16 v_sub_f32_e32 v29, 1.0, v27 ; 083A36F2 v_mac_f32_e32 v28, v29, v29 ; 3E383B1D v_mac_f32_e32 v28, v26, v26 ; 3E38351A v_rsq_clamp_f32_e32 v28, v28 ; 7E38591C v_add_f32_e32 v30, 1.0, v11 ; 063C16F2 v_mul_f32_e32 v30, 0.5, v30 ; 103C3CF0 image_sample_l v[15:16], 3, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[44:51], s[36:39] ; F0900300 012B0F0F exp 15, 32, 0, 0, 0, v4, v30, v48, v23 ; F800020F 17301E04 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v4, v28, v22 ; 10082D1C v_mul_f32_e32 v22, v28, v26 ; 102C351C v_mul_f32_e32 v23, v28, v29 ; 102E3B1C v_mul_f32_e32 v14, v4, v14 ; 101C1D04 v_mul_f32_e32 v26, v5, v23 ; 10342F05 v_madmk_f32_e32 v26, v22, v26, 0x3dcccccd ; 40343516 3DCCCCCD v_mul_f32_e32 v28, v5, v22 ; 10382D05 v_mac_f32_e32 v28, 0, v4 ; 3E380880 v_mac_f32_e32 v14, 0, v23 ; 3E1C2E80 v_mul_f32_e32 v30, v14, v4 ; 103C090E v_mad_f32 v30, v22, v26, -v30 ; D282001E 847A3516 v_mul_f32_e32 v31, v26, v23 ; 103E2F1A v_mad_f32 v31, v4, v28, -v31 ; D282001F 847E3904 v_mul_f32_e32 v32, v30, v22 ; 10402D1E v_mad_f32 v32, v23, v31, -v32 ; D2820020 84823F17 v_mul_f32_e32 v26, v26, v32 ; 1034411A v_mul_f32_e32 v32, v28, v22 ; 10402D1C v_mad_f32 v32, v23, v14, -v32 ; D2820020 84821D17 v_mul_f32_e32 v33, v31, v4 ; 1042091F v_mad_f32 v33, v22, v32, -v33 ; D2820021 84864116 v_mac_f32_e32 v26, v28, v33 ; 3E34431C v_mul_f32_e32 v28, v32, v23 ; 10382F20 v_mad_f32 v28, v4, v30, -v28 ; D282001C 84723D04 v_mac_f32_e32 v26, v14, v28 ; 3E34390E v_mul_f32_e32 v14, s13, v4 ; 101C080D v_mul_f32_e32 v28, s22, v4 ; 10380816 v_mul_f32_e32 v4, s18, v4 ; 10080812 v_mac_f32_e32 v14, s12, v23 ; 3E1C2E0C v_mac_f32_e32 v28, s23, v23 ; 3E382E17 v_mac_f32_e32 v4, s19, v23 ; 3E082E13 v_mac_f32_e32 v14, s15, v22 ; 3E1C2C0F v_mac_f32_e32 v28, s20, v22 ; 3E382C14 v_mac_f32_e32 v4, s16, v22 ; 3E082C10 v_mul_f32_e32 v22, s30, v32 ; 102C401E v_mul_f32_e32 v23, s31, v32 ; 102E401F v_mul_f32_e32 v32, s32, v32 ; 10404020 v_mac_f32_e32 v22, s26, v30 ; 3E2C3C1A v_mac_f32_e32 v23, s25, v30 ; 3E2E3C19 v_mac_f32_e32 v32, s28, v30 ; 3E403C1C v_mac_f32_e32 v22, s27, v31 ; 3E2C3E1B v_mac_f32_e32 v23, s29, v31 ; 3E2E3E1D v_mac_f32_e32 v32, s24, v31 ; 3E403E18 v_mul_f32_e32 v30, v14, v14 ; 103C1D0E v_mac_f32_e32 v30, v28, v28 ; 3E3C391C v_mac_f32_e32 v30, v4, v4 ; 3E3C0904 v_rsq_clamp_f32_e32 v30, v30 ; 7E3C591E v_mul_f32_e32 v31, v22, v22 ; 103E2D16 v_mac_f32_e32 v31, v23, v23 ; 3E3E2F17 v_mac_f32_e32 v31, v32, v32 ; 3E3E4120 v_rsq_clamp_f32_e32 v31, v31 ; 7E3E591F v_mul_f32_e32 v14, v30, v14 ; 101C1D1E v_mul_f32_e32 v28, v30, v28 ; 1038391E v_mul_f32_e32 v4, v30, v4 ; 1008091E v_mul_f32_e32 v22, v31, v22 ; 102C2D1F v_mul_f32_e32 v23, v31, v23 ; 102E2F1F v_mul_f32_e32 v30, v31, v32 ; 103C411F v_cmp_lt_f32_e32 vcc, 0, v26 ; 7C023480 v_cndmask_b32_e64 v26, v26, 1.0, vcc ; D200001A 01A9E51A v_cmp_le_f32_e32 vcc, 0, v26 ; 7C063480 v_cndmask_b32_e32 v26, -1.0, v26 ; 003434F3 v_mul_f32_e32 v31, v23, v4 ; 103E0917 v_mad_f32 v31, v28, v30, -v31 ; D282001F 847E3D1C v_mul_f32_e32 v31, v26, v31 ; 103E3F1A exp 15, 33, 0, 0, 0, v22, v31, v14, v23 ; F800021F 170E1F16 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v31, v22, v28 ; 103E3916 v_mad_f32 v23, v14, v23, -v31 ; D2820017 847E2F0E v_mul_f32_e32 v14, v30, v14 ; 101C1D1E v_mad_f32 v14, v4, v22, -v14 ; D282000E 843A2D04 v_mul_f32_e32 v14, v26, v14 ; 101C1D1A v_mul_f32_e32 v22, v26, v23 ; 102C2F1A exp 15, 34, 0, 0, 0, v14, v28, v30, v22 ; F800022F 161E1C0E exp 15, 35, 0, 0, 0, v4, v28, v4, v17 ; F800023F 11041C04 s_buffer_load_dword s4, s[0:3], 0x2e ; C202012E s_buffer_load_dword s5, s[0:3], 0x26 ; C2028126 s_buffer_load_dword s6, s[0:3], 0x2a ; C203012A s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v4, v18, v18 ; 06082512 v_sub_f32_e32 v14, 1.0, v8 ; 081C10F2 v_mul_f32_e32 v4, v4, v14 ; 10081D04 v_add_f32_e32 v14, v20, v20 ; 061C2914 v_mac_f32_e32 v4, v14, v8 ; 3E08110E v_mul_f32_e32 v4, v4, v10 ; 10081504 v_add_f32_e32 v8, 0x3f4ccccd, v11 ; 061016FF 3F4CCCCD v_add_f32_e32 v10, v19, v19 ; 06142713 v_add_f32_e32 v11, v21, v21 ; 06162B15 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v14, s4 ; 7E1C0204 v_mad_f32 v14, -s6, v5, -v14 ; D282000E A43A0A06 v_mov_b32_e32 v17, s5 ; 7E220205 v_mad_f32 v17, s6, v5, v17 ; D2820011 04460A06 v_mul_f32_e64 v18, 0, s4 ; D2100012 00000880 v_mac_f32_e32 v18, s5, v5 ; 3E240A05 v_mul_f32_e32 v5, v14, v14 ; 100A1D0E v_mac_f32_e32 v5, v18, v18 ; 3E0A2512 v_mac_f32_e32 v5, v17, v17 ; 3E0A2311 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mul_f32_e32 v19, s5, v6 ; 10260C05 v_mad_f32 v19, v29, -s6, -v19 ; D2820013 C44C0D1D v_mad_f32 v19, -v7, s4, v19 ; D2820013 244C0907 v_mad_f32 v19, 0.5, |v19|, 0.5 ; D2820213 03C226F0 v_sub_f32_e32 v20, 1.0, v19 ; 082826F2 v_mul_f32_e32 v6, v6, v20 ; 100C2906 v_mul_f32_e32 v7, v7, v20 ; 100E2907 v_mul_f32_e32 v14, v5, v14 ; 101C1D05 v_mul_f32_e32 v18, v5, v18 ; 10242505 v_mul_f32_e32 v5, v5, v17 ; 100A2305 v_mul_f32_e32 v17, s6, v5 ; 10220A06 v_mad_f32 v17, v18, s4, -v17 ; D2820011 84440912 v_mul_f32_e32 v21, s4, v14 ; 102A1C04 v_mad_f32 v21, v5, s5, -v21 ; D2820015 84540B05 v_mul_f32_e32 v22, v17, v17 ; 102C2311 v_mac_f32_e32 v22, v21, v21 ; 3E2C2B15 v_mul_f32_e32 v21, s5, v18 ; 102A2405 v_mad_f32 v21, v14, s6, -v21 ; D2820015 84540D0E v_mac_f32_e32 v22, v21, v21 ; 3E2C2B15 v_rsq_clamp_f32_e32 v22, v22 ; 7E2C5916 v_sub_f32_e32 v23, 1.0, v9 ; 082E12F2 v_mul_f32_e32 v10, v10, v23 ; 10142F0A v_mac_f32_e32 v10, v11, v9 ; 3E14130B v_mul_f32_e32 v9, v22, v17 ; 10122316 v_mul_f32_e32 v11, v22, v21 ; 10162B16 v_mul_f32_e32 v17, v29, v20 ; 1022291D v_mac_f32_e32 v6, v9, v19 ; 3E0C2709 v_mac_f32_e32 v7, v11, v19 ; 3E0E270B v_mac_f32_e32 v17, 1.0, v19 ; 3E2226F2 v_mul_f32_e32 v9, v6, v6 ; 10120D06 v_mac_f32_e32 v9, v17, v17 ; 3E122311 v_mac_f32_e32 v9, v7, v7 ; 3E120F07 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mad_f32 v1, v14, v4, v1 ; D2820001 0406090E v_mad_f32 v2, v18, v4, v2 ; D2820002 040A0912 v_mac_f32_e32 v3, v5, v4 ; 3E060905 v_mul_f32_e32 v4, v9, v6 ; 10080D09 v_mul_f32_e32 v5, v9, v17 ; 100A2309 v_mul_f32_e32 v6, v9, v7 ; 100C0F09 v_mul_f32_e32 v7, v10, v8 ; 100E110A v_mac_f32_e32 v1, v4, v7 ; 3E020F04 v_mul_f32_e32 v4, 0, v24 ; 10083080 v_mul_f32_e32 v4, v10, v4 ; 1008090A v_mul_f32_e32 v9, v8, v4 ; 10120908 v_cmp_lt_f32_e32 vcc, 0, v9 ; 7C021280 v_cndmask_b32_e64 v11, v9, 1.0, vcc ; D200000B 01A9E509 v_cmp_le_f32_e32 vcc, 0, v11 ; 7C061680 v_cndmask_b32_e32 v11, -1.0, v11 ; 001616F3 v_mac_f32_e32 v2, v5, v7 ; 3E040F05 v_mac_f32_e32 v3, v6, v7 ; 3E060F06 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 v_add_f32_e32 v5, -0.5, v15 ; 060A1EF1 v_mac_f32_e32 v5, -2.0, v25 ; 3E0A32F5 v_mul_f32_e32 v5, v24, v5 ; 100A0B18 v_mul_f32_e32 v5, v10, v5 ; 100A0B0A v_mul_f32_e32 v6, v8, v5 ; 100C0B08 v_cmp_lt_f32_e32 vcc, 0, v6 ; 7C020C80 v_cndmask_b32_e64 v7, v6, 1.0, vcc ; D2000007 01A9E506 v_cmp_le_f32_e32 vcc, 0, v7 ; 7C060E80 v_cndmask_b32_e32 v7, -1.0, v7 ; 000E0EF3 v_mov_b32_e32 v14, 0x42c80000 ; 7E1C02FF 42C80000 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v14, s4, v14 ; 061C1C04 v_add_f32_e32 v15, 1.0, v27 ; 061E36F2 v_mul_f32_e32 v14, v14, v15 ; 101C1F0E v_mul_f32_e32 v14, 0x43340000, v14 ; 101C1CFF 43340000 v_mul_f32_e32 v0, v0, v14 ; 10001D00 v_mov_b32_e32 v14, 0xbc75c28f ; 7E1C02FF BC75C28F v_mov_b32_e32 v15, 0x3d8f5c29 ; 7E1E02FF 3D8F5C29 v_mad_f32 v9, |v9|, v15, v14 ; D2820109 043A1F09 v_max_f32_e32 v9, 0, v9 ; 20121280 v_mul_f32_e32 v9, v9, v11 ; 10121709 v_mac_f32_e32 v2, v8, v4 ; 3E040908 v_fract_f32_e32 v0, v0 ; 7E004100 v_sin_f32_e32 v0, v0 ; 7E006B00 v_mul_f32_e32 v0, 0x3f333333, v0 ; 100000FF 3F333333 v_mac_f32_e32 v2, v0, v9 ; 3E041300 v_mac_f32_e32 v1, v8, v5 ; 3E020B08 v_mad_f32 v4, |v6|, v15, v14 ; D2820104 043A1F06 v_max_f32_e32 v4, 0, v4 ; 20080880 v_mul_f32_e32 v4, v4, v7 ; 10080F04 v_mac_f32_e32 v1, v0, v4 ; 3E020900 v_mul_f32_e32 v4, s13, v1 ; 1008020D v_mul_f32_e32 v5, s12, v1 ; 100A020C v_mul_f32_e32 v6, s15, v1 ; 100C020F v_mul_f32_e32 v1, s14, v1 ; 1002020E v_mac_f32_e32 v4, s22, v2 ; 3E080416 v_mac_f32_e32 v5, s23, v2 ; 3E0A0417 v_mac_f32_e32 v6, s20, v2 ; 3E0C0414 v_mac_f32_e32 v1, s21, v2 ; 3E020415 v_add_f32_e32 v2, -0.5, v16 ; 060420F1 v_mul_f32_e32 v7, v13, v12 ; 100E190D v_mac_f32_e32 v2, -2.0, v7 ; 3E040EF5 v_mul_f32_e32 v2, v24, v2 ; 10040518 v_mul_f32_e32 v2, v10, v2 ; 1004050A v_mac_f32_e32 v3, v8, v2 ; 3E060508 v_mul_f32_e32 v2, v8, v2 ; 10040508 v_mad_f32 v7, |v2|, v15, v14 ; D2820107 043A1F02 v_cmp_lt_f32_e32 vcc, 0, v2 ; 7C020480 v_cndmask_b32_e64 v2, v2, 1.0, vcc ; D2000002 01A9E502 v_cmp_le_f32_e32 vcc, 0, v2 ; 7C060480 v_cndmask_b32_e32 v2, -1.0, v2 ; 000404F3 v_max_f32_e32 v7, 0, v7 ; 200E0E80 v_mul_f32_e32 v2, v7, v2 ; 10040507 v_mac_f32_e32 v3, v0, v2 ; 3E060500 v_mac_f32_e32 v4, s18, v3 ; 3E080612 v_mac_f32_e32 v5, s19, v3 ; 3E0A0613 v_mac_f32_e32 v6, s16, v3 ; 3E0C0610 v_mac_f32_e32 v1, s17, v3 ; 3E020611 s_buffer_load_dword s4, s[0:3], 0x22 ; C2020122 s_buffer_load_dword s5, s[0:3], 0x23 ; C2028123 s_buffer_load_dword s6, s[0:3], 0x60 ; C2030160 s_buffer_load_dword s7, s[0:3], 0x61 ; C2038161 s_buffer_load_dword s8, s[0:3], 0x62 ; C2040162 s_buffer_load_dword s9, s[0:3], 0x63 ; C2048163 s_buffer_load_dword s12, s[0:3], 0x64 ; C2060164 s_buffer_load_dword s13, s[0:3], 0x65 ; C2068165 s_buffer_load_dword s14, s[0:3], 0x66 ; C2070166 s_buffer_load_dword s15, s[0:3], 0x67 ; C2078167 s_buffer_load_dword s16, s[0:3], 0x68 ; C2080168 s_buffer_load_dword s17, s[0:3], 0x69 ; C2088169 s_buffer_load_dword s18, s[0:3], 0x6a ; C209016A s_buffer_load_dword s19, s[0:3], 0x6b ; C209816B s_buffer_load_dword s20, s[0:3], 0x6c ; C20A016C s_buffer_load_dword s21, s[0:3], 0x6d ; C20A816D s_buffer_load_dword s22, s[0:3], 0x6e ; C20B016E s_buffer_load_dword s0, s[0:3], 0x6f ; C200016F v_mad_f32 v0, 0, v0, 0 ; D2820000 02020080 v_add_f32_e32 v0, 1.0, v0 ; 060000F2 v_mac_f32_e32 v4, s10, v0 ; 3E08000A v_mac_f32_e32 v5, s11, v0 ; 3E0A000B s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v6, s4, v0 ; 3E0C0004 v_mac_f32_e32 v1, s5, v0 ; 3E020005 v_mul_f32_e32 v0, s6, v4 ; 10000806 v_mul_f32_e32 v2, s7, v4 ; 10040807 v_mul_f32_e32 v3, s8, v4 ; 10060808 v_mul_f32_e32 v4, s9, v4 ; 10080809 v_mac_f32_e32 v0, s12, v5 ; 3E000A0C v_mac_f32_e32 v2, s13, v5 ; 3E040A0D v_mac_f32_e32 v3, s14, v5 ; 3E060A0E v_mac_f32_e32 v4, s15, v5 ; 3E080A0F v_mac_f32_e32 v0, s16, v6 ; 3E000C10 v_mac_f32_e32 v2, s17, v6 ; 3E040C11 v_mac_f32_e32 v3, s18, v6 ; 3E060C12 v_mac_f32_e32 v4, s19, v6 ; 3E080C13 v_mac_f32_e32 v0, s20, v1 ; 3E000214 v_mac_f32_e32 v2, s21, v1 ; 3E040215 v_mac_f32_e32 v3, s22, v1 ; 3E060216 v_mac_f32_e32 v4, s0, v1 ; 3E080200 exp 15, 12, 0, 1, 0, v0, v2, v3, v4 ; F80008CF 04030200 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 96 VGPRS: 52 Code Size: 2212 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1..3] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { 0.5000, -1.0000, 0.0000, 5.0000} IMM[1] FLT32 { 1.0000, 2.0000, 0.0000, 0.0000} 0: ADD TEMP[0].x, IN[0].zzzz, IMM[0].xxxx 1: FLR TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].x, TEMP[0].xxxx, CONST[1].xxxx 3: FLR TEMP[1].x, TEMP[0].xxxx 4: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1].yyyy 5: FRC TEMP[0].x, TEMP[0].xxxx 6: FRC TEMP[1].x, TEMP[1].xxxx 7: MOV TEMP[0].y, TEMP[1].xxxx 8: DDX TEMP[1].xy, IN[0].xyyy 9: MUL TEMP[2], CONST[3].xxxx, IN[0].xyyy 10: DDY TEMP[2].xy, TEMP[2] 11: DP2 TEMP[1].x, TEMP[1].xyyy, TEMP[1].xyyy 12: SQRT TEMP[1].x, TEMP[1].xxxx 13: DP2 TEMP[2].x, TEMP[2].xyyy, TEMP[2].xyyy 14: SQRT TEMP[2].x, TEMP[2].xxxx 15: MOV TEMP[1].y, TEMP[2].xxxx 16: DP2 TEMP[1].x, TEMP[1].xyyy, TEMP[1].xyyy 17: SQRT TEMP[1].x, TEMP[1].xxxx 18: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1].wwww 19: LG2 TEMP[1].x, TEMP[1].xxxx 20: ADD TEMP[1].x, TEMP[1].xxxx, IMM[0].yyyy 21: ADD TEMP[1].x, TEMP[1].xxxx, IN[0].wwww 22: MAX TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz 23: MIN TEMP[1].x, TEMP[1].xxxx, IMM[0].wwww 24: CEIL TEMP[2].x, TEMP[1].xxxx 25: EX2 TEMP[2].x, TEMP[2].xxxx 26: MUL TEMP[2].x, TEMP[2].xxxx, CONST[1].zzzz 27: RCP TEMP[3].x, CONST[1].wwww 28: MUL TEMP[3].x, IMM[0].xxxx, TEMP[3].xxxx 29: FRC TEMP[4].xy, IN[0].xyyy 30: MUL TEMP[5].x, TEMP[2].xxxx, IMM[1].yyyy 31: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[3].xxxx 32: ADD TEMP[5].x, IMM[1].xxxx, -TEMP[5].xxxx 33: MUL TEMP[2].x, TEMP[3].xxxx, TEMP[2].xxxx 34: MAD TEMP[2].xy, TEMP[4].xyyy, TEMP[5].xxxx, TEMP[2].xxxx 35: MAD TEMP[0].xy, TEMP[2].xyyy, CONST[1].xyyy, TEMP[0].xyyy 36: MOV TEMP[0].xy, TEMP[0].xyyy 37: MOV TEMP[0].w, TEMP[1].xxxx 38: TXL TEMP[0].w, TEMP[0], SAMP[0], 2D 39: FSLT TEMP[0].x, TEMP[0].wwww, CONST[2].xxxx 40: AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx 41: KILL_IF -TEMP[0].xxxx 42: MOV TEMP[0].x, IN[1].zzzz 43: MOV TEMP[0].y, IN[2].yyyy 44: MOV TEMP[0].z, IN[3].xxxx 45: MAD TEMP[0].xyz, TEMP[0].xyzz, IMM[0].xxxx, IMM[0].xxxx 46: MOV TEMP[0].w, IMM[0].zzzz 47: MOV OUT[0], TEMP[0] 48: END ; ModuleID = 'tgsi' @ddxy_lds = external addrspace(3) global [64 x i32] define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %30 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0 %32 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 %34 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %41 = fadd float %36, 5.000000e-01 %42 = call float @llvm.floor.f32(float %41) %43 = fmul float %42, %24 %44 = call float @llvm.floor.f32(float %43) %45 = fmul float %44, %25 %46 = call float @llvm.floor.f32(float %43) %47 = fsub float %43, %46 %48 = call float @llvm.floor.f32(float %45) %49 = fsub float %45, %48 %50 = call i32 @llvm.SI.tid() %51 = sext i32 %50 to i64 %52 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %51 %53 = bitcast i32 addrspace(3)* %52 to float addrspace(3)* store float %35, float addrspace(3)* %53, align 4 %54 = call i32 @llvm.SI.tid() %55 = sext i32 %54 to i64 %56 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %55 %57 = and i32 %54, -4 %58 = sext i32 %57 to i64 %59 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %58 %60 = or i32 %57, 1 %61 = sext i32 %60 to i64 %62 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %61 %63 = bitcast i32 addrspace(3)* %56 to float addrspace(3)* store float %34, float addrspace(3)* %63, align 4 %64 = bitcast i32 addrspace(3)* %59 to float addrspace(3)* %65 = load float, float addrspace(3)* %64, align 4 %66 = bitcast i32 addrspace(3)* %62 to float addrspace(3)* %67 = load float, float addrspace(3)* %66, align 4 %68 = fsub float %67, %65 %69 = bitcast i32 addrspace(3)* %56 to float addrspace(3)* store float %35, float addrspace(3)* %69, align 4 %70 = bitcast i32 addrspace(3)* %59 to float addrspace(3)* %71 = load float, float addrspace(3)* %70, align 4 %72 = bitcast i32 addrspace(3)* %62 to float addrspace(3)* %73 = load float, float addrspace(3)* %72, align 4 %74 = fsub float %73, %71 %75 = fmul float %29, %34 %76 = fmul float %29, %35 %77 = fmul float %29, %35 %78 = call i32 @llvm.SI.tid() %79 = sext i32 %78 to i64 %80 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %79 %81 = bitcast i32 addrspace(3)* %80 to float addrspace(3)* store float %77, float addrspace(3)* %81, align 4 %82 = call i32 @llvm.SI.tid() %83 = sext i32 %82 to i64 %84 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %83 %85 = and i32 %82, -4 %86 = sext i32 %85 to i64 %87 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %86 %88 = or i32 %85, 2 %89 = sext i32 %88 to i64 %90 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %89 %91 = bitcast i32 addrspace(3)* %84 to float addrspace(3)* store float %75, float addrspace(3)* %91, align 4 %92 = bitcast i32 addrspace(3)* %87 to float addrspace(3)* %93 = load float, float addrspace(3)* %92, align 4 %94 = bitcast i32 addrspace(3)* %90 to float addrspace(3)* %95 = load float, float addrspace(3)* %94, align 4 %96 = fsub float %95, %93 %97 = bitcast i32 addrspace(3)* %84 to float addrspace(3)* store float %76, float addrspace(3)* %97, align 4 %98 = bitcast i32 addrspace(3)* %87 to float addrspace(3)* %99 = load float, float addrspace(3)* %98, align 4 %100 = bitcast i32 addrspace(3)* %90 to float addrspace(3)* %101 = load float, float addrspace(3)* %100, align 4 %102 = fsub float %101, %99 %103 = bitcast i32 addrspace(3)* %84 to float addrspace(3)* store float %77, float addrspace(3)* %103, align 4 %104 = fmul float %68, %68 %105 = fmul float %74, %74 %106 = fadd float %104, %105 %107 = call float @llvm.sqrt.f32(float %106) %108 = fmul float %96, %96 %109 = fmul float %102, %102 %110 = fadd float %108, %109 %111 = call float @llvm.sqrt.f32(float %110) %112 = fmul float %107, %107 %113 = fmul float %111, %111 %114 = fadd float %112, %113 %115 = call float @llvm.sqrt.f32(float %114) %116 = fmul float %115, %27 %117 = call float @llvm.log2.f32(float %116) %118 = fadd float %117, -1.000000e+00 %119 = fadd float %118, %37 %120 = call float @llvm.maxnum.f32(float %119, float 0.000000e+00) %121 = call float @llvm.minnum.f32(float %120, float 5.000000e+00) %122 = call float @llvm.ceil.f32(float %121) %123 = call float @llvm.AMDIL.exp.(float %122) %124 = fmul float %123, %26 %125 = fdiv float 1.000000e+00, %27 %126 = fmul float %125, 5.000000e-01 %127 = call float @llvm.floor.f32(float %34) %128 = fsub float %34, %127 %129 = call float @llvm.floor.f32(float %35) %130 = fsub float %35, %129 %131 = fmul float %124, 2.000000e+00 %132 = fmul float %131, %126 %133 = fsub float 1.000000e+00, %132 %134 = fmul float %126, %124 %135 = fmul float %128, %133 %136 = fadd float %135, %134 %137 = fmul float %130, %133 %138 = fadd float %137, %134 %139 = fmul float %136, %24 %140 = fadd float %139, %47 %141 = fmul float %138, %25 %142 = fadd float %141, %49 %143 = bitcast float %140 to i32 %144 = bitcast float %142 to i32 %145 = bitcast float %121 to i32 %146 = insertelement <4 x i32> undef, i32 %143, i32 0 %147 = insertelement <4 x i32> %146, i32 %144, i32 1 %148 = insertelement <4 x i32> %147, i32 %145, i32 2 %149 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %148, <32 x i8> %31, <16 x i8> %33, i32 2) %150 = extractelement <4 x float> %149, i32 3 %151 = fcmp olt float %150, %28 %152 = select i1 %151, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %152) %153 = fmul float %38, 5.000000e-01 %154 = fadd float %153, 5.000000e-01 %155 = fmul float %39, 5.000000e-01 %156 = fadd float %155, 5.000000e-01 %157 = fmul float %40, 5.000000e-01 %158 = fadd float %157, 5.000000e-01 %159 = call i32 @llvm.SI.packf16(float %154, float %156) %160 = bitcast i32 %159 to float %161 = call i32 @llvm.SI.packf16(float %158, float 0.000000e+00) %162 = bitcast i32 %161 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %160, float %162, float %160, float %162) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: readnone declare i32 @llvm.SI.tid() #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.ceil.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 v_interp_p1_f32 v0, v0, 0, 3, [m0] ; C8000C00 v_interp_p2_f32 v0, [v0], v1, 0, 3, [m0] ; C8010C01 v_mbcnt_lo_u32_b32_e64 v1, -1, 0 ; D2460001 000100C1 v_mbcnt_hi_u32_b32_e32 v1, -1, v1 ; 480202C1 v_lshlrev_b32_e32 v8, 2, v1 ; 34100282 s_mov_b32 m0, -1 ; BEFC03C1 ds_write_b32 v8, v3 ; D8340000 00000308 ds_write_b32 v8, v2 ; D8340000 00000208 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0xc ; C204010C v_and_b32_e32 v1, -4, v1 ; 360202C4 v_or_b32_e32 v9, 1, v1 ; 38120281 v_lshlrev_b32_e32 v9, 2, v9 ; 34121282 v_lshlrev_b32_e32 v10, 2, v1 ; 34140282 ds_read_b32 v11, v10 ; D8D80000 0B00000A ds_read_b32 v12, v9 ; D8D80000 0C000009 ds_write_b32 v8, v3 ; D8340000 00000308 ds_read_b32 v9, v9 ; D8D80000 09000009 ds_read_b32 v13, v10 ; D8D80000 0D00000A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v14, s8, v3 ; 101C0608 ds_write_b32 v8, v14 ; D8340000 00000E08 v_mul_f32_e32 v15, s8, v2 ; 101E0408 ds_write_b32 v8, v15 ; D8340000 00000F08 v_or_b32_e32 v1, 2, v1 ; 38020282 v_lshlrev_b32_e32 v1, 2, v1 ; 34020282 s_waitcnt lgkmcnt(0) ; BF8C007F ds_read_b32 v15, v10 ; D8D80000 0F00000A ds_read_b32 v16, v1 ; D8D80000 10000001 ds_write_b32 v8, v14 ; D8340000 00000E08 ds_read_b32 v10, v10 ; D8D80000 0A00000A ds_read_b32 v1, v1 ; D8D80000 01000001 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s0, s[0:3], 0x8 ; C2000108 ds_write_b32 v8, v14 ; D8340000 00000E08 v_subrev_f32_e32 v8, v11, v12 ; 0A10190B v_subrev_f32_e32 v9, v13, v9 ; 0A12130D v_mul_f32_e32 v9, v9, v9 ; 10121309 v_mac_f32_e32 v9, v8, v8 ; 3E121108 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v8, v15, v16 ; 0A10210F v_subrev_f32_e32 v1, v10, v1 ; 0A02030A v_mul_f32_e32 v1, v1, v1 ; 10020301 v_mac_f32_e32 v1, v8, v8 ; 3E021108 v_add_f32_e32 v4, 0.5, v4 ; 060808F0 v_floor_f32_e32 v4, v4 ; 7E084904 v_sqrt_f32_e32 v8, v9 ; 7E106709 v_sqrt_f32_e32 v1, v1 ; 7E026701 v_mul_f32_e32 v1, v1, v1 ; 10020301 v_mac_f32_e32 v1, v8, v8 ; 3E021108 v_mul_f32_e32 v8, s8, v4 ; 10100808 v_floor_f32_e32 v8, v8 ; 7E104908 v_sqrt_f32_e32 v1, v1 ; 7E026701 v_mul_f32_e32 v1, s11, v1 ; 1002020B v_log_f32_e32 v1, v1 ; 7E024F01 v_mul_f32_e32 v9, s9, v8 ; 10121009 v_floor_f32_e32 v9, v9 ; 7E124909 v_mad_f32 v10, v4, s8, -v8 ; D282000A 84201104 v_mad_f32 v11, v8, s9, -v9 ; D282000B 84241308 v_add_f32_e32 v1, -1.0, v1 ; 060202F3 v_add_f32_e32 v1, v5, v1 ; 06020305 v_max_f32_e32 v1, 0, v1 ; 20020280 v_min_f32_e32 v12, 0x40a00000, v1 ; 1E1802FF 40A00000 v_ceil_f32_e32 v1, v12 ; 7E02450C v_exp_f32_e32 v1, v1 ; 7E024B01 v_mul_f32_e32 v1, s10, v1 ; 1002020A v_floor_f32_e32 v4, v2 ; 7E084902 v_subrev_f32_e32 v2, v4, v2 ; 0A040504 v_rcp_f32_e32 v4, s11 ; 7E08540B v_floor_f32_e32 v5, v3 ; 7E0A4903 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 v_subrev_f32_e32 v3, v5, v3 ; 0A060705 v_mul_f32_e32 v4, 0.5, v4 ; 100808F0 v_mul_f32_e32 v5, -2.0, v1 ; 100A02F5 v_mad_f32 v5, v5, v4, 1.0 ; D2820005 03CA0905 v_mul_f32_e32 v1, v1, v4 ; 10020901 v_mad_f32 v2, v5, v2, v1 ; D2820002 04060505 v_mac_f32_e32 v1, v5, v3 ; 3E020705 v_mac_f32_e32 v10, s8, v2 ; 3E140408 v_mac_f32_e32 v11, s9, v1 ; 3E160209 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v1, 8, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[16:23], s[12:15] ; F0900800 0064010A s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_gt_f32_e32 vcc, s0, v1 ; 7C080200 v_cndmask_b32_e64 v1, 0, -1.0, vcc ; D2000001 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v1 ; 7C260280 v_mad_f32 v1, 0.5, v6, 0.5 ; D2820001 03C20CF0 v_mad_f32 v2, 0.5, v7, 0.5 ; D2820002 03C20EF0 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 20 Code Size: 560 bytes LDS: 1 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[0..16] DCL CONST[20..31] DCL TEMP[0..14], LOCAL IMM[0] FLT32 { 1.0000, 0.5000, 0.0000, 0.4990} IMM[1] FLT32 { 1.4990, 2.0000, 0.8000, 1.5000} IMM[2] FLT32 { -0.5000, -1.0000, 0.2000, 8.0000} IMM[3] FLT32 { -4.0000, 0.0700, -0.0150, 100.0000} IMM[4] FLT32 { 180.0000, 0.7000, 0.0000, 0.0000} 0: MUL TEMP[0].x, IN[4].xxxx, CONST[20].xxxx 1: FLR TEMP[0].x, TEMP[0].xxxx 2: ADD TEMP[1].xy, IN[0].xyyy, IMM[0].xxxx 3: MUL TEMP[1].xy, TEMP[1].xyyy, IMM[0].yyyy 4: MOV TEMP[1].z, TEMP[0].xxxx 5: RCP TEMP[2].x, CONST[20].xxxx 6: ADD TEMP[3].x, TEMP[0].xxxx, IMM[0].wwww 7: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[2].xxxx 8: RCP TEMP[4].x, CONST[20].yyyy 9: MUL TEMP[5].x, IMM[0].wwww, TEMP[4].xxxx 10: MOV TEMP[3].y, TEMP[5].xxxx 11: MOV TEMP[3].xy, TEMP[3].xyyy 12: MOV TEMP[3].w, IMM[0].zzzz 13: TXL TEMP[3], TEMP[3], SAMP[0], 2D 14: ADD TEMP[5].x, TEMP[0].xxxx, IMM[0].wwww 15: MUL TEMP[0].x, TEMP[5].xxxx, TEMP[2].xxxx 16: MUL TEMP[2].x, IMM[1].xxxx, TEMP[4].xxxx 17: MOV TEMP[0].y, TEMP[2].xxxx 18: MOV TEMP[0].xy, TEMP[0].xyyy 19: MOV TEMP[0].w, IMM[0].zzzz 20: TXL TEMP[0].xy, TEMP[0], SAMP[0], 2D 21: MUL TEMP[2].xy, TEMP[3].xyyy, IMM[1].yyyy 22: MUL TEMP[3].xy, TEMP[3].zwww, IMM[1].yyyy 23: MOV TEMP[4].xy, IN[1].xzzz 24: MOV TEMP[4].w, IMM[0].zzzz 25: TXL TEMP[4].xy, TEMP[4], SAMP[1], 2D 26: LRP TEMP[3].xy, TEMP[4].xyyy, TEMP[3].xyyy, TEMP[2].xyyy 27: MOV TEMP[2].x, IN[3].xxxx 28: MUL TEMP[4].x, IN[3].xxxx, IN[3].xxxx 29: MAD TEMP[4].x, IN[3].yyyy, IN[3].yyyy, TEMP[4].xxxx 30: SQRT TEMP[4].x, TEMP[4].xxxx 31: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx 32: MOV TEMP[2].y, TEMP[4].xxxx 33: MOV TEMP[2].z, IN[3].yyyy 34: MOV TEMP[4].y, TEMP[4].xxxx 35: MOV TEMP[5].x, CONST[13].zzzz 36: MOV TEMP[5].y, CONST[14].zzzz 37: MOV TEMP[5].z, CONST[15].zzzz 38: MOV TEMP[5].xyz, -TEMP[5].xyzx 39: MUL TEMP[6].xyz, IMM[0].zzxx, TEMP[5].yzxx 40: MAD TEMP[6].xyz, IMM[0].xzzz, TEMP[5].zxyy, -TEMP[6].xyzz 41: DP3 TEMP[7].x, TEMP[6].xyzz, TEMP[6].xyzz 42: RSQ TEMP[7].x, TEMP[7].xxxx 43: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[7].xxxx 44: MUL TEMP[7].xyz, TEMP[5].zxyy, TEMP[6].yzxx 45: MAD TEMP[7].xyz, TEMP[5].yzxx, TEMP[6].zxyy, -TEMP[7].xyzz 46: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 47: RSQ TEMP[8].x, TEMP[8].xxxx 48: MUL TEMP[7].xz, TEMP[7].xyzz, TEMP[8].xxxx 49: MOV TEMP[7].xz, TEMP[7].xxzx 50: MOV TEMP[7].y, IMM[0].xxxx 51: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[2].xyzz 52: ABS TEMP[5].x, TEMP[5].xxxx 53: MAD TEMP[5].x, IMM[0].yyyy, TEMP[5].xxxx, IMM[0].yyyy 54: LRP TEMP[2].xyz, TEMP[5].xxxx, TEMP[7].xyzz, TEMP[2].xyzz 55: DP3 TEMP[5].x, TEMP[2].xyzz, TEMP[2].xyzz 56: RSQ TEMP[5].x, TEMP[5].xxxx 57: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx 58: MUL TEMP[5], CONST[23].xxyy, CONST[21].xxyy 59: MUL TEMP[5], TEMP[5], CONST[25] 60: MUL TEMP[7], CONST[23].zzww, CONST[21].zzww 61: MUL TEMP[7], TEMP[7], CONST[26] 62: DP2 TEMP[8].x, CONST[25].xyyy, IN[1].xzzz 63: DP2 TEMP[9].x, CONST[25].zwww, IN[1].xzzz 64: MOV TEMP[8].y, TEMP[9].xxxx 65: DP2 TEMP[9].x, CONST[26].xyyy, IN[1].xzzz 66: MOV TEMP[8].z, TEMP[9].xxxx 67: DP2 TEMP[9].x, CONST[26].zwww, IN[1].xzzz 68: MOV TEMP[8].w, TEMP[9].xxxx 69: MUL TEMP[8], CONST[22], TEMP[8] 70: MUL TEMP[9], CONST[0].yyyy, CONST[24] 71: ADD TEMP[8], TEMP[8], TEMP[9] 72: COS TEMP[10].x, TEMP[8].xxxx 73: COS TEMP[10].y, TEMP[8].yyyy 74: COS TEMP[10].z, TEMP[8].zzzz 75: COS TEMP[10].w, TEMP[8].wwww 76: MOV TEMP[8].xy, TEMP[5].xzxx 77: MOV TEMP[8].zw, TEMP[7].zzxz 78: DP4 TEMP[8].x, TEMP[10], TEMP[8] 79: MOV TEMP[5].xy, TEMP[5].ywyy 80: MOV TEMP[5].zw, TEMP[7].wwyw 81: DP4 TEMP[5].x, TEMP[10], TEMP[5] 82: MOV TEMP[8].z, TEMP[5].xxxx 83: ADD TEMP[5].xy, IN[1].xzzz, TEMP[8].xzzz 84: MOV TEMP[7].y, IMM[1].yyyy 85: MUL TEMP[8], CONST[22].xxyy, CONST[21].xxyy 86: MUL TEMP[8], TEMP[8], CONST[25] 87: MUL TEMP[10], CONST[22].zzww, CONST[21].zzww 88: MUL TEMP[10], TEMP[10], CONST[26] 89: DP2 TEMP[11].x, CONST[25].xyyy, TEMP[5].xyyy 90: DP2 TEMP[12].x, CONST[25].zwww, TEMP[5].xyyy 91: MOV TEMP[11].y, TEMP[12].xxxx 92: DP2 TEMP[12].x, CONST[26].xyyy, TEMP[5].xyyy 93: MOV TEMP[11].z, TEMP[12].xxxx 94: DP2 TEMP[5].x, CONST[26].zwww, TEMP[5].xyyy 95: MOV TEMP[11].w, TEMP[5].xxxx 96: MAD TEMP[5], CONST[22], TEMP[11], TEMP[9] 97: COS TEMP[9].x, TEMP[5].xxxx 98: COS TEMP[9].y, TEMP[5].yyyy 99: COS TEMP[9].z, TEMP[5].zzzz 100: COS TEMP[9].w, TEMP[5].wwww 101: MOV TEMP[5].xy, TEMP[8].xzxx 102: MOV TEMP[5].zw, TEMP[10].zzxz 103: DP4 TEMP[5].x, TEMP[9], TEMP[5] 104: MOV TEMP[7].x, -TEMP[5].xxxx 105: MOV TEMP[5].xy, TEMP[8].ywyy 106: MOV TEMP[5].zw, TEMP[10].wwyw 107: DP4 TEMP[5].x, TEMP[9], TEMP[5] 108: MOV TEMP[7].z, -TEMP[5].xxxx 109: DP3 TEMP[5].x, TEMP[7].xyzz, TEMP[7].xyzz 110: RSQ TEMP[5].x, TEMP[5].xxxx 111: MUL TEMP[5].xyz, TEMP[7].xyzz, TEMP[5].xxxx 112: MUL TEMP[7].x, TEMP[0].xxxx, IMM[0].yyyy 113: MUL TEMP[5].xz, TEMP[5].xyzz, IMM[1].yyyy 114: MOV TEMP[8].xz, TEMP[5].xxzx 115: MOV TEMP[8].y, IMM[0].zzzz 116: ADD TEMP[10].xy, IN[1].xzzz, -CONST[27].xyyy 117: MAD TEMP[10].xy, CONST[27].zwww, IMM[0].yyyy, TEMP[10].xyyy 118: RCP TEMP[11].x, CONST[27].zzzz 119: RCP TEMP[11].y, CONST[27].wwww 120: MUL TEMP[9].xy, TEMP[10].xyyy, TEMP[11].xyyy 121: MOV TEMP[10].w, IMM[0].zzzz 122: MOV TEMP[10].xyz, IN[1].xyzx 123: MOV TEMP[11].w, IMM[0].xxxx 124: ADD TEMP[12].x, IN[0].yyyy, IMM[1].zzzz 125: MUL TEMP[13].x, TEMP[12].xxxx, TEMP[3].yyyy 126: MUL TEMP[14].x, IN[0].xxxx, TEMP[3].xxxx 127: MUL TEMP[6].xyz, TEMP[14].xxxx, TEMP[6].xyzz 128: MAD TEMP[11].xyz, TEMP[13].xxxx, TEMP[2].xyzz, TEMP[6].xyzz 129: MOV TEMP[2].y, IMM[0].zzzz 130: MOV TEMP[6].xy, TEMP[9].xyyy 131: MOV TEMP[6].w, IMM[0].zzzz 132: TXL TEMP[6].xyz, TEMP[6], SAMP[2], 2D 133: ADD TEMP[6].xz, TEMP[6].xzyy, IMM[2].xyxx 134: MOV TEMP[2].xz, TEMP[6].xxzx 135: MOV TEMP[6].w, IMM[0].zzzz 136: ADD TEMP[2].xyz, TEMP[8].xyzz, TEMP[2].xyzz 137: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[7].xxxx 138: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].yyyy 139: MUL TEMP[6].xyz, TEMP[3].xyzz, TEMP[12].xxxx 140: MUL TEMP[3].xy, IN[3].xyyy, IMM[1].wwww 141: MOV TEMP[4].x, TEMP[3].xxyx 142: MOV TEMP[2].xy, TEMP[4].xyxx 143: MUL TEMP[4].xy, TEMP[5].xzzz, TEMP[7].xxxx 144: MUL TEMP[4].x, TEMP[4].xyyy, IMM[2].zzzz 145: ADD TEMP[3].x, TEMP[3].yyyy, TEMP[4].xxxx 146: MOV TEMP[2].z, TEMP[3].xxxx 147: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 148: RSQ TEMP[3].x, TEMP[3].xxxx 149: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xxxx 150: MOV TEMP[1].xyz, TEMP[1].xyzx 151: MAD TEMP[0].x, TEMP[0].yyyy, IMM[2].wwww, IMM[3].xxxx 152: MOV TEMP[1].w, TEMP[0].xxxx 153: ADD TEMP[0], TEMP[10], TEMP[11] 154: SSG TEMP[2], TEMP[6] 155: ABS TEMP[4], TEMP[6] 156: MAD TEMP[4], TEMP[4], IMM[3].yyyy, IMM[3].zzzz 157: MAX TEMP[4], TEMP[4], IMM[0].zzzz 158: MUL TEMP[2], TEMP[2], TEMP[4] 159: DP2 TEMP[4].x, IN[3].xyyy, IN[3].xyyy 160: SQRT TEMP[4].x, TEMP[4].xxxx 161: ADD TEMP[4].x, TEMP[4].xxxx, IMM[0].xxxx 162: ADD TEMP[5].x, CONST[0].xxxx, IMM[3].wwww 163: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 164: MUL TEMP[4].x, TEMP[4].xxxx, IMM[4].xxxx 165: SIN TEMP[4].x, TEMP[4].xxxx 166: MUL TEMP[4].x, TEMP[4].xxxx, IMM[4].yyyy 167: MAD TEMP[2], TEMP[2], TEMP[4].xxxx, TEMP[6] 168: ADD TEMP[0], TEMP[0], TEMP[2] 169: MUL TEMP[2], CONST[9], TEMP[0].xxxx 170: MAD TEMP[2], CONST[10], TEMP[0].yyyy, TEMP[2] 171: MAD TEMP[2], CONST[11], TEMP[0].zzzz, TEMP[2] 172: MAD TEMP[0], CONST[12], TEMP[0].wwww, TEMP[2] 173: MUL TEMP[2], CONST[28], TEMP[0].xxxx 174: MAD TEMP[2], CONST[29], TEMP[0].yyyy, TEMP[2] 175: MAD TEMP[2], CONST[30], TEMP[0].zzzz, TEMP[2] 176: MAD TEMP[0], CONST[31], TEMP[0].wwww, TEMP[2] 177: MUL TEMP[2].xyw, TEMP[0], IMM[0].yyyy 178: MOV TEMP[4].x, TEMP[2].xxxx 179: MUL TEMP[5].x, TEMP[2].yyyy, CONST[1].xxxx 180: MOV TEMP[4].y, TEMP[5].xxxx 181: ADD TEMP[2].xy, TEMP[4].xyyy, TEMP[2].wwww 182: MOV TEMP[2].zw, TEMP[0].wwzw 183: MOV TEMP[4].x, CONST[9].xxxx 184: MOV TEMP[4].y, CONST[10].xxxx 185: MOV TEMP[4].z, CONST[11].xxxx 186: MOV TEMP[5].x, CONST[9].yyyy 187: MOV TEMP[5].y, CONST[10].yyyy 188: MOV TEMP[5].z, CONST[11].yyyy 189: MOV TEMP[6].x, CONST[9].zzzz 190: MOV TEMP[6].y, CONST[10].zzzz 191: MOV TEMP[6].z, CONST[11].zzzz 192: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[3].xxxx 193: MAD TEMP[4].xyz, TEMP[5].xyzz, TEMP[3].yyyy, TEMP[4].xyzz 194: MAD TEMP[3].xyz, TEMP[6].xyzz, TEMP[3].zzzz, TEMP[4].xyzz 195: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 196: RSQ TEMP[4].x, TEMP[4].xxxx 197: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 198: MOV TEMP[4].w, IMM[0].xxxx 199: MOV TEMP[4].xyz, TEMP[3].xyzx 200: DP4 TEMP[5].x, CONST[2], TEMP[4] 201: DP4 TEMP[6].x, CONST[3], TEMP[4] 202: MOV TEMP[5].y, TEMP[6].xxxx 203: DP4 TEMP[4].x, CONST[4], TEMP[4] 204: MOV TEMP[5].z, TEMP[4].xxxx 205: MUL TEMP[4], TEMP[3].xyzz, TEMP[3].yzzx 206: DP4 TEMP[6].x, CONST[5], TEMP[4] 207: DP4 TEMP[7].x, CONST[6], TEMP[4] 208: MOV TEMP[6].y, TEMP[7].xxxx 209: DP4 TEMP[4].x, CONST[7], TEMP[4] 210: MOV TEMP[6].z, TEMP[4].xxxx 211: MOV TEMP[4].yzw, IN[2].yxyz 212: MUL TEMP[7].x, TEMP[3].yyyy, TEMP[3].yyyy 213: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[7].xxxx 214: MAD TEMP[3].xyz, CONST[8].xyzz, TEMP[3].xxxx, TEMP[6].xyzz 215: ADD TEMP[3].xyz, TEMP[3].xyzz, TEMP[5].xyzz 216: MOV TEMP[4].x, TEMP[0].zzzz 217: MOV OUT[1], TEMP[1] 218: MOV OUT[2], TEMP[2] 219: MOV OUT[3], TEMP[4] 220: MOV OUT[4], TEMP[3] 221: MOV OUT[0], TEMP[0] 222: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 348) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 392) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 396) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 400) %81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 404) %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 408) %83 = call float @llvm.SI.load.const(<16 x i8> %12, i32 412) %84 = call float @llvm.SI.load.const(<16 x i8> %12, i32 416) %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 420) %86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 424) %87 = call float @llvm.SI.load.const(<16 x i8> %12, i32 428) %88 = call float @llvm.SI.load.const(<16 x i8> %12, i32 432) %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 436) %90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 440) %91 = call float @llvm.SI.load.const(<16 x i8> %12, i32 444) %92 = call float @llvm.SI.load.const(<16 x i8> %12, i32 448) %93 = call float @llvm.SI.load.const(<16 x i8> %12, i32 452) %94 = call float @llvm.SI.load.const(<16 x i8> %12, i32 456) %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 460) %96 = call float @llvm.SI.load.const(<16 x i8> %12, i32 464) %97 = call float @llvm.SI.load.const(<16 x i8> %12, i32 468) %98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 472) %99 = call float @llvm.SI.load.const(<16 x i8> %12, i32 476) %100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 480) %101 = call float @llvm.SI.load.const(<16 x i8> %12, i32 484) %102 = call float @llvm.SI.load.const(<16 x i8> %12, i32 488) %103 = call float @llvm.SI.load.const(<16 x i8> %12, i32 492) %104 = call float @llvm.SI.load.const(<16 x i8> %12, i32 496) %105 = call float @llvm.SI.load.const(<16 x i8> %12, i32 500) %106 = call float @llvm.SI.load.const(<16 x i8> %12, i32 504) %107 = call float @llvm.SI.load.const(<16 x i8> %12, i32 508) %108 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %109 = load <8 x i32>, <8 x i32> addrspace(2)* %108, align 32, !tbaa !0 %110 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %111 = load <4 x i32>, <4 x i32> addrspace(2)* %110, align 16, !tbaa !0 %112 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %113 = bitcast <8 x i32> addrspace(2)* %112 to <32 x i8> addrspace(2)* %114 = load <32 x i8>, <32 x i8> addrspace(2)* %113, align 32, !tbaa !0 %115 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %116 = bitcast <4 x i32> addrspace(2)* %115 to <16 x i8> addrspace(2)* %117 = load <16 x i8>, <16 x i8> addrspace(2)* %116, align 16, !tbaa !0 %118 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %119 = bitcast <8 x i32> addrspace(2)* %118 to <32 x i8> addrspace(2)* %120 = load <32 x i8>, <32 x i8> addrspace(2)* %119, align 32, !tbaa !0 %121 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %122 = bitcast <4 x i32> addrspace(2)* %121 to <16 x i8> addrspace(2)* %123 = load <16 x i8>, <16 x i8> addrspace(2)* %122, align 16, !tbaa !0 %124 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %125 = load <16 x i8>, <16 x i8> addrspace(2)* %124, align 16, !tbaa !0 %126 = add i32 %5, %7 %127 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %125, i32 0, i32 %126) %128 = extractelement <4 x float> %127, i32 0 %129 = extractelement <4 x float> %127, i32 1 %130 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %131 = load <16 x i8>, <16 x i8> addrspace(2)* %130, align 16, !tbaa !0 %132 = add i32 %5, %7 %133 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %131, i32 0, i32 %132) %134 = extractelement <4 x float> %133, i32 0 %135 = extractelement <4 x float> %133, i32 1 %136 = extractelement <4 x float> %133, i32 2 %137 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %138 = load <16 x i8>, <16 x i8> addrspace(2)* %137, align 16, !tbaa !0 %139 = add i32 %5, %7 %140 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %138, i32 0, i32 %139) %141 = extractelement <4 x float> %140, i32 0 %142 = extractelement <4 x float> %140, i32 1 %143 = extractelement <4 x float> %140, i32 2 %144 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %145 = load <16 x i8>, <16 x i8> addrspace(2)* %144, align 16, !tbaa !0 %146 = add i32 %5, %7 %147 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %145, i32 0, i32 %146) %148 = extractelement <4 x float> %147, i32 0 %149 = extractelement <4 x float> %147, i32 1 %150 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %151 = load <16 x i8>, <16 x i8> addrspace(2)* %150, align 16, !tbaa !0 %152 = add i32 %5, %7 %153 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %151, i32 0, i32 %152) %154 = extractelement <4 x float> %153, i32 0 %155 = fmul float %154, %62 %156 = call float @llvm.floor.f32(float %155) %157 = fadd float %128, 1.000000e+00 %158 = fadd float %129, 1.000000e+00 %159 = fmul float %157, 5.000000e-01 %160 = fmul float %158, 5.000000e-01 %161 = fdiv float 1.000000e+00, %62 %162 = fadd float %156, 0x3FDFEF9DC0000000 %163 = fmul float %162, %161 %164 = fdiv float 1.000000e+00, %63 %165 = fmul float %164, 0x3FDFEF9DC0000000 %166 = bitcast float %163 to i32 %167 = bitcast float %165 to i32 %168 = insertelement <4 x i32> undef, i32 %166, i32 0 %169 = insertelement <4 x i32> %168, i32 %167, i32 1 %170 = insertelement <4 x i32> %169, i32 0, i32 2 %171 = bitcast <8 x i32> %109 to <32 x i8> %172 = bitcast <4 x i32> %111 to <16 x i8> %173 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %170, <32 x i8> %171, <16 x i8> %172, i32 2) %174 = extractelement <4 x float> %173, i32 0 %175 = extractelement <4 x float> %173, i32 1 %176 = extractelement <4 x float> %173, i32 2 %177 = extractelement <4 x float> %173, i32 3 %178 = fadd float %156, 0x3FDFEF9DC0000000 %179 = fmul float %178, %161 %180 = fmul float %164, 0x3FF7FBE760000000 %181 = bitcast float %179 to i32 %182 = bitcast float %180 to i32 %183 = insertelement <4 x i32> undef, i32 %181, i32 0 %184 = insertelement <4 x i32> %183, i32 %182, i32 1 %185 = insertelement <4 x i32> %184, i32 0, i32 2 %186 = bitcast <8 x i32> %109 to <32 x i8> %187 = bitcast <4 x i32> %111 to <16 x i8> %188 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %185, <32 x i8> %186, <16 x i8> %187, i32 2) %189 = extractelement <4 x float> %188, i32 0 %190 = extractelement <4 x float> %188, i32 1 %191 = fmul float %174, 2.000000e+00 %192 = fmul float %175, 2.000000e+00 %193 = fmul float %176, 2.000000e+00 %194 = fmul float %177, 2.000000e+00 %195 = bitcast float %134 to i32 %196 = bitcast float %136 to i32 %197 = insertelement <4 x i32> undef, i32 %195, i32 0 %198 = insertelement <4 x i32> %197, i32 %196, i32 1 %199 = insertelement <4 x i32> %198, i32 0, i32 2 %200 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %199, <32 x i8> %114, <16 x i8> %117, i32 2) %201 = extractelement <4 x float> %200, i32 0 %202 = extractelement <4 x float> %200, i32 1 %203 = call float @llvm.AMDGPU.lrp(float %201, float %193, float %191) %204 = call float @llvm.AMDGPU.lrp(float %202, float %194, float %192) %205 = fmul float %148, %148 %206 = fmul float %149, %149 %207 = fadd float %206, %205 %208 = call float @llvm.sqrt.f32(float %207) %209 = fsub float 1.000000e+00, %208 %210 = fsub float -0.000000e+00, %59 %211 = fsub float -0.000000e+00, %60 %212 = fsub float -0.000000e+00, %61 %213 = fmul float %60, -0.000000e+00 %214 = fmul float %61, -0.000000e+00 %215 = fsub float -0.000000e+00, %213 %216 = fsub float %215, %61 %217 = fmul float %59, -0.000000e+00 %218 = fsub float %217, %214 %219 = fmul float %60, -0.000000e+00 %220 = fadd float %219, %59 %221 = fmul float %216, %216 %222 = fmul float %218, %218 %223 = fadd float %222, %221 %224 = fmul float %220, %220 %225 = fadd float %223, %224 %226 = call float @llvm.AMDGPU.rsq.clamped.f32(float %225) %227 = fmul float %216, %226 %228 = fmul float %218, %226 %229 = fmul float %220, %226 %230 = fmul float %228, %212 %231 = fmul float %229, %210 %232 = fmul float %227, %211 %233 = fmul float %229, %211 %234 = fsub float %233, %230 %235 = fmul float %227, %212 %236 = fsub float %235, %231 %237 = fmul float %228, %210 %238 = fsub float %237, %232 %239 = fmul float %234, %234 %240 = fmul float %236, %236 %241 = fadd float %240, %239 %242 = fmul float %238, %238 %243 = fadd float %241, %242 %244 = call float @llvm.AMDGPU.rsq.clamped.f32(float %243) %245 = fmul float %234, %244 %246 = fmul float %238, %244 %247 = fmul float %148, %210 %248 = fmul float %209, %211 %249 = fadd float %248, %247 %250 = fmul float %149, %212 %251 = fadd float %249, %250 %252 = call float @llvm.fabs.f32(float %251) %253 = fmul float %252, 5.000000e-01 %254 = fadd float %253, 5.000000e-01 %255 = call float @llvm.AMDGPU.lrp(float %254, float %245, float %148) %256 = call float @llvm.AMDGPU.lrp(float %254, float 1.000000e+00, float %209) %257 = call float @llvm.AMDGPU.lrp(float %254, float %246, float %149) %258 = fmul float %255, %255 %259 = fmul float %256, %256 %260 = fadd float %259, %258 %261 = fmul float %257, %257 %262 = fadd float %260, %261 %263 = call float @llvm.AMDGPU.rsq.clamped.f32(float %262) %264 = fmul float %255, %263 %265 = fmul float %256, %263 %266 = fmul float %257, %263 %267 = fmul float %72, %64 %268 = fmul float %72, %64 %269 = fmul float %73, %65 %270 = fmul float %73, %65 %271 = fmul float %267, %80 %272 = fmul float %268, %81 %273 = fmul float %269, %82 %274 = fmul float %270, %83 %275 = fmul float %74, %66 %276 = fmul float %74, %66 %277 = fmul float %75, %67 %278 = fmul float %75, %67 %279 = fmul float %275, %84 %280 = fmul float %276, %85 %281 = fmul float %277, %86 %282 = fmul float %278, %87 %283 = fmul float %80, %134 %284 = fmul float %81, %136 %285 = fadd float %283, %284 %286 = fmul float %82, %134 %287 = fmul float %83, %136 %288 = fadd float %286, %287 %289 = fmul float %84, %134 %290 = fmul float %85, %136 %291 = fadd float %289, %290 %292 = fmul float %86, %134 %293 = fmul float %87, %136 %294 = fadd float %292, %293 %295 = fmul float %68, %285 %296 = fmul float %69, %288 %297 = fmul float %70, %291 %298 = fmul float %71, %294 %299 = fmul float %14, %76 %300 = fmul float %14, %77 %301 = fmul float %14, %78 %302 = fmul float %14, %79 %303 = fadd float %295, %299 %304 = fadd float %296, %300 %305 = fadd float %297, %301 %306 = fadd float %298, %302 %307 = call float @llvm.cos.f32(float %303) %308 = call float @llvm.cos.f32(float %304) %309 = call float @llvm.cos.f32(float %305) %310 = call float @llvm.cos.f32(float %306) %311 = fmul float %307, %271 %312 = fmul float %308, %273 %313 = fadd float %311, %312 %314 = fmul float %309, %279 %315 = fadd float %313, %314 %316 = fmul float %310, %281 %317 = fadd float %315, %316 %318 = fmul float %307, %272 %319 = fmul float %308, %274 %320 = fadd float %318, %319 %321 = fmul float %309, %280 %322 = fadd float %320, %321 %323 = fmul float %310, %282 %324 = fadd float %322, %323 %325 = fadd float %134, %317 %326 = fadd float %136, %324 %327 = fmul float %68, %64 %328 = fmul float %68, %64 %329 = fmul float %69, %65 %330 = fmul float %69, %65 %331 = fmul float %327, %80 %332 = fmul float %328, %81 %333 = fmul float %329, %82 %334 = fmul float %330, %83 %335 = fmul float %70, %66 %336 = fmul float %70, %66 %337 = fmul float %71, %67 %338 = fmul float %71, %67 %339 = fmul float %335, %84 %340 = fmul float %336, %85 %341 = fmul float %337, %86 %342 = fmul float %338, %87 %343 = fmul float %80, %325 %344 = fmul float %81, %326 %345 = fadd float %343, %344 %346 = fmul float %82, %325 %347 = fmul float %83, %326 %348 = fadd float %346, %347 %349 = fmul float %84, %325 %350 = fmul float %85, %326 %351 = fadd float %349, %350 %352 = fmul float %86, %325 %353 = fmul float %87, %326 %354 = fadd float %352, %353 %355 = fmul float %68, %345 %356 = fadd float %355, %299 %357 = fmul float %69, %348 %358 = fadd float %357, %300 %359 = fmul float %70, %351 %360 = fadd float %359, %301 %361 = fmul float %71, %354 %362 = fadd float %361, %302 %363 = call float @llvm.cos.f32(float %356) %364 = call float @llvm.cos.f32(float %358) %365 = call float @llvm.cos.f32(float %360) %366 = call float @llvm.cos.f32(float %362) %367 = fmul float %363, %331 %368 = fmul float %364, %333 %369 = fadd float %367, %368 %370 = fmul float %365, %339 %371 = fadd float %369, %370 %372 = fmul float %366, %341 %373 = fadd float %371, %372 %374 = fmul float %363, %332 %375 = fmul float %364, %334 %376 = fadd float %374, %375 %377 = fmul float %365, %340 %378 = fadd float %376, %377 %379 = fmul float %366, %342 %380 = fadd float %378, %379 %381 = fmul float %373, %373 %382 = fadd float %381, 4.000000e+00 %383 = fmul float %380, %380 %384 = fadd float %382, %383 %385 = call float @llvm.AMDGPU.rsq.clamped.f32(float %384) %386 = fmul float %373, %385 %387 = fmul float %380, %385 %388 = fmul float %189, 5.000000e-01 %389 = fmul float %386, -2.000000e+00 %390 = fmul float %387, -2.000000e+00 %391 = fsub float %134, %88 %392 = fsub float %136, %89 %393 = fmul float %90, 5.000000e-01 %394 = fadd float %393, %391 %395 = fmul float %91, 5.000000e-01 %396 = fadd float %395, %392 %397 = fdiv float 1.000000e+00, %90 %398 = fdiv float 1.000000e+00, %91 %399 = fmul float %394, %397 %400 = fmul float %396, %398 %401 = fadd float %129, 0x3FE99999A0000000 %402 = fmul float %401, %204 %403 = fmul float %128, %203 %404 = fmul float %403, %227 %405 = fmul float %403, %228 %406 = fmul float %403, %229 %407 = fmul float %402, %264 %408 = fadd float %407, %404 %409 = fmul float %402, %265 %410 = fadd float %409, %405 %411 = fmul float %402, %266 %412 = fadd float %411, %406 %413 = bitcast float %399 to i32 %414 = bitcast float %400 to i32 %415 = insertelement <4 x i32> undef, i32 %413, i32 0 %416 = insertelement <4 x i32> %415, i32 %414, i32 1 %417 = insertelement <4 x i32> %416, i32 0, i32 2 %418 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %417, <32 x i8> %120, <16 x i8> %123, i32 2) %419 = extractelement <4 x float> %418, i32 0 %420 = extractelement <4 x float> %418, i32 1 %421 = fadd float %419, -5.000000e-01 %422 = fadd float %420, -5.000000e-01 %423 = fadd float %389, %421 %424 = fadd float %390, %422 %425 = fmul float %423, %388 %426 = fmul float %388, 0.000000e+00 %427 = fmul float %424, %388 %428 = fmul float %425, %204 %429 = fmul float %426, %204 %430 = fmul float %427, %204 %431 = fmul float %428, %401 %432 = fmul float %429, %401 %433 = fmul float %430, %401 %434 = fmul float %148, 1.500000e+00 %435 = fmul float %149, 1.500000e+00 %436 = fmul float %389, %388 %437 = fmul float %436, 0x3FC99999A0000000 %438 = fadd float %435, %437 %439 = fmul float %434, %434 %440 = fmul float %209, %209 %441 = fadd float %440, %439 %442 = fmul float %438, %438 %443 = fadd float %441, %442 %444 = call float @llvm.AMDGPU.rsq.clamped.f32(float %443) %445 = fmul float %434, %444 %446 = fmul float %209, %444 %447 = fmul float %438, %444 %448 = fmul float %190, 8.000000e+00 %449 = fadd float %448, -4.000000e+00 %450 = fadd float %134, %408 %451 = fadd float %135, %410 %452 = fadd float %136, %412 %453 = fcmp ogt float %431, 0.000000e+00 %454 = select i1 %453, float 1.000000e+00, float %431 %455 = fcmp oge float %454, 0.000000e+00 %456 = select i1 %455, float %454, float -1.000000e+00 %457 = fcmp ogt float %432, 0.000000e+00 %458 = select i1 %457, float 1.000000e+00, float %432 %459 = fcmp oge float %458, 0.000000e+00 %460 = select i1 %459, float %458, float -1.000000e+00 %461 = fcmp ogt float %433, 0.000000e+00 %462 = select i1 %461, float 1.000000e+00, float %433 %463 = fcmp oge float %462, 0.000000e+00 %464 = select i1 %463, float %462, float -1.000000e+00 %465 = call float @llvm.fabs.f32(float %431) %466 = call float @llvm.fabs.f32(float %432) %467 = call float @llvm.fabs.f32(float %433) %468 = fmul float %465, 0x3FB1EB8520000000 %469 = fadd float %468, 0xBF8EB851E0000000 %470 = fmul float %466, 0x3FB1EB8520000000 %471 = fadd float %470, 0xBF8EB851E0000000 %472 = fmul float %467, 0x3FB1EB8520000000 %473 = fadd float %472, 0xBF8EB851E0000000 %474 = call float @llvm.maxnum.f32(float %469, float 0.000000e+00) %475 = call float @llvm.maxnum.f32(float %471, float 0.000000e+00) %476 = call float @llvm.maxnum.f32(float %473, float 0.000000e+00) %477 = fmul float %456, %474 %478 = fmul float %460, %475 %479 = fmul float %464, %476 %480 = fmul float %148, %148 %481 = fmul float %149, %149 %482 = fadd float %480, %481 %483 = call float @llvm.sqrt.f32(float %482) %484 = fadd float %483, 1.000000e+00 %485 = fadd float %13, 1.000000e+02 %486 = fmul float %484, %485 %487 = fmul float %486, 1.800000e+02 %488 = call float @llvm.sin.f32(float %487) %489 = fmul float %488, 0x3FE6666660000000 %490 = fmul float %477, %489 %491 = fadd float %490, %431 %492 = fmul float %478, %489 %493 = fadd float %492, %432 %494 = fmul float %479, %489 %495 = fadd float %494, %433 %496 = fmul float %489, 0.000000e+00 %497 = fadd float %496, 0.000000e+00 %498 = fadd float %450, %491 %499 = fadd float %451, %493 %500 = fadd float %452, %495 %501 = fadd float %497, 1.000000e+00 %502 = fmul float %43, %498 %503 = fmul float %44, %498 %504 = fmul float %45, %498 %505 = fmul float %46, %498 %506 = fmul float %47, %499 %507 = fadd float %506, %502 %508 = fmul float %48, %499 %509 = fadd float %508, %503 %510 = fmul float %49, %499 %511 = fadd float %510, %504 %512 = fmul float %50, %499 %513 = fadd float %512, %505 %514 = fmul float %51, %500 %515 = fadd float %514, %507 %516 = fmul float %52, %500 %517 = fadd float %516, %509 %518 = fmul float %53, %500 %519 = fadd float %518, %511 %520 = fmul float %54, %500 %521 = fadd float %520, %513 %522 = fmul float %55, %501 %523 = fadd float %522, %515 %524 = fmul float %56, %501 %525 = fadd float %524, %517 %526 = fmul float %57, %501 %527 = fadd float %526, %519 %528 = fmul float %58, %501 %529 = fadd float %528, %521 %530 = fmul float %92, %523 %531 = fmul float %93, %523 %532 = fmul float %94, %523 %533 = fmul float %95, %523 %534 = fmul float %96, %525 %535 = fadd float %534, %530 %536 = fmul float %97, %525 %537 = fadd float %536, %531 %538 = fmul float %98, %525 %539 = fadd float %538, %532 %540 = fmul float %99, %525 %541 = fadd float %540, %533 %542 = fmul float %100, %527 %543 = fadd float %542, %535 %544 = fmul float %101, %527 %545 = fadd float %544, %537 %546 = fmul float %102, %527 %547 = fadd float %546, %539 %548 = fmul float %103, %527 %549 = fadd float %548, %541 %550 = fmul float %104, %529 %551 = fadd float %550, %543 %552 = fmul float %105, %529 %553 = fadd float %552, %545 %554 = fmul float %106, %529 %555 = fadd float %554, %547 %556 = fmul float %107, %529 %557 = fadd float %556, %549 %558 = fmul float %551, 5.000000e-01 %559 = fmul float %553, 5.000000e-01 %560 = fmul float %557, 5.000000e-01 %561 = fmul float %559, %15 %562 = fadd float %558, %560 %563 = fadd float %561, %560 %564 = fmul float %43, %445 %565 = fmul float %47, %445 %566 = fmul float %51, %445 %567 = fmul float %44, %446 %568 = fadd float %567, %564 %569 = fmul float %48, %446 %570 = fadd float %569, %565 %571 = fmul float %52, %446 %572 = fadd float %571, %566 %573 = fmul float %45, %447 %574 = fadd float %573, %568 %575 = fmul float %49, %447 %576 = fadd float %575, %570 %577 = fmul float %53, %447 %578 = fadd float %577, %572 %579 = fmul float %574, %574 %580 = fmul float %576, %576 %581 = fadd float %580, %579 %582 = fmul float %578, %578 %583 = fadd float %581, %582 %584 = call float @llvm.AMDGPU.rsq.clamped.f32(float %583) %585 = fmul float %574, %584 %586 = fmul float %576, %584 %587 = fmul float %578, %584 %588 = fmul float %16, %585 %589 = fmul float %17, %586 %590 = fadd float %588, %589 %591 = fmul float %18, %587 %592 = fadd float %590, %591 %593 = fadd float %592, %19 %594 = fmul float %20, %585 %595 = fmul float %21, %586 %596 = fadd float %594, %595 %597 = fmul float %22, %587 %598 = fadd float %596, %597 %599 = fadd float %598, %23 %600 = fmul float %24, %585 %601 = fmul float %25, %586 %602 = fadd float %600, %601 %603 = fmul float %26, %587 %604 = fadd float %602, %603 %605 = fadd float %604, %27 %606 = fmul float %585, %586 %607 = fmul float %586, %587 %608 = fmul float %587, %587 %609 = fmul float %587, %585 %610 = fmul float %28, %606 %611 = fmul float %29, %607 %612 = fadd float %610, %611 %613 = fmul float %30, %608 %614 = fadd float %612, %613 %615 = fmul float %31, %609 %616 = fadd float %614, %615 %617 = fmul float %32, %606 %618 = fmul float %33, %607 %619 = fadd float %617, %618 %620 = fmul float %34, %608 %621 = fadd float %619, %620 %622 = fmul float %35, %609 %623 = fadd float %621, %622 %624 = fmul float %36, %606 %625 = fmul float %37, %607 %626 = fadd float %624, %625 %627 = fmul float %38, %608 %628 = fadd float %626, %627 %629 = fmul float %39, %609 %630 = fadd float %628, %629 %631 = fmul float %586, %586 %632 = fmul float %585, %585 %633 = fsub float %632, %631 %634 = fmul float %40, %633 %635 = fadd float %634, %616 %636 = fmul float %41, %633 %637 = fadd float %636, %623 %638 = fmul float %42, %633 %639 = fadd float %638, %630 %640 = fadd float %635, %593 %641 = fadd float %637, %599 %642 = fadd float %639, %605 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %159, float %160, float %156, float %449) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %562, float %563, float %555, float %557) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %555, float %141, float %142, float %143) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %640, float %641, float %642, float %177) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %551, float %553, float %555, float %557) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.cos.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v22, 0x3eff7cee ; 7E2C02FF 3EFF7CEE v_mov_b32_e32 v10, 0x3e22f983 ; 7E1402FF 3E22F983 v_mov_b32_e32 v23, 0x80000000 ; 7E2E02FF 80000000 v_mov_b32_e32 v11, 0x3fc00000 ; 7E1602FF 3FC00000 v_mov_b32_e32 v20, 0x42c80000 ; 7E2802FF 42C80000 v_mov_b32_e32 v8, 0xbc75c28f ; 7E1002FF BC75C28F v_mov_b32_e32 v9, 0x3d8f5c29 ; 7E1202FF 3D8F5C29 v_mov_b32_e32 v36, 0x41000000 ; 7E4802FF 41000000 v_add_i32_e32 v21, s10, v0 ; 4A2A000A s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[24:27], s[8:9], 0xc ; C08C090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_load_dwordx4 s[72:75], s[4:5], 0x0 ; C0A40500 s_load_dwordx4 s[68:71], s[4:5], 0x4 ; C0A20504 s_load_dwordx4 s[56:59], s[4:5], 0x8 ; C09C0508 s_load_dwordx8 s[76:83], s[6:7], 0x0 ; C0E60700 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s47, s[12:15], 0x67 ; C2178D67 buffer_load_format_xyzw v[16:19], v21, s[0:3], 0 idxen ; E00C2000 80001015 buffer_load_format_xyzw v[4:7], v21, s[16:19], 0 idxen ; E00C2000 80040415 buffer_load_format_xyzw v[0:3], v21, s[20:23], 0 idxen ; E00C2000 80050015 s_buffer_load_dword s46, s[12:15], 0x68 ; C2170D68 s_buffer_load_dword s48, s[12:15], 0x69 ; C2180D69 s_buffer_load_dword s20, s[12:15], 0x6a ; C20A0D6A s_buffer_load_dword s49, s[12:15], 0x6b ; C2188D6B s_buffer_load_dword s3, s[12:15], 0x0 ; C2018D00 s_buffer_load_dword s4, s[12:15], 0x1 ; C2020D01 s_buffer_load_dword s2, s[12:15], 0x4 ; C2010D04 s_buffer_load_dword s0, s[12:15], 0x8 ; C2000D08 s_buffer_load_dword s1, s[12:15], 0x9 ; C2008D09 buffer_load_format_xyzw v[12:15], v21, s[24:27], 0 idxen ; E00C2000 80060C15 s_buffer_load_dword s5, s[12:15], 0x5d ; C2028D5D s_buffer_load_dword s16, s[12:15], 0x5e ; C2080D5E s_buffer_load_dword s17, s[12:15], 0x5f ; C2088D5F s_buffer_load_dword s18, s[12:15], 0x60 ; C2090D60 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_add_f32_e32 v3, s3, v20 ; 06062803 s_buffer_load_dword s3, s[12:15], 0x61 ; C2018D61 buffer_load_format_xyzw v[37:40], v21, s[8:11], 0 idxen ; E00C2000 80022515 s_buffer_load_dword s8, s[12:15], 0x62 ; C2040D62 s_buffer_load_dword s9, s[12:15], 0x63 ; C2048D63 s_buffer_load_dword s51, s[12:15], 0x64 ; C2198D64 s_buffer_load_dword s55, s[12:15], 0x65 ; C21B8D65 s_buffer_load_dword s53, s[12:15], 0x66 ; C21A8D66 s_buffer_load_dword s50, s[12:15], 0x58 ; C2190D58 s_waitcnt vmcnt(1) ; BF8C0771 v_mov_b32_e32 v7, s18 ; 7E0E0212 v_mul_f32_e32 v20, s4, v7 ; 10280E04 s_buffer_load_dword s52, s[12:15], 0x59 ; C21A0D59 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s3 ; 7E0E0203 v_mul_f32_e32 v21, s4, v7 ; 102A0E04 s_buffer_load_dword s54, s[12:15], 0x5a ; C21B0D5A s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v7, s8 ; 7E0E0208 v_mul_f32_e32 v19, s4, v7 ; 10260E04 v_mov_b32_e32 v7, s9 ; 7E0E0209 v_mul_f32_e32 v14, s4, v7 ; 101C0E04 s_buffer_load_dword s30, s[12:15], 0x5b ; C20F0D5B s_buffer_load_dword s4, s[12:15], 0x5c ; C2020D5C s_buffer_load_dword s8, s[12:15], 0x51 ; C2040D51 s_buffer_load_dword s9, s[12:15], 0x54 ; C2048D54 s_buffer_load_dword s10, s[12:15], 0x55 ; C2050D55 s_buffer_load_dword s11, s[12:15], 0x56 ; C2058D56 s_buffer_load_dword s18, s[12:15], 0x57 ; C2090D57 s_buffer_load_dword s3, s[12:15], 0x33 ; C2018D33 s_buffer_load_dword s29, s[12:15], 0x36 ; C20E8D36 s_buffer_load_dword s28, s[12:15], 0x3a ; C20E0D3A s_buffer_load_dword s27, s[12:15], 0x3e ; C20D8D3E s_buffer_load_dword s92, s[12:15], 0x50 ; C22E0D50 s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v38, s8 ; 7E4C5408 v_mov_b32_e32 v7, s9 ; 7E0E0209 v_mov_b32_e32 v15, s9 ; 7E1E0209 v_mul_f32_e32 v15, s4, v15 ; 101E1E04 v_mul_f32_e32 v7, s50, v7 ; 100E0E32 v_mov_b32_e32 v18, s10 ; 7E24020A v_mov_b32_e32 v24, s10 ; 7E30020A v_mul_f32_e32 v24, s5, v24 ; 10303005 v_mul_f32_e32 v18, s52, v18 ; 10242434 v_mul_f32_e32 v39, s47, v24 ; 104E302F v_mul_f32_e32 v40, s53, v24 ; 10503035 v_mul_f32_e32 v41, s55, v15 ; 10521E37 v_mul_f32_e32 v42, s51, v15 ; 10541E33 v_mov_b32_e32 v15, s11 ; 7E1E020B v_mov_b32_e32 v24, s11 ; 7E30020B v_mul_f32_e32 v24, s16, v24 ; 10303010 v_mul_f32_e32 v25, s54, v15 ; 10321E36 v_mul_f32_e32 v43, s48, v24 ; 10563030 v_mul_f32_e32 v44, s46, v24 ; 1058302E v_mov_b32_e32 v15, s18 ; 7E1E0212 v_mov_b32_e32 v24, s18 ; 7E300212 v_mul_f32_e32 v24, s17, v24 ; 10303011 v_mul_f32_e32 v26, s30, v15 ; 10341E1E v_mul_f32_e32 v45, s49, v24 ; 105A3031 v_mul_f32_e32 v46, s20, v24 ; 105C3014 v_mov_b32_e32 v15, s27 ; 7E1E021B v_mad_f32 v15, -s28, v23, -v15 ; D282000F A43E2E1C v_mul_f32_e64 v24, 0, s27 ; D2100018 00003680 v_mac_f32_e32 v24, s29, v23 ; 3E302E1D v_mul_f32_e32 v27, v15, v15 ; 10361F0F v_mac_f32_e32 v27, v24, v24 ; 3E363118 v_mov_b32_e32 v28, s29 ; 7E38021D v_mad_f32 v23, s28, v23, v28 ; D2820017 04722E1C v_mac_f32_e32 v27, v23, v23 ; 3E362F17 v_rsq_clamp_f32_e32 v27, v27 ; 7E36591B v_mul_f32_e32 v35, s53, v18 ; 10462435 v_mul_f32_e32 v34, s47, v18 ; 1044242F v_mul_f32_e32 v33, s51, v7 ; 10420E33 v_mul_f32_e32 v32, s55, v7 ; 10400E37 v_mul_f32_e32 v7, v27, v23 ; 100E2F1B v_mul_f32_e32 v15, v27, v15 ; 101E1F1B v_mul_f32_e32 v18, v27, v24 ; 1024311B v_mul_f32_e32 v30, s46, v25 ; 103C322E v_mul_f32_e32 v31, s48, v25 ; 103E3230 v_rcp_f32_e32 v23, s92 ; 7E2E545C v_mul_f32_e32 v29, s20, v26 ; 103A3414 v_mul_f32_e32 v28, s49, v26 ; 10383431 v_mul_f32_e32 v24, s28, v7 ; 10300E1C v_mad_f32 v24, v18, s27, -v24 ; D2820018 84603712 v_mul_f32_e32 v25, s27, v15 ; 10321E1B v_mad_f32 v25, v7, s29, -v25 ; D2820019 84643B07 v_mul_f32_e32 v26, s29, v18 ; 1034241D v_mad_f32 v27, v15, s28, -v26 ; D282001B 8468390F v_mul_f32_e32 v26, v24, v24 ; 10343118 v_mac_f32_e32 v26, v25, v25 ; 3E343319 v_mac_f32_e32 v26, v27, v27 ; 3E34371B v_rsq_clamp_f32_e32 v25, v26 ; 7E32591A s_buffer_load_dword s93, s[12:15], 0x6c ; C22E8D6C s_buffer_load_dword s94, s[12:15], 0x6d ; C22F0D6D s_buffer_load_dword s95, s[12:15], 0x6e ; C22F8D6E v_mul_f32_e32 v26, v25, v24 ; 10343119 v_mul_f32_e32 v27, v25, v27 ; 10363719 s_buffer_load_dword s96, s[12:15], 0x6f ; C2300D6F s_buffer_load_dword s16, s[12:15], 0x70 ; C2080D70 s_buffer_load_dword s9, s[12:15], 0x24 ; C2048D24 s_buffer_load_dword s5, s[12:15], 0x25 ; C2028D25 s_buffer_load_dword s4, s[12:15], 0x26 ; C2020D26 s_buffer_load_dword s24, s[12:15], 0x27 ; C20C0D27 s_buffer_load_dword s17, s[12:15], 0x28 ; C2088D28 s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v47, s95 ; 7E5E545F s_buffer_load_dword s10, s[12:15], 0x29 ; C2050D29 s_buffer_load_dword s8, s[12:15], 0x2a ; C2040D2A v_rcp_f32_e32 v48, s96 ; 7E605460 s_buffer_load_dword s26, s[12:15], 0x2b ; C20D0D2B s_buffer_load_dword s19, s[12:15], 0x2c ; C2098D2C s_buffer_load_dword s18, s[12:15], 0x2d ; C2090D2D s_buffer_load_dword s11, s[12:15], 0x2e ; C2058D2E s_buffer_load_dword s25, s[12:15], 0x2f ; C20C8D2F s_buffer_load_dword s21, s[12:15], 0x30 ; C20A8D30 s_buffer_load_dword s22, s[12:15], 0x31 ; C20B0D31 s_buffer_load_dword s23, s[12:15], 0x32 ; C20B8D32 s_buffer_load_dword s31, s[12:15], 0x71 ; C20F8D71 s_buffer_load_dword s32, s[12:15], 0x72 ; C2100D72 s_buffer_load_dword s33, s[12:15], 0x73 ; C2108D73 s_buffer_load_dword s34, s[12:15], 0x74 ; C2110D74 s_buffer_load_dword s35, s[12:15], 0x75 ; C2118D75 s_buffer_load_dword s36, s[12:15], 0x76 ; C2120D76 s_buffer_load_dword s37, s[12:15], 0x77 ; C2128D77 s_buffer_load_dword s38, s[12:15], 0x78 ; C2130D78 s_buffer_load_dword s39, s[12:15], 0x79 ; C2138D79 s_buffer_load_dword s40, s[12:15], 0x7a ; C2140D7A s_buffer_load_dword s41, s[12:15], 0x7b ; C2148D7B s_buffer_load_dword s42, s[12:15], 0x7c ; C2150D7C s_buffer_load_dword s43, s[12:15], 0x7d ; C2158D7D s_buffer_load_dword s44, s[12:15], 0x7e ; C2160D7E s_buffer_load_dword s45, s[12:15], 0x7f ; C2168D7F s_load_dwordx8 s[84:91], s[6:7], 0x8 ; C0EA0708 s_load_dwordx8 s[60:67], s[6:7], 0x10 ; C0DE0710 v_add_f32_e32 v49, 1.0, v16 ; 066220F2 v_subrev_f32_e32 v50, s93, v4 ; 0A64085D v_mac_f32_e64 v50, 0.5, s95 ; D23E0032 0000BEF0 v_subrev_f32_e32 v51, s94, v6 ; 0A660C5E v_mac_f32_e64 v51, 0.5, s96 ; D23E0033 0000C0F0 v_mul_f32_e32 v52, s55, v6 ; 10680C37 v_mac_f32_e32 v52, s51, v4 ; 3E680833 v_mul_f32_e32 v24, s92, v37 ; 10304A5C v_floor_f32_e32 v37, v24 ; 7E4A4918 v_add_f32_e32 v24, v22, v37 ; 06304B16 v_mul_f32_e32 v53, v23, v24 ; 106A3117 v_mul_f32_e32 v54, v22, v38 ; 106C4D16 v_mov_b32_e32 v55, 0 ; 7E6E0280 image_sample_l v[22:25], 15, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[76:83], s[72:75] ; F0900F00 02531635 v_mul_f32_e32 v54, 0x3fbfdf3b, v38 ; 106C4CFF 3FBFDF3B image_sample_l v[56:57], 3, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[76:83], s[72:75] ; F0900300 02533835 v_mov_b32_e32 v53, v4 ; 7E6A0304 v_mov_b32_e32 v54, v6 ; 7E6C0306 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 image_sample_l v[58:59], 3, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[84:91], s[68:71] ; F0900300 02353A35 v_mul_f32_e32 v53, v47, v50 ; 106A652F v_mul_f32_e32 v54, v48, v51 ; 106C6730 image_sample_l v[47:48], 3, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[60:67], s[56:59] ; F0900300 01CF2F35 v_mul_f32_e32 v38, s47, v6 ; 104C0C2F v_mac_f32_e32 v38, s53, v4 ; 3E4C0835 v_mad_f32 v50, s50, v52, v20 ; D2820032 04526832 v_mad_f32 v38, s52, v38, v21 ; D2820026 04564C34 v_mul_f32_e32 v50, v10, v50 ; 1064650A v_mul_f32_e32 v38, v10, v38 ; 104C4D0A v_fract_f32_e32 v50, v50 ; 7E644132 v_fract_f32_e32 v38, v38 ; 7E4C4126 v_cos_f32_e32 v50, v50 ; 7E646D32 v_cos_f32_e32 v38, v38 ; 7E4C6D26 v_mul_f32_e32 v39, v39, v38 ; 104E4D27 v_mac_f32_e32 v39, v41, v50 ; 3E4E6529 v_mul_f32_e32 v41, s48, v6 ; 10520C30 v_mac_f32_e32 v41, s46, v4 ; 3E52082E v_mad_f32 v41, s54, v41, v19 ; D2820029 044E5236 v_mul_f32_e32 v41, v10, v41 ; 1052530A v_fract_f32_e32 v41, v41 ; 7E524129 v_cos_f32_e32 v41, v41 ; 7E526D29 v_mac_f32_e32 v39, v43, v41 ; 3E4E532B v_mul_f32_e32 v43, s49, v6 ; 10560C31 v_mac_f32_e32 v43, s20, v4 ; 3E560814 v_mad_f32 v43, s30, v43, v14 ; D282002B 043A561E v_mul_f32_e32 v43, v10, v43 ; 1056570A v_fract_f32_e32 v43, v43 ; 7E56412B v_cos_f32_e32 v43, v43 ; 7E566D2B v_mac_f32_e32 v39, v45, v43 ; 3E4E572D v_add_f32_e32 v39, v39, v6 ; 064E0D27 v_mul_f32_e32 v45, s55, v39 ; 105A4E37 v_mul_f32_e32 v51, s47, v39 ; 10664E2F v_mul_f32_e32 v52, s48, v39 ; 10684E30 v_mul_f32_e32 v39, s49, v39 ; 104E4E31 v_mul_f32_e32 v38, v40, v38 ; 104C4D28 v_mac_f32_e32 v38, v42, v50 ; 3E4C652A v_mac_f32_e32 v38, v44, v41 ; 3E4C532C v_mac_f32_e32 v38, v46, v43 ; 3E4C572E v_add_f32_e32 v38, v38, v4 ; 064C0926 v_mac_f32_e32 v45, s51, v38 ; 3E5A4C33 v_mac_f32_e32 v51, s53, v38 ; 3E664C35 v_mac_f32_e32 v52, s46, v38 ; 3E684C2E v_mac_f32_e32 v20, s50, v45 ; 3E285A32 v_mac_f32_e32 v21, s52, v51 ; 3E2A6634 v_mac_f32_e32 v19, s54, v52 ; 3E266836 v_mul_f32_e32 v40, 0.5, v49 ; 105062F0 v_add_f32_e32 v41, 1.0, v17 ; 065222F2 v_mul_f32_e32 v41, 0.5, v41 ; 105252F0 v_mad_f32 v36, v36, v57, -4.0 ; D2820024 03DE7324 exp 15, 32, 0, 0, 0, v40, v41, v37, v36 ; F800020F 24252928 v_add_f32_e32 v22, v22, v22 ; 062C2D16 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_sub_f32_e32 v36, 1.0, v58 ; 084874F2 v_mul_f32_e32 v22, v22, v36 ; 102C4916 v_add_f32_e32 v24, v24, v24 ; 06303118 v_mac_f32_e32 v22, v24, v58 ; 3E2C7518 v_mul_f32_e32 v16, v22, v16 ; 10202116 v_add_f32_e32 v17, 0x3f4ccccd, v17 ; 062222FF 3F4CCCCD v_mul_f32_e32 v21, v10, v21 ; 102A2B0A v_fract_f32_e32 v21, v21 ; 7E2A4115 v_cos_f32_e32 v21, v21 ; 7E2A6D15 v_mul_f32_e32 v22, v35, v21 ; 102C2B23 v_mul_f32_e32 v21, v34, v21 ; 102A2B22 v_mul_f32_e32 v20, v10, v20 ; 1028290A v_fract_f32_e32 v20, v20 ; 7E284114 v_cos_f32_e32 v20, v20 ; 7E286D14 v_mac_f32_e32 v22, v33, v20 ; 3E2C2921 v_mac_f32_e32 v21, v32, v20 ; 3E2A2920 v_mul_f32_e32 v19, v10, v19 ; 1026270A v_fract_f32_e32 v19, v19 ; 7E264113 v_cos_f32_e32 v19, v19 ; 7E266D13 v_mac_f32_e32 v22, v30, v19 ; 3E2C271E v_mac_f32_e32 v21, v31, v19 ; 3E2A271F v_mac_f32_e32 v39, s20, v38 ; 3E4E4C14 v_mac_f32_e32 v14, s30, v39 ; 3E1C4E1E v_mul_f32_e32 v14, v10, v14 ; 101C1D0A v_fract_f32_e32 v14, v14 ; 7E1C410E v_cos_f32_e32 v14, v14 ; 7E1C6D0E v_mac_f32_e32 v22, v29, v14 ; 3E2C1D1D v_mac_f32_e32 v21, v28, v14 ; 3E2A1D1C v_mul_f32_e32 v14, 0.5, v56 ; 101C70F0 v_mul_f32_e32 v19, v12, v12 ; 1026190C v_mac_f32_e32 v19, v13, v13 ; 3E261B0D v_sqrt_f32_e32 v19, v19 ; 7E266713 v_mad_f32 v20, v22, v22, 4.0 ; D2820014 03DA2D16 v_mac_f32_e32 v20, v21, v21 ; 3E282B15 v_rsq_clamp_f32_e32 v20, v20 ; 7E285914 v_mul_f32_e32 v24, s29, v12 ; 1030181D v_sub_f32_e32 v28, 1.0, v19 ; 083826F2 v_mad_f32 v24, v28, -s28, -v24 ; D2820018 C460391C v_mad_f32 v24, -v13, s27, v24 ; D2820018 2460370D v_mul_f32_e32 v22, v20, v22 ; 102C2D14 v_mul_f32_e32 v29, -2.0, v22 ; 103A2CF5 v_mul_f32_e32 v29, v14, v29 ; 103A3B0E v_mul_f32_e32 v29, 0x3e4ccccd, v29 ; 103A3AFF 3E4CCCCD v_mac_f32_e32 v29, v11, v13 ; 3E3A1B0B v_mul_f32_e32 v11, v11, v12 ; 1016190B v_mad_f32 v24, 0.5, |v24|, 0.5 ; D2820218 03C230F0 v_sub_f32_e32 v30, 1.0, v24 ; 083C30F2 v_mul_f32_e32 v12, v12, v30 ; 10183D0C v_mul_f32_e32 v13, v13, v30 ; 101A3D0D v_add_f32_e32 v23, v23, v23 ; 062E2F17 v_sub_f32_e32 v31, 1.0, v59 ; 083E76F2 v_mul_f32_e32 v23, v23, v31 ; 102E3F17 v_add_f32_e32 v31, v25, v25 ; 063E3319 v_mac_f32_e32 v23, v31, v59 ; 3E2E771F v_mac_f32_e32 v12, v26, v24 ; 3E18311A v_mac_f32_e32 v13, v27, v24 ; 3E1A311B v_mul_f32_e32 v26, v28, v30 ; 10343D1C v_mac_f32_e32 v26, 1.0, v24 ; 3E3430F2 v_mul_f32_e32 v24, v12, v12 ; 1030190C v_mac_f32_e32 v24, v26, v26 ; 3E30351A v_mac_f32_e32 v24, v13, v13 ; 3E301B0D v_rsq_clamp_f32_e32 v24, v24 ; 7E305918 v_mad_f32 v4, v15, v16, v4 ; D2820004 0412210F v_mad_f32 v5, v18, v16, v5 ; D2820005 04162112 v_mac_f32_e32 v6, v7, v16 ; 3E0C2107 v_mul_f32_e32 v7, v24, v12 ; 100E1918 v_mul_f32_e32 v12, v24, v26 ; 10183518 v_mul_f32_e32 v13, v24, v13 ; 101A1B18 v_mul_f32_e32 v15, v23, v17 ; 101E2317 v_mac_f32_e32 v4, v7, v15 ; 3E081F07 v_mac_f32_e32 v5, v12, v15 ; 3E0A1F0C v_mac_f32_e32 v6, v13, v15 ; 3E0C1F0D v_add_f32_e32 v7, 1.0, v19 ; 060E26F2 v_mul_f32_e32 v3, v3, v7 ; 10060F03 v_mul_f32_e32 v3, 0x43340000, v3 ; 100606FF 43340000 v_mul_f32_e32 v3, v10, v3 ; 1006070A v_add_f32_e32 v7, -0.5, v47 ; 060E5EF1 v_add_f32_e32 v10, -0.5, v48 ; 061460F1 v_mul_f32_e32 v12, 0, v14 ; 10181C80 v_mul_f32_e32 v12, v23, v12 ; 10181917 v_mul_f32_e32 v13, v17, v12 ; 101A1911 v_cmp_lt_f32_e32 vcc, 0, v13 ; 7C021A80 v_cndmask_b32_e64 v15, v13, 1.0, vcc ; D200000F 01A9E50D v_cmp_le_f32_e32 vcc, 0, v15 ; 7C061E80 v_cndmask_b32_e32 v15, -1.0, v15 ; 001E1EF3 v_mad_f32 v13, |v13|, v9, v8 ; D282010D 0422130D v_max_f32_e32 v13, 0, v13 ; 201A1A80 v_mul_f32_e32 v13, v13, v15 ; 101A1F0D v_mac_f32_e32 v5, v17, v12 ; 3E0A1911 v_fract_f32_e32 v3, v3 ; 7E064103 v_sin_f32_e32 v3, v3 ; 7E066B03 v_mul_f32_e32 v3, 0x3f333333, v3 ; 100606FF 3F333333 v_mac_f32_e32 v5, v3, v13 ; 3E0A1B03 v_mul_f32_e32 v12, v20, v21 ; 10182B14 v_mac_f32_e32 v7, -2.0, v22 ; 3E0E2CF5 v_mac_f32_e32 v10, -2.0, v12 ; 3E1418F5 v_mul_f32_e32 v7, v14, v7 ; 100E0F0E v_mul_f32_e32 v10, v14, v10 ; 1014150E v_mul_f32_e32 v7, v23, v7 ; 100E0F17 v_mul_f32_e32 v10, v23, v10 ; 10141517 v_mac_f32_e32 v4, v17, v7 ; 3E080F11 v_mul_f32_e32 v7, v17, v7 ; 100E0F11 v_mac_f32_e32 v6, v17, v10 ; 3E0C1511 v_mul_f32_e32 v10, v17, v10 ; 10141511 v_mad_f32 v12, |v7|, v9, v8 ; D282010C 04221307 v_mad_f32 v8, |v10|, v9, v8 ; D2820108 0422130A v_cmp_lt_f32_e32 vcc, 0, v7 ; 7C020E80 v_cndmask_b32_e64 v7, v7, 1.0, vcc ; D2000007 01A9E507 v_cmp_lt_f32_e32 vcc, 0, v10 ; 7C021480 v_cndmask_b32_e64 v9, v10, 1.0, vcc ; D2000009 01A9E50A v_cmp_le_f32_e32 vcc, 0, v7 ; 7C060E80 v_cndmask_b32_e32 v7, -1.0, v7 ; 000E0EF3 v_cmp_le_f32_e32 vcc, 0, v9 ; 7C061280 v_cndmask_b32_e32 v9, -1.0, v9 ; 001212F3 v_max_f32_e32 v10, 0, v12 ; 20141880 v_mul_f32_e32 v7, v10, v7 ; 100E0F0A v_max_f32_e32 v8, 0, v8 ; 20101080 v_mul_f32_e32 v8, v8, v9 ; 10101308 v_mac_f32_e32 v4, v3, v7 ; 3E080F03 v_mac_f32_e32 v6, v3, v8 ; 3E0C1103 v_mul_f32_e32 v7, s24, v4 ; 100E0818 v_mac_f32_e32 v7, s26, v5 ; 3E0E0A1A v_mul_f32_e32 v8, s9, v4 ; 10100809 v_mul_f32_e32 v9, s5, v4 ; 10120805 v_mul_f32_e32 v4, s4, v4 ; 10080804 v_mac_f32_e32 v8, s17, v5 ; 3E100A11 v_mac_f32_e32 v9, s10, v5 ; 3E120A0A v_mac_f32_e32 v4, s8, v5 ; 3E080A08 v_mac_f32_e32 v7, s25, v6 ; 3E0E0C19 v_mac_f32_e32 v8, s19, v6 ; 3E100C13 v_mac_f32_e32 v9, s18, v6 ; 3E120C12 v_mac_f32_e32 v4, s11, v6 ; 3E080C0B v_mad_f32 v3, 0, v3, 0 ; D2820003 02020680 v_add_f32_e32 v3, 1.0, v3 ; 060606F2 v_mac_f32_e32 v8, s21, v3 ; 3E100615 v_mac_f32_e32 v9, s22, v3 ; 3E120616 v_mac_f32_e32 v4, s23, v3 ; 3E080617 v_mac_f32_e32 v7, s3, v3 ; 3E0E0603 v_mul_f32_e32 v3, s16, v8 ; 10061010 v_mul_f32_e32 v5, s31, v8 ; 100A101F v_mul_f32_e32 v6, s32, v8 ; 100C1020 v_mul_f32_e32 v8, s33, v8 ; 10101021 v_mac_f32_e32 v3, s34, v9 ; 3E061222 v_mac_f32_e32 v5, s35, v9 ; 3E0A1223 v_mac_f32_e32 v6, s36, v9 ; 3E0C1224 v_mac_f32_e32 v8, s37, v9 ; 3E101225 v_mac_f32_e32 v3, s38, v4 ; 3E060826 v_mac_f32_e32 v5, s39, v4 ; 3E0A0827 v_mac_f32_e32 v6, s40, v4 ; 3E0C0828 v_mac_f32_e32 v8, s41, v4 ; 3E100829 v_mac_f32_e32 v3, s42, v7 ; 3E060E2A v_mac_f32_e32 v5, s43, v7 ; 3E0A0E2B v_mac_f32_e32 v6, s44, v7 ; 3E0C0E2C v_mac_f32_e32 v8, s45, v7 ; 3E100E2D v_mul_f32_e32 v4, v11, v11 ; 1008170B v_mac_f32_e32 v4, v28, v28 ; 3E08391C v_mac_f32_e32 v4, v29, v29 ; 3E083B1D v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 v_mul_f32_e32 v7, 0.5, v5 ; 100E0AF0 v_mul_f32_e32 v9, 0.5, v8 ; 101210F0 v_mad_f32 v10, 0.5, v3, v9 ; D282000A 042606F0 v_mac_f32_e32 v9, s2, v7 ; 3E120E02 exp 15, 33, 0, 0, 0, v10, v9, v6, v8 ; F800021F 0806090A exp 15, 34, 0, 0, 0, v6, v0, v1, v2 ; F800022F 02010006 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v4, v11 ; 10001704 v_mul_f32_e32 v1, s9, v0 ; 10020009 v_mul_f32_e32 v2, s17, v0 ; 10040011 v_mul_f32_e32 v0, s19, v0 ; 10000013 v_mul_f32_e32 v7, v4, v28 ; 100E3904 v_mac_f32_e32 v1, s5, v7 ; 3E020E05 v_mac_f32_e32 v2, s10, v7 ; 3E040E0A v_mac_f32_e32 v0, s18, v7 ; 3E000E12 v_mul_f32_e32 v4, v4, v29 ; 10083B04 v_mac_f32_e32 v1, s4, v4 ; 3E020804 v_mac_f32_e32 v2, s8, v4 ; 3E040808 v_mac_f32_e32 v0, s11, v4 ; 3E00080B v_mul_f32_e32 v4, v1, v1 ; 10080301 v_mac_f32_e32 v4, v2, v2 ; 3E080502 v_mac_f32_e32 v4, v0, v0 ; 3E080100 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 s_buffer_load_dword s2, s[12:15], 0xa ; C2010D0A s_buffer_load_dword s3, s[12:15], 0xb ; C2018D0B s_buffer_load_dword s4, s[12:15], 0xc ; C2020D0C s_buffer_load_dword s5, s[12:15], 0xd ; C2028D0D s_buffer_load_dword s6, s[12:15], 0xe ; C2030D0E s_buffer_load_dword s7, s[12:15], 0xf ; C2038D0F s_buffer_load_dword s8, s[12:15], 0x10 ; C2040D10 s_buffer_load_dword s9, s[12:15], 0x11 ; C2048D11 s_buffer_load_dword s10, s[12:15], 0x12 ; C2050D12 s_buffer_load_dword s11, s[12:15], 0x13 ; C2058D13 s_buffer_load_dword s16, s[12:15], 0x14 ; C2080D14 s_buffer_load_dword s17, s[12:15], 0x15 ; C2088D15 s_buffer_load_dword s18, s[12:15], 0x16 ; C2090D16 s_buffer_load_dword s19, s[12:15], 0x17 ; C2098D17 s_buffer_load_dword s20, s[12:15], 0x18 ; C20A0D18 s_buffer_load_dword s21, s[12:15], 0x19 ; C20A8D19 s_buffer_load_dword s22, s[12:15], 0x1a ; C20B0D1A s_buffer_load_dword s23, s[12:15], 0x1b ; C20B8D1B s_buffer_load_dword s24, s[12:15], 0x1c ; C20C0D1C s_buffer_load_dword s25, s[12:15], 0x1d ; C20C8D1D s_buffer_load_dword s26, s[12:15], 0x1e ; C20D0D1E s_buffer_load_dword s27, s[12:15], 0x1f ; C20D8D1F s_buffer_load_dword s28, s[12:15], 0x20 ; C20E0D20 s_buffer_load_dword s29, s[12:15], 0x21 ; C20E8D21 s_buffer_load_dword s12, s[12:15], 0x22 ; C2060D22 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_mul_f32_e32 v4, v0, v2 ; 10080500 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s17, v4 ; 100E0811 v_mul_f32_e32 v9, s21, v4 ; 10120815 v_mul_f32_e32 v4, s25, v4 ; 10080819 v_mul_f32_e32 v10, v2, v1 ; 10140302 v_mac_f32_e32 v7, s16, v10 ; 3E0E1410 v_mac_f32_e32 v9, s20, v10 ; 3E121414 v_mac_f32_e32 v4, s24, v10 ; 3E081418 v_mul_f32_e32 v10, v0, v0 ; 10140100 v_mac_f32_e32 v7, s18, v10 ; 3E0E1412 v_mac_f32_e32 v9, s22, v10 ; 3E121416 v_mac_f32_e32 v4, s26, v10 ; 3E08141A v_mul_f32_e32 v10, s1, v2 ; 10140401 v_mac_f32_e32 v10, s0, v1 ; 3E140200 v_mul_f32_e32 v11, s5, v2 ; 10160405 v_mac_f32_e32 v11, s4, v1 ; 3E160204 v_mul_f32_e32 v12, s9, v2 ; 10180409 v_mac_f32_e32 v12, s8, v1 ; 3E180208 v_mac_f32_e32 v10, s2, v0 ; 3E140002 v_mac_f32_e32 v11, s6, v0 ; 3E160006 v_mac_f32_e32 v12, s10, v0 ; 3E18000A v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mac_f32_e32 v7, s19, v0 ; 3E0E0013 v_mac_f32_e32 v9, s23, v0 ; 3E120017 v_mac_f32_e32 v4, s27, v0 ; 3E08001B v_mul_f32_e32 v0, v2, v2 ; 10000502 v_mad_f32 v0, v1, v1, -v0 ; D2820000 84020301 v_mac_f32_e32 v7, s28, v0 ; 3E0E001C v_mac_f32_e32 v9, s29, v0 ; 3E12001D v_mac_f32_e32 v4, s12, v0 ; 3E08000C v_add_f32_e32 v0, s3, v10 ; 06001403 v_add_f32_e32 v0, v0, v7 ; 06000F00 v_add_f32_e32 v1, s7, v11 ; 06021607 v_add_f32_e32 v1, v1, v9 ; 06021301 v_add_f32_e32 v2, s11, v12 ; 0604180B v_add_f32_e32 v2, v2, v4 ; 06040902 exp 15, 35, 0, 0, 0, v0, v1, v2, v25 ; F800023F 19020100 exp 15, 12, 0, 1, 0, v3, v5, v6, v8 ; F80008CF 08060503 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 104 VGPRS: 60 Code Size: 2196 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0..1] DCL CONST[3..4] DCL CONST[6..7] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { 0.5000, -1.0000, 0.0000, 5.0000} IMM[1] FLT32 { 1.0000, 2.0000, 0.8800, 1.3000} 0: ADD TEMP[0].x, IN[0].zzzz, IMM[0].xxxx 1: FLR TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].x, TEMP[0].xxxx, CONST[3].xxxx 3: FLR TEMP[1].x, TEMP[0].xxxx 4: MUL TEMP[1].x, TEMP[1].xxxx, CONST[3].yyyy 5: FRC TEMP[0].x, TEMP[0].xxxx 6: FRC TEMP[1].x, TEMP[1].xxxx 7: MOV TEMP[0].y, TEMP[1].xxxx 8: DDX TEMP[1].xy, IN[0].xyyy 9: MUL TEMP[2], CONST[7].xxxx, IN[0].xyyy 10: DDY TEMP[2].xy, TEMP[2] 11: DP2 TEMP[1].x, TEMP[1].xyyy, TEMP[1].xyyy 12: SQRT TEMP[1].x, TEMP[1].xxxx 13: DP2 TEMP[2].x, TEMP[2].xyyy, TEMP[2].xyyy 14: SQRT TEMP[2].x, TEMP[2].xxxx 15: MOV TEMP[1].y, TEMP[2].xxxx 16: DP2 TEMP[1].x, TEMP[1].xyyy, TEMP[1].xyyy 17: SQRT TEMP[1].x, TEMP[1].xxxx 18: MUL TEMP[1].x, TEMP[1].xxxx, CONST[3].wwww 19: LG2 TEMP[1].x, TEMP[1].xxxx 20: ADD TEMP[1].x, TEMP[1].xxxx, IMM[0].yyyy 21: ADD TEMP[1].x, TEMP[1].xxxx, IN[0].wwww 22: MAX TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz 23: MIN TEMP[1].x, TEMP[1].xxxx, IMM[0].wwww 24: CEIL TEMP[2].x, TEMP[1].xxxx 25: EX2 TEMP[2].x, TEMP[2].xxxx 26: MUL TEMP[2].x, TEMP[2].xxxx, CONST[3].zzzz 27: RCP TEMP[3].x, CONST[3].wwww 28: MUL TEMP[3].x, IMM[0].xxxx, TEMP[3].xxxx 29: FRC TEMP[4].xy, IN[0].xyyy 30: MUL TEMP[5].x, TEMP[2].xxxx, IMM[1].yyyy 31: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[3].xxxx 32: ADD TEMP[5].x, IMM[1].xxxx, -TEMP[5].xxxx 33: MUL TEMP[2].x, TEMP[3].xxxx, TEMP[2].xxxx 34: MAD TEMP[2].xy, TEMP[4].xyyy, TEMP[5].xxxx, TEMP[2].xxxx 35: MAD TEMP[0].xy, TEMP[2].xyyy, CONST[3].xyyy, TEMP[0].xyyy 36: MOV TEMP[0].xy, TEMP[0].xyyy 37: MOV TEMP[0].w, TEMP[1].xxxx 38: TXL TEMP[0], TEMP[0], SAMP[0], 2D 39: FSLT TEMP[1].x, TEMP[0].wwww, CONST[6].xxxx 40: AND TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx 41: KILL_IF -TEMP[1].xxxx 42: MOV TEMP[1].xy, IN[1].xyyy 43: MOV TEMP[1].w, IN[1].wwww 44: TXP TEMP[1], TEMP[1], SAMP[1], 2D 45: LG2 TEMP[2].x, TEMP[1].xxxx 46: LG2 TEMP[2].y, TEMP[1].yyyy 47: LG2 TEMP[2].z, TEMP[1].zzzz 48: LG2 TEMP[2].w, TEMP[1].wwww 49: MOV TEMP[1].xyz, -TEMP[2] 50: ADD TEMP[1].xyz, TEMP[1].xyzz, IN[3].xyzz 51: MUL TEMP[2].xyz, TEMP[0].xyzz, IN[2].yzww 52: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[1].zzzz 53: POW TEMP[3].x, TEMP[1].xxxx, IMM[1].wwww 54: POW TEMP[3].y, TEMP[1].yyyy, IMM[1].wwww 55: POW TEMP[3].z, TEMP[1].zzzz, IMM[1].wwww 56: MAD TEMP[1].xyz, TEMP[3].xyzz, CONST[4].yyyy, CONST[4].xxxx 57: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[1].xyzz 58: MOV TEMP[0].w, TEMP[0].wwww 59: MAD TEMP[2].x, IN[2].xxxx, CONST[1].zzzz, CONST[1].wwww 60: MOV_SAT TEMP[2].x, TEMP[2].xxxx 61: LRP TEMP[0].xyz, TEMP[2].xxxx, TEMP[1].xyzz, CONST[0].xyzz 62: MOV OUT[0], TEMP[0] 63: END ; ModuleID = 'tgsi' @ddxy_lds = external addrspace(3) global [64 x i32] define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %37 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %38 = load <32 x i8>, <32 x i8> addrspace(2)* %37, align 32, !tbaa !0 %39 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 %41 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %42 = bitcast <8 x i32> addrspace(2)* %41 to <32 x i8> addrspace(2)* %43 = load <32 x i8>, <32 x i8> addrspace(2)* %42, align 32, !tbaa !0 %44 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %45 = bitcast <4 x i32> addrspace(2)* %44 to <16 x i8> addrspace(2)* %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %55 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %56 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %57 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %58 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %59 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %60 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %61 = fadd float %49, 5.000000e-01 %62 = call float @llvm.floor.f32(float %61) %63 = fmul float %62, %29 %64 = call float @llvm.floor.f32(float %63) %65 = fmul float %64, %30 %66 = call float @llvm.floor.f32(float %63) %67 = fsub float %63, %66 %68 = call float @llvm.floor.f32(float %65) %69 = fsub float %65, %68 %70 = call i32 @llvm.SI.tid() %71 = sext i32 %70 to i64 %72 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %71 %73 = bitcast i32 addrspace(3)* %72 to float addrspace(3)* store float %48, float addrspace(3)* %73, align 4 %74 = call i32 @llvm.SI.tid() %75 = sext i32 %74 to i64 %76 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %75 %77 = and i32 %74, -4 %78 = sext i32 %77 to i64 %79 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %78 %80 = or i32 %77, 1 %81 = sext i32 %80 to i64 %82 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %81 %83 = bitcast i32 addrspace(3)* %76 to float addrspace(3)* store float %47, float addrspace(3)* %83, align 4 %84 = bitcast i32 addrspace(3)* %79 to float addrspace(3)* %85 = load float, float addrspace(3)* %84, align 4 %86 = bitcast i32 addrspace(3)* %82 to float addrspace(3)* %87 = load float, float addrspace(3)* %86, align 4 %88 = fsub float %87, %85 %89 = bitcast i32 addrspace(3)* %76 to float addrspace(3)* store float %48, float addrspace(3)* %89, align 4 %90 = bitcast i32 addrspace(3)* %79 to float addrspace(3)* %91 = load float, float addrspace(3)* %90, align 4 %92 = bitcast i32 addrspace(3)* %82 to float addrspace(3)* %93 = load float, float addrspace(3)* %92, align 4 %94 = fsub float %93, %91 %95 = fmul float %36, %47 %96 = fmul float %36, %48 %97 = fmul float %36, %48 %98 = call i32 @llvm.SI.tid() %99 = sext i32 %98 to i64 %100 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %99 %101 = bitcast i32 addrspace(3)* %100 to float addrspace(3)* store float %97, float addrspace(3)* %101, align 4 %102 = call i32 @llvm.SI.tid() %103 = sext i32 %102 to i64 %104 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %103 %105 = and i32 %102, -4 %106 = sext i32 %105 to i64 %107 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %106 %108 = or i32 %105, 2 %109 = sext i32 %108 to i64 %110 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %109 %111 = bitcast i32 addrspace(3)* %104 to float addrspace(3)* store float %95, float addrspace(3)* %111, align 4 %112 = bitcast i32 addrspace(3)* %107 to float addrspace(3)* %113 = load float, float addrspace(3)* %112, align 4 %114 = bitcast i32 addrspace(3)* %110 to float addrspace(3)* %115 = load float, float addrspace(3)* %114, align 4 %116 = fsub float %115, %113 %117 = bitcast i32 addrspace(3)* %104 to float addrspace(3)* store float %96, float addrspace(3)* %117, align 4 %118 = bitcast i32 addrspace(3)* %107 to float addrspace(3)* %119 = load float, float addrspace(3)* %118, align 4 %120 = bitcast i32 addrspace(3)* %110 to float addrspace(3)* %121 = load float, float addrspace(3)* %120, align 4 %122 = fsub float %121, %119 %123 = bitcast i32 addrspace(3)* %104 to float addrspace(3)* store float %97, float addrspace(3)* %123, align 4 %124 = fmul float %88, %88 %125 = fmul float %94, %94 %126 = fadd float %124, %125 %127 = call float @llvm.sqrt.f32(float %126) %128 = fmul float %116, %116 %129 = fmul float %122, %122 %130 = fadd float %128, %129 %131 = call float @llvm.sqrt.f32(float %130) %132 = fmul float %127, %127 %133 = fmul float %131, %131 %134 = fadd float %132, %133 %135 = call float @llvm.sqrt.f32(float %134) %136 = fmul float %135, %32 %137 = call float @llvm.log2.f32(float %136) %138 = fadd float %137, -1.000000e+00 %139 = fadd float %138, %50 %140 = call float @llvm.maxnum.f32(float %139, float 0.000000e+00) %141 = call float @llvm.minnum.f32(float %140, float 5.000000e+00) %142 = call float @llvm.ceil.f32(float %141) %143 = call float @llvm.AMDIL.exp.(float %142) %144 = fmul float %143, %31 %145 = fdiv float 1.000000e+00, %32 %146 = fmul float %145, 5.000000e-01 %147 = call float @llvm.floor.f32(float %47) %148 = fsub float %47, %147 %149 = call float @llvm.floor.f32(float %48) %150 = fsub float %48, %149 %151 = fmul float %144, 2.000000e+00 %152 = fmul float %151, %146 %153 = fsub float 1.000000e+00, %152 %154 = fmul float %146, %144 %155 = fmul float %148, %153 %156 = fadd float %155, %154 %157 = fmul float %150, %153 %158 = fadd float %157, %154 %159 = fmul float %156, %29 %160 = fadd float %159, %67 %161 = fmul float %158, %30 %162 = fadd float %161, %69 %163 = bitcast float %160 to i32 %164 = bitcast float %162 to i32 %165 = bitcast float %141 to i32 %166 = insertelement <4 x i32> undef, i32 %163, i32 0 %167 = insertelement <4 x i32> %166, i32 %164, i32 1 %168 = insertelement <4 x i32> %167, i32 %165, i32 2 %169 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %168, <32 x i8> %38, <16 x i8> %40, i32 2) %170 = extractelement <4 x float> %169, i32 0 %171 = extractelement <4 x float> %169, i32 1 %172 = extractelement <4 x float> %169, i32 2 %173 = extractelement <4 x float> %169, i32 3 %174 = fcmp olt float %173, %35 %175 = select i1 %174, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %175) %176 = fdiv float %51, %53 %177 = fdiv float %52, %53 %178 = bitcast float %176 to i32 %179 = bitcast float %177 to i32 %180 = insertelement <2 x i32> undef, i32 %178, i32 0 %181 = insertelement <2 x i32> %180, i32 %179, i32 1 %182 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %181, <32 x i8> %43, <16 x i8> %46, i32 2) %183 = extractelement <4 x float> %182, i32 0 %184 = extractelement <4 x float> %182, i32 1 %185 = extractelement <4 x float> %182, i32 2 %186 = call float @llvm.log2.f32(float %183) %187 = call float @llvm.log2.f32(float %184) %188 = call float @llvm.log2.f32(float %185) %189 = fsub float %58, %186 %190 = fsub float %59, %187 %191 = fsub float %60, %188 %192 = fmul float %170, %55 %193 = fmul float %171, %56 %194 = fmul float %172, %57 %195 = fmul float %189, 0x3FEC28F5C0000000 %196 = fmul float %190, 0x3FEC28F5C0000000 %197 = fmul float %191, 0x3FEC28F5C0000000 %198 = call float @llvm.pow.f32(float %195, float 0x3FF4CCCCC0000000) %199 = call float @llvm.pow.f32(float %196, float 0x3FF4CCCCC0000000) %200 = call float @llvm.pow.f32(float %197, float 0x3FF4CCCCC0000000) %201 = fmul float %198, %34 %202 = fadd float %201, %33 %203 = fmul float %199, %34 %204 = fadd float %203, %33 %205 = fmul float %200, %34 %206 = fadd float %205, %33 %207 = fmul float %192, %202 %208 = fmul float %193, %204 %209 = fmul float %194, %206 %210 = fmul float %54, %27 %211 = fadd float %210, %28 %212 = call float @llvm.AMDIL.clamp.(float %211, float 0.000000e+00, float 1.000000e+00) %213 = call float @llvm.AMDGPU.lrp(float %212, float %207, float %24) %214 = call float @llvm.AMDGPU.lrp(float %212, float %208, float %25) %215 = call float @llvm.AMDGPU.lrp(float %212, float %209, float %26) %216 = call i32 @llvm.SI.packf16(float %213, float %214) %217 = bitcast i32 %216 to float %218 = call i32 @llvm.SI.packf16(float %215, float %173) %219 = bitcast i32 %218 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %217, float %219, float %217, float %219) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: readnone declare i32 @llvm.SI.tid() #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.ceil.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 3, 1, [m0] ; C8200700 v_interp_p2_f32 v8, [v8], v1, 3, 1, [m0] ; C8210701 v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800 v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801 v_interp_p1_f32 v10, v0, 1, 2, [m0] ; C8280900 v_interp_p2_f32 v10, [v10], v1, 1, 2, [m0] ; C8290901 v_interp_p1_f32 v11, v0, 2, 2, [m0] ; C82C0A00 v_interp_p2_f32 v11, [v11], v1, 2, 2, [m0] ; C82D0A01 v_interp_p1_f32 v12, v0, 3, 2, [m0] ; C8300B00 v_interp_p2_f32 v12, [v12], v1, 3, 2, [m0] ; C8310B01 v_interp_p1_f32 v13, v0, 0, 3, [m0] ; C8340C00 v_interp_p2_f32 v13, [v13], v1, 0, 3, [m0] ; C8350C01 v_interp_p1_f32 v14, v0, 1, 3, [m0] ; C8380D00 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p2_f32 v14, [v14], v1, 1, 3, [m0] ; C8390D01 v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00 v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01 v_mbcnt_lo_u32_b32_e64 v1, -1, 0 ; D2460001 000100C1 v_mbcnt_hi_u32_b32_e32 v1, -1, v1 ; 480202C1 v_lshlrev_b32_e32 v15, 2, v1 ; 341E0282 s_mov_b32 m0, -1 ; BEFC03C1 ds_write_b32 v15, v3 ; D8340000 0000030F ds_write_b32 v15, v2 ; D8340000 0000020F s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x1c ; C204011C v_and_b32_e32 v1, -4, v1 ; 360202C4 v_or_b32_e32 v16, 1, v1 ; 38200281 v_lshlrev_b32_e32 v16, 2, v16 ; 34202082 v_lshlrev_b32_e32 v17, 2, v1 ; 34220282 ds_read_b32 v18, v17 ; D8D80000 12000011 ds_read_b32 v19, v16 ; D8D80000 13000010 ds_write_b32 v15, v3 ; D8340000 0000030F ds_read_b32 v16, v16 ; D8D80000 10000010 ds_read_b32 v20, v17 ; D8D80000 14000011 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v21, s8, v3 ; 102A0608 ds_write_b32 v15, v21 ; D8340000 0000150F v_mul_f32_e32 v22, s8, v2 ; 102C0408 ds_write_b32 v15, v22 ; D8340000 0000160F v_or_b32_e32 v1, 2, v1 ; 38020282 v_lshlrev_b32_e32 v1, 2, v1 ; 34020282 s_waitcnt lgkmcnt(0) ; BF8C007F ds_read_b32 v22, v17 ; D8D80000 16000011 ds_read_b32 v23, v1 ; D8D80000 17000001 ds_write_b32 v15, v21 ; D8340000 0000150F ds_read_b32 v17, v17 ; D8D80000 11000011 ds_read_b32 v1, v1 ; D8D80000 01000001 ds_write_b32 v15, v21 ; D8340000 0000150F v_subrev_f32_e32 v15, v18, v19 ; 0A1E2712 v_subrev_f32_e32 v16, v20, v16 ; 0A202114 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v18, v22, v23 ; 0A242F16 v_subrev_f32_e32 v1, v17, v1 ; 0A020311 v_mul_f32_e32 v16, v16, v16 ; 10202110 v_mac_f32_e32 v16, v15, v15 ; 3E201F0F v_mul_f32_e32 v1, v1, v1 ; 10020301 v_mac_f32_e32 v1, v18, v18 ; 3E022512 v_sqrt_f32_e32 v15, v16 ; 7E1E6710 s_buffer_load_dword s8, s[0:3], 0xc ; C204010C v_sqrt_f32_e32 v1, v1 ; 7E026701 v_mul_f32_e32 v1, v1, v1 ; 10020301 s_buffer_load_dword s9, s[0:3], 0xf ; C204810F v_mac_f32_e32 v1, v15, v15 ; 3E021F0F s_buffer_load_dword s10, s[0:3], 0xd ; C205010D v_add_f32_e32 v4, 0.5, v4 ; 060808F0 v_floor_f32_e32 v4, v4 ; 7E084904 s_buffer_load_dword s11, s[0:3], 0xe ; C205810E s_buffer_load_dword s12, s[0:3], 0x10 ; C2060110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v15, s8, v4 ; 101E0808 v_floor_f32_e32 v15, v15 ; 7E1E490F v_sqrt_f32_e32 v1, v1 ; 7E026701 v_mul_f32_e32 v1, s9, v1 ; 10020209 v_log_f32_e32 v1, v1 ; 7E024F01 v_mul_f32_e32 v16, s10, v15 ; 10201E0A v_floor_f32_e32 v16, v16 ; 7E204910 v_mad_f32 v17, v4, s8, -v15 ; D2820011 843C1104 v_mad_f32 v18, v15, s10, -v16 ; D2820012 8440150F v_add_f32_e32 v1, -1.0, v1 ; 060202F3 v_add_f32_e32 v1, v5, v1 ; 06020305 v_max_f32_e32 v1, 0, v1 ; 20020280 v_min_f32_e32 v19, 0x40a00000, v1 ; 1E2602FF 40A00000 v_ceil_f32_e32 v1, v19 ; 7E024513 v_exp_f32_e32 v1, v1 ; 7E024B01 v_mul_f32_e32 v1, s11, v1 ; 1002020B v_rcp_f32_e32 v4, s9 ; 7E085409 v_floor_f32_e32 v5, v2 ; 7E0A4902 v_subrev_f32_e32 v2, v5, v2 ; 0A040505 v_floor_f32_e32 v5, v3 ; 7E0A4903 v_subrev_f32_e32 v3, v5, v3 ; 0A060705 v_mul_f32_e32 v4, 0.5, v4 ; 100808F0 s_buffer_load_dword s9, s[0:3], 0x18 ; C2048118 s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500 s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 v_mul_f32_e32 v5, -2.0, v1 ; 100A02F5 v_mad_f32 v5, v5, v4, 1.0 ; D2820005 03CA0905 v_mul_f32_e32 v1, v1, v4 ; 10020901 v_mad_f32 v2, v5, v2, v1 ; D2820002 04060505 v_mac_f32_e32 v1, v5, v3 ; 3E020705 v_mac_f32_e32 v17, s8, v2 ; 3E220408 v_mac_f32_e32 v18, s10, v1 ; 3E24020A s_load_dwordx4 s[28:31], s[4:5], 0x4 ; C08E0504 s_load_dwordx8 s[32:39], s[6:7], 0x8 ; C0D00708 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[1:4], 15, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[20:27], s[16:19] ; F0900F00 00850111 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_gt_f32_e32 vcc, s9, v4 ; 7C080809 v_cndmask_b32_e64 v5, 0, -1.0, vcc ; D2000005 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v5 ; 7C260A80 v_mov_b32_e32 v5, 0x6f800000 ; 7E0A02FF 6F800000 v_cmp_gt_f32_e64 vcc, |v8|, v5 ; D008016A 00020B08 v_mov_b32_e32 v5, 0x2f800000 ; 7E0A02FF 2F800000 v_cndmask_b32_e32 v5, 1.0, v5 ; 000A0AF2 v_mul_f32_e32 v8, v5, v8 ; 10101105 v_rcp_f32_e32 v8, v8 ; 7E105508 v_mul_f32_e32 v6, v8, v6 ; 100C0D08 v_mul_f32_e32 v7, v8, v7 ; 100E0F08 v_mul_f32_e32 v15, v6, v5 ; 101E0B06 v_mul_f32_e32 v16, v7, v5 ; 10200B07 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 image_sample v[5:7], 7, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[32:39], s[28:31] ; F0800700 00E8050F s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_log_f32_e32 v7, v7 ; 7E0E4F07 v_log_f32_e32 v5, v5 ; 7E0A4F05 v_log_f32_e32 v6, v6 ; 7E0C4F06 s_buffer_load_dword s6, s[0:3], 0x11 ; C2030111 v_subrev_f32_e32 v0, v7, v0 ; 0A000107 v_mov_b32_e32 v7, 0x3f6147ae ; 7E0E02FF 3F6147AE v_mul_f32_e32 v0, v7, v0 ; 10000107 v_log_f32_e32 v0, v0 ; 7E004F00 v_mov_b32_e32 v8, s4 ; 7E100204 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 v_mac_f32_e32 v8, s5, v9 ; 3E101205 v_mov_b32_e32 v9, 0x3fa66666 ; 7E1202FF 3FA66666 v_mul_legacy_f32_e32 v0, v9, v0 ; 0E000109 v_exp_f32_e32 v0, v0 ; 7E004B00 v_mov_b32_e32 v15, s12 ; 7E1E020C s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v15, s6, v0 ; 3E1E0006 v_mul_f32_e32 v0, v12, v3 ; 1000070C v_mul_f32_e32 v0, v15, v0 ; 1000010F v_add_f32_e64 v3, 0, v8 clamp ; D2060803 00021080 v_sub_f32_e32 v8, 1.0, v3 ; 081006F2 v_mul_f32_e32 v12, s4, v8 ; 10181004 v_mac_f32_e32 v12, v0, v3 ; 3E180700 v_mul_f32_e32 v0, v10, v1 ; 1000030A v_mul_f32_e32 v1, v11, v2 ; 1002050B v_cvt_pkrtz_f16_f32_e32 v2, v12, v4 ; 5E04090C v_subrev_f32_e32 v4, v5, v13 ; 0A081B05 v_subrev_f32_e32 v5, v6, v14 ; 0A0A1D06 v_mul_f32_e32 v4, v7, v4 ; 10080907 v_mul_f32_e32 v5, v7, v5 ; 100A0B07 v_log_f32_e32 v4, v4 ; 7E084F04 v_log_f32_e32 v5, v5 ; 7E0A4F05 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s0, s[0:3], 0x1 ; C2000101 v_mul_legacy_f32_e32 v4, v9, v4 ; 0E080909 v_exp_f32_e32 v4, v4 ; 7E084B04 v_mul_legacy_f32_e32 v5, v9, v5 ; 0E0A0B09 v_mov_b32_e32 v6, s12 ; 7E0C020C v_mad_f32 v4, s6, v4, v6 ; D2820004 041A0806 v_mov_b32_e32 v6, s12 ; 7E0C020C v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_mad_f32 v5, s6, v5, v6 ; D2820005 041A0A06 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_mul_f32_e32 v1, v5, v1 ; 10020305 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v8 ; 10081004 v_mac_f32_e32 v4, v0, v3 ; 3E080700 v_mul_f32_e32 v0, s0, v8 ; 10001000 v_mac_f32_e32 v0, v1, v3 ; 3E000701 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v0, v2, v0, v2 ; F8001C0F 02000200 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 24 Code Size: 888 bytes LDS: 1 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL OUT[6], GENERIC[5] DCL OUT[7], GENERIC[6] DCL OUT[8], GENERIC[7] DCL CONST[0..20] DCL TEMP[0..10], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[6], IN[0].xxxx 1: MAD TEMP[0], CONST[7], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0].xyz, CONST[9], IN[0].wwww, TEMP[0] 4: MUL TEMP[1], CONST[17], IN[0].xxxx 5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1] 6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1] 8: MAD TEMP[2].xy, IN[2].xyyy, CONST[14].xyyy, CONST[14].zwww 9: FSEQ TEMP[3].x, CONST[16].xxxx, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].xy, IN[2].xyxx 12: ELSE :0 13: MOV TEMP[3].xy, IN[3].xyxx 14: ENDIF 15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[15].xyyy, CONST[15].zwww 16: MOV TEMP[2].zw, TEMP[3].yyxy 17: MOV TEMP[3].x, CONST[10].xxxx 18: MOV TEMP[3].y, CONST[11].xxxx 19: MOV TEMP[3].z, CONST[12].xxxx 20: MOV TEMP[4].x, CONST[10].yyyy 21: MOV TEMP[4].y, CONST[11].yyyy 22: MOV TEMP[4].z, CONST[12].yyyy 23: MOV TEMP[5].x, CONST[10].zzzz 24: MOV TEMP[5].y, CONST[11].zzzz 25: MOV TEMP[5].z, CONST[12].zzzz 26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx 27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz 28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz 29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 30: RSQ TEMP[4].x, TEMP[4].xxxx 31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 32: MUL TEMP[4].xyz, CONST[6].xyzz, IN[4].xxxx 33: MAD TEMP[4].xyz, CONST[7].xyzz, IN[4].yyyy, TEMP[4].xyzz 34: MAD TEMP[4].xyz, CONST[8].xyzz, IN[4].zzzz, TEMP[4].xyzz 35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 36: RSQ TEMP[5].x, TEMP[5].xxxx 37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx 39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz 40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww 41: MOV TEMP[4].xyz, TEMP[4].xyzx 42: MOV TEMP[5].xyz, TEMP[5].xyzx 43: MOV TEMP[6].xyz, TEMP[3].xyzx 44: MUL TEMP[7].xyw, TEMP[1], IMM[0].yyyy 45: MOV TEMP[8].x, TEMP[7].xxxx 46: MUL TEMP[9].x, TEMP[7].yyyy, CONST[1].xxxx 47: MOV TEMP[8].y, TEMP[9].xxxx 48: ADD TEMP[7].xy, TEMP[8].xyyy, TEMP[7].wwww 49: MOV TEMP[7].zw, TEMP[1].wwzw 50: MUL TEMP[8], TEMP[3].xyzz, TEMP[3].yzzx 51: DP4 TEMP[9].x, CONST[2], TEMP[8] 52: DP4 TEMP[10].x, CONST[3], TEMP[8] 53: MOV TEMP[9].y, TEMP[10].xxxx 54: DP4 TEMP[8].x, CONST[4], TEMP[8] 55: MOV TEMP[9].z, TEMP[8].xxxx 56: MUL TEMP[8].x, TEMP[3].yyyy, TEMP[3].yyyy 57: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[8].xxxx 58: MAD TEMP[3].xyz, CONST[5].xyzz, TEMP[3].xxxx, TEMP[9].xyzz 59: ADD TEMP[8].xyz, TEMP[0].xyzz, -CONST[0].xyzz 60: MOV TEMP[8].yzw, TEMP[8].yxyz 61: MOV TEMP[8].x, TEMP[1].zzzz 62: MOV TEMP[0].xyz, TEMP[0].xyzx 63: MOV OUT[8], TEMP[0] 64: MOV OUT[1], TEMP[2] 65: MOV OUT[3], TEMP[5] 66: MOV OUT[2], TEMP[4] 67: MOV OUT[4], TEMP[6] 68: MOV OUT[5], TEMP[3] 69: MOV OUT[6], TEMP[7] 70: MOV OUT[0], TEMP[1] 71: MOV OUT[7], TEMP[8] 72: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332) %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0 %83 = add i32 %5, %7 %84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83) %85 = extractelement <4 x float> %84, i32 0 %86 = extractelement <4 x float> %84, i32 1 %87 = extractelement <4 x float> %84, i32 2 %88 = extractelement <4 x float> %84, i32 3 %89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0 %91 = add i32 %5, %7 %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91) %93 = extractelement <4 x float> %92, i32 0 %94 = extractelement <4 x float> %92, i32 1 %95 = extractelement <4 x float> %92, i32 2 %96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0 %98 = add i32 %5, %7 %99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !tbaa !0 %104 = add i32 %5, %7 %105 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %103, i32 0, i32 %104) %106 = extractelement <4 x float> %105, i32 0 %107 = extractelement <4 x float> %105, i32 1 %108 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %109 = load <16 x i8>, <16 x i8> addrspace(2)* %108, align 16, !tbaa !0 %110 = add i32 %5, %7 %111 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %109, i32 0, i32 %110) %112 = extractelement <4 x float> %111, i32 0 %113 = extractelement <4 x float> %111, i32 1 %114 = extractelement <4 x float> %111, i32 2 %115 = extractelement <4 x float> %111, i32 3 %116 = fmul float %32, %85 %117 = fmul float %33, %85 %118 = fmul float %34, %85 %119 = fmul float %35, %85 %120 = fmul float %36, %86 %121 = fadd float %120, %116 %122 = fmul float %37, %86 %123 = fadd float %122, %117 %124 = fmul float %38, %86 %125 = fadd float %124, %118 %126 = fmul float %39, %86 %127 = fadd float %126, %119 %128 = fmul float %40, %87 %129 = fadd float %128, %121 %130 = fmul float %41, %87 %131 = fadd float %130, %123 %132 = fmul float %42, %87 %133 = fadd float %132, %125 %134 = fmul float %43, %87 %135 = fadd float %134, %127 %136 = fmul float %44, %88 %137 = fadd float %136, %129 %138 = fmul float %45, %88 %139 = fadd float %138, %131 %140 = fmul float %46, %88 %141 = fadd float %140, %133 %142 = fmul float %65, %85 %143 = fmul float %66, %85 %144 = fmul float %67, %85 %145 = fmul float %68, %85 %146 = fmul float %69, %86 %147 = fadd float %146, %142 %148 = fmul float %70, %86 %149 = fadd float %148, %143 %150 = fmul float %71, %86 %151 = fadd float %150, %144 %152 = fmul float %72, %86 %153 = fadd float %152, %145 %154 = fmul float %73, %87 %155 = fadd float %154, %147 %156 = fmul float %74, %87 %157 = fadd float %156, %149 %158 = fmul float %75, %87 %159 = fadd float %158, %151 %160 = fmul float %76, %87 %161 = fadd float %160, %153 %162 = fmul float %77, %88 %163 = fadd float %162, %155 %164 = fmul float %78, %88 %165 = fadd float %164, %157 %166 = fmul float %79, %88 %167 = fadd float %166, %159 %168 = fmul float %80, %88 %169 = fadd float %168, %161 %170 = fmul float %100, %56 %171 = fadd float %170, %58 %172 = fmul float %101, %57 %173 = fadd float %172, %59 %174 = fcmp oeq float %64, 0.000000e+00 %. = select i1 %174, float %100, float %106 %.44 = select i1 %174, float %101, float %107 %175 = fmul float %., %60 %176 = fadd float %175, %62 %177 = fmul float %.44, %61 %178 = fadd float %177, %63 %179 = fmul float %47, %93 %180 = fmul float %50, %93 %181 = fmul float %53, %93 %182 = fmul float %48, %94 %183 = fadd float %182, %179 %184 = fmul float %51, %94 %185 = fadd float %184, %180 %186 = fmul float %54, %94 %187 = fadd float %186, %181 %188 = fmul float %49, %95 %189 = fadd float %188, %183 %190 = fmul float %52, %95 %191 = fadd float %190, %185 %192 = fmul float %55, %95 %193 = fadd float %192, %187 %194 = fmul float %189, %189 %195 = fmul float %191, %191 %196 = fadd float %195, %194 %197 = fmul float %193, %193 %198 = fadd float %196, %197 %199 = call float @llvm.AMDGPU.rsq.clamped.f32(float %198) %200 = fmul float %189, %199 %201 = fmul float %191, %199 %202 = fmul float %193, %199 %203 = fmul float %32, %112 %204 = fmul float %33, %112 %205 = fmul float %34, %112 %206 = fmul float %36, %113 %207 = fadd float %206, %203 %208 = fmul float %37, %113 %209 = fadd float %208, %204 %210 = fmul float %38, %113 %211 = fadd float %210, %205 %212 = fmul float %40, %114 %213 = fadd float %212, %207 %214 = fmul float %41, %114 %215 = fadd float %214, %209 %216 = fmul float %42, %114 %217 = fadd float %216, %211 %218 = fmul float %213, %213 %219 = fmul float %215, %215 %220 = fadd float %219, %218 %221 = fmul float %217, %217 %222 = fadd float %220, %221 %223 = call float @llvm.AMDGPU.rsq.clamped.f32(float %222) %224 = fmul float %213, %223 %225 = fmul float %215, %223 %226 = fmul float %217, %223 %227 = fmul float %202, %225 %228 = fmul float %200, %226 %229 = fmul float %201, %224 %230 = fmul float %201, %226 %231 = fsub float %230, %227 %232 = fmul float %202, %224 %233 = fsub float %232, %228 %234 = fmul float %200, %225 %235 = fsub float %234, %229 %236 = fmul float %231, %115 %237 = fmul float %233, %115 %238 = fmul float %235, %115 %239 = fmul float %163, 5.000000e-01 %240 = fmul float %165, 5.000000e-01 %241 = fmul float %169, 5.000000e-01 %242 = fmul float %240, %16 %243 = fadd float %239, %241 %244 = fadd float %242, %241 %245 = fmul float %200, %201 %246 = fmul float %201, %202 %247 = fmul float %202, %202 %248 = fmul float %202, %200 %249 = fmul float %17, %245 %250 = fmul float %18, %246 %251 = fadd float %249, %250 %252 = fmul float %19, %247 %253 = fadd float %251, %252 %254 = fmul float %20, %248 %255 = fadd float %253, %254 %256 = fmul float %21, %245 %257 = fmul float %22, %246 %258 = fadd float %256, %257 %259 = fmul float %23, %247 %260 = fadd float %258, %259 %261 = fmul float %24, %248 %262 = fadd float %260, %261 %263 = fmul float %25, %245 %264 = fmul float %26, %246 %265 = fadd float %263, %264 %266 = fmul float %27, %247 %267 = fadd float %265, %266 %268 = fmul float %28, %248 %269 = fadd float %267, %268 %270 = fmul float %201, %201 %271 = fmul float %200, %200 %272 = fsub float %271, %270 %273 = fmul float %29, %272 %274 = fadd float %273, %255 %275 = fmul float %30, %272 %276 = fadd float %275, %262 %277 = fmul float %31, %272 %278 = fadd float %277, %269 %279 = fsub float %137, %13 %280 = fsub float %139, %14 %281 = fsub float %141, %15 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %171, float %173, float %176, float %178) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %224, float %225, float %226, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %236, float %237, float %238, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %200, float %201, float %202, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %274, float %276, float %278, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %243, float %244, float %167, float %169) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %167, float %279, float %280, float %281) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %137, float %139, float %141, float %135) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %163, float %165, float %167, float %169) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0 ; 7E020280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908 s_load_dwordx4 s[16:19], s[8:9], 0xc ; C088090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s32, s[28:31], 0x23 ; C2101D23 buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[11:14], v0, s[16:19], 0 idxen ; E00C2000 80040B00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[13:16], v0, s[8:11], 0 idxen ; E00C2000 80020D00 s_buffer_load_dword s33, s[28:31], 0x24 ; C2109D24 s_buffer_load_dword s34, s[28:31], 0x25 ; C2111D25 s_buffer_load_dword s35, s[28:31], 0x26 ; C2119D26 s_buffer_load_dword s36, s[28:31], 0x28 ; C2121D28 s_buffer_load_dword s3, s[28:31], 0x13 ; C2019D13 s_buffer_load_dword s2, s[28:31], 0x14 ; C2011D14 s_buffer_load_dword s0, s[28:31], 0x15 ; C2001D15 s_buffer_load_dword s1, s[28:31], 0x16 ; C2009D16 s_buffer_load_dword s18, s[28:31], 0x18 ; C2091D18 s_buffer_load_dword s37, s[28:31], 0x29 ; C2129D29 s_buffer_load_dword s38, s[28:31], 0x2a ; C2131D2A s_buffer_load_dword s39, s[28:31], 0x2c ; C2139D2C s_buffer_load_dword s40, s[28:31], 0x2d ; C2141D2D s_buffer_load_dword s41, s[28:31], 0x2e ; C2149D2E s_buffer_load_dword s20, s[28:31], 0x19 ; C20A1D19 s_buffer_load_dword s19, s[28:31], 0x1a ; C2099D1A s_buffer_load_dword s42, s[28:31], 0x1b ; C2151D1B s_buffer_load_dword s23, s[28:31], 0x1c ; C20B9D1C s_buffer_load_dword s22, s[28:31], 0x1d ; C20B1D1D s_buffer_load_dword s43, s[28:31], 0x30 ; C2159D30 s_buffer_load_dword s44, s[28:31], 0x31 ; C2161D31 s_buffer_load_dword s45, s[28:31], 0x32 ; C2169D32 s_buffer_load_dword s46, s[28:31], 0x38 ; C2171D38 s_buffer_load_dword s47, s[28:31], 0x39 ; C2179D39 s_buffer_load_dword s24, s[28:31], 0x1e ; C20C1D1E s_buffer_load_dword s48, s[28:31], 0x1f ; C2181D1F s_buffer_load_dword s26, s[28:31], 0x20 ; C20D1D20 s_buffer_load_dword s27, s[28:31], 0x21 ; C20D9D21 s_buffer_load_dword s25, s[28:31], 0x22 ; C20C9D22 s_buffer_load_dword s4, s[28:31], 0x3f ; C2021D3F s_buffer_load_dword s5, s[28:31], 0x40 ; C2029D40 s_buffer_load_dword s49, s[28:31], 0x44 ; C2189D44 s_buffer_load_dword s50, s[28:31], 0x45 ; C2191D45 s_buffer_load_dword s51, s[28:31], 0x46 ; C2199D46 s_buffer_load_dword s6, s[28:31], 0x3a ; C2031D3A s_buffer_load_dword s8, s[28:31], 0x3b ; C2041D3B s_buffer_load_dword s52, s[28:31], 0x3c ; C21A1D3C s_buffer_load_dword s53, s[28:31], 0x3d ; C21A9D3D s_buffer_load_dword s14, s[28:31], 0x3e ; C2071D3E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s4 ; 7E000204 s_buffer_load_dword s13, s[28:31], 0x9 ; C2069D09 v_cmp_eq_f32_e64 vcc, 0, s5 ; D004006A 00000A80 s_buffer_load_dword s9, s[28:31], 0xa ; C2049D0A s_buffer_load_dword s7, s[28:31], 0xb ; C2039D0B s_buffer_load_dword s12, s[28:31], 0xc ; C2061D0C v_mov_b32_e32 v17, s6 ; 7E220206 s_buffer_load_dword s16, s[28:31], 0xd ; C2081D0D v_mov_b32_e32 v18, s8 ; 7E240208 s_buffer_load_dword s11, s[28:31], 0xe ; C2059D0E s_buffer_load_dword s10, s[28:31], 0xf ; C2051D0F s_buffer_load_dword s15, s[28:31], 0x10 ; C2079D10 v_mov_b32_e32 v19, s14 ; 7E26020E s_buffer_load_dword s17, s[28:31], 0x11 ; C2089D11 s_buffer_load_dword s14, s[28:31], 0x12 ; C2071D12 s_buffer_load_dword s54, s[28:31], 0x47 ; C21B1D47 s_buffer_load_dword s55, s[28:31], 0x48 ; C21B9D48 s_buffer_load_dword s56, s[28:31], 0x49 ; C21C1D49 s_buffer_load_dword s57, s[28:31], 0x4a ; C21C9D4A s_buffer_load_dword s58, s[28:31], 0x4b ; C21D1D4B s_buffer_load_dword s4, s[28:31], 0x0 ; C2021D00 s_buffer_load_dword s5, s[28:31], 0x1 ; C2029D01 s_buffer_load_dword s6, s[28:31], 0x2 ; C2031D02 s_buffer_load_dword s8, s[28:31], 0x4 ; C2041D04 s_buffer_load_dword s21, s[28:31], 0x8 ; C20A9D08 s_buffer_load_dword s59, s[28:31], 0x4c ; C21D9D4C s_buffer_load_dword s60, s[28:31], 0x4d ; C21E1D4D s_buffer_load_dword s61, s[28:31], 0x4e ; C21E9D4E s_buffer_load_dword s62, s[28:31], 0x4f ; C21F1D4F s_buffer_load_dword s63, s[28:31], 0x50 ; C21F9D50 s_buffer_load_dword s64, s[28:31], 0x51 ; C2201D51 s_buffer_load_dword s65, s[28:31], 0x52 ; C2209D52 s_buffer_load_dword s28, s[28:31], 0x53 ; C20E1D53 v_mul_f32_e32 v20, s42, v2 ; 1028042A v_mac_f32_e32 v20, s48, v3 ; 3E280630 v_mac_f32_e32 v20, s32, v4 ; 3E280820 v_mac_f32_e32 v17, s46, v9 ; 3E22122E v_mac_f32_e32 v18, s47, v10 ; 3E24142F v_mul_f32_e32 v21, s49, v2 ; 102A0431 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v21, s55, v3 ; 3E2A0637 v_mac_f32_e32 v21, s59, v4 ; 3E2A083B v_mac_f32_e32 v21, s63, v5 ; 3E2A0A3F v_mul_f32_e32 v22, s50, v2 ; 102C0432 v_mac_f32_e32 v22, s56, v3 ; 3E2C0638 v_mac_f32_e32 v22, s60, v4 ; 3E2C083C v_mac_f32_e32 v22, s64, v5 ; 3E2C0A40 v_mul_f32_e32 v23, s51, v2 ; 102E0433 v_mac_f32_e32 v23, s57, v3 ; 3E2E0639 v_mac_f32_e32 v23, s61, v4 ; 3E2E083D v_mac_f32_e32 v23, s65, v5 ; 3E2E0A41 v_mul_f32_e32 v24, s54, v2 ; 10300436 v_mac_f32_e32 v24, s58, v3 ; 3E30063A v_mac_f32_e32 v24, s62, v4 ; 3E30083E v_mac_f32_e32 v24, s28, v5 ; 3E300A1C v_cndmask_b32_e32 v9, v11, v9 ; 0012130B v_cndmask_b32_e32 v10, v12, v10 ; 0014150C v_mul_f32_e32 v11, s36, v6 ; 10160C24 v_mac_f32_e32 v11, s37, v7 ; 3E160E25 v_mul_f32_e32 v12, s39, v6 ; 10180C27 v_mac_f32_e32 v12, s40, v7 ; 3E180E28 v_mul_f32_e32 v6, s43, v6 ; 100C0C2B v_mac_f32_e32 v6, s44, v7 ; 3E0C0E2C v_mac_f32_e32 v11, s38, v8 ; 3E161026 v_mac_f32_e32 v12, s41, v8 ; 3E181029 v_mac_f32_e32 v6, s45, v8 ; 3E0C102D v_mul_f32_e32 v7, s18, v2 ; 100E0412 v_mac_f32_e32 v7, s23, v3 ; 3E0E0617 v_mac_f32_e32 v7, s26, v4 ; 3E0E081A v_mac_f32_e32 v7, s33, v5 ; 3E0E0A21 v_mul_f32_e32 v8, s20, v2 ; 10100414 v_mac_f32_e32 v8, s22, v3 ; 3E100616 v_mac_f32_e32 v8, s27, v4 ; 3E10081B v_mac_f32_e32 v8, s34, v5 ; 3E100A22 v_mul_f32_e32 v2, s19, v2 ; 10040413 v_mac_f32_e32 v2, s24, v3 ; 3E040618 v_mac_f32_e32 v2, s25, v4 ; 3E040819 v_mac_f32_e32 v2, s35, v5 ; 3E040A23 v_mac_f32_e32 v19, s52, v9 ; 3E261234 v_mac_f32_e32 v0, s53, v10 ; 3E001435 exp 15, 32, 0, 0, 0, v17, v18, v19, v0 ; F800020F 00131211 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s18, v13 ; 10001A12 v_mac_f32_e32 v0, s23, v14 ; 3E001C17 v_mul_f32_e32 v3, s20, v13 ; 10061A14 v_mac_f32_e32 v3, s22, v14 ; 3E061C16 v_mul_f32_e32 v4, s19, v13 ; 10081A13 v_mac_f32_e32 v4, s24, v14 ; 3E081C18 v_mac_f32_e32 v0, s26, v15 ; 3E001E1A v_mac_f32_e32 v3, s27, v15 ; 3E061E1B v_mac_f32_e32 v4, s25, v15 ; 3E081E19 v_mul_f32_e32 v5, v11, v11 ; 100A170B v_mac_f32_e32 v5, v12, v12 ; 3E0A190C v_mac_f32_e32 v5, v6, v6 ; 3E0A0D06 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mul_f32_e32 v9, v0, v0 ; 10120100 v_mac_f32_e32 v9, v3, v3 ; 3E120703 v_mac_f32_e32 v9, v4, v4 ; 3E120904 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mul_f32_e32 v10, v5, v11 ; 10141705 v_mul_f32_e32 v11, v5, v12 ; 10161905 v_mul_f32_e32 v5, v5, v6 ; 100A0D05 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v3, v9, v3 ; 10060709 v_mul_f32_e32 v4, v9, v4 ; 10080909 v_mul_f32_e32 v6, v3, v5 ; 100C0B03 v_mad_f32 v6, v11, v4, -v6 ; D2820006 841A090B v_mul_f32_e32 v9, v4, v10 ; 10121504 v_mad_f32 v9, v5, v0, -v9 ; D2820009 84260105 v_mul_f32_e32 v12, v0, v11 ; 10181700 v_mad_f32 v12, v10, v3, -v12 ; D282000C 8432070A v_mul_f32_e32 v6, v16, v6 ; 100C0D10 v_mul_f32_e32 v9, v16, v9 ; 10121310 v_mul_f32_e32 v12, v16, v12 ; 10181910 exp 15, 33, 0, 0, 0, v0, v3, v4, v1 ; F800021F 01040300 exp 15, 34, 0, 0, 0, v6, v9, v12, v1 ; F800022F 010C0906 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v5, v11 ; 10001705 v_mul_f32_e32 v3, s13, v0 ; 1006000D v_mul_f32_e32 v4, s16, v0 ; 10080010 v_mul_f32_e32 v0, s17, v0 ; 10000011 v_mul_f32_e32 v6, v11, v10 ; 100C150B v_mac_f32_e32 v3, s21, v6 ; 3E060C15 v_mac_f32_e32 v4, s12, v6 ; 3E080C0C v_mac_f32_e32 v0, s15, v6 ; 3E000C0F v_mul_f32_e32 v6, v5, v5 ; 100C0B05 v_mac_f32_e32 v3, s9, v6 ; 3E060C09 v_mac_f32_e32 v4, s11, v6 ; 3E080C0B v_mac_f32_e32 v0, s14, v6 ; 3E000C0E v_mul_f32_e32 v6, v10, v5 ; 100C0B0A v_mac_f32_e32 v3, s7, v6 ; 3E060C07 v_mac_f32_e32 v4, s10, v6 ; 3E080C0A v_mac_f32_e32 v0, s3, v6 ; 3E000C03 exp 15, 35, 0, 0, 0, v10, v11, v5, v1 ; F800023F 01050B0A s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v5, v11, v11 ; 100A170B v_mad_f32 v5, v10, v10, -v5 ; D2820005 8416150A v_mac_f32_e32 v3, s2, v5 ; 3E060A02 v_mac_f32_e32 v4, s0, v5 ; 3E080A00 v_mac_f32_e32 v0, s1, v5 ; 3E000A01 v_mul_f32_e32 v5, 0.5, v22 ; 100A2CF0 v_mul_f32_e32 v6, 0.5, v24 ; 100C30F0 exp 15, 36, 0, 0, 0, v3, v4, v0, v1 ; F800024F 01000403 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, 0.5, v21, v6 ; D2820000 041A2AF0 v_mac_f32_e32 v6, s8, v5 ; 3E0C0A08 exp 15, 37, 0, 0, 0, v0, v6, v23, v24 ; F800025F 18170600 s_waitcnt expcnt(0) ; BF8C070F v_subrev_f32_e32 v0, s4, v7 ; 0A000E04 v_subrev_f32_e32 v1, s5, v8 ; 0A021005 v_subrev_f32_e32 v3, s6, v2 ; 0A060406 exp 15, 38, 0, 0, 0, v23, v0, v1, v3 ; F800026F 03010017 exp 15, 39, 0, 0, 0, v7, v8, v2, v20 ; F800027F 14020807 exp 15, 12, 0, 1, 0, v21, v22, v23, v24 ; F80008CF 18171615 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 28 Code Size: 932 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL IN[6], GENERIC[6], PERSPECTIVE DCL IN[7], GENERIC[7], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SVIEW[0], CUBE, FLOAT DCL SVIEW[1], CUBE, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL CONST[0..5] DCL CONST[8..19] DCL CONST[22..24] DCL CONST[26] DCL TEMP[0..18], LOCAL IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[1] FLT32 { 0.5000, 0.7500, 7.0000, 1.0000} IMM[2] FLT32 { 10.0000, 0.9680, 0.0300, 0.0001} 0: MOV TEMP[0].x, IN[1].xxxx 1: MOV TEMP[0].y, IN[2].xxxx 2: MOV TEMP[0].z, IN[3].xxxx 3: MOV TEMP[1].x, IN[1].yyyy 4: MOV TEMP[1].y, IN[2].yyyy 5: MOV TEMP[1].z, IN[3].yyyy 6: MOV TEMP[2].x, IN[1].zzzz 7: MOV TEMP[2].y, IN[2].zzzz 8: MOV TEMP[2].z, IN[3].zzzz 9: MOV TEMP[3].xy, IN[0].xyyy 10: TEX TEMP[3].yw, TEMP[3], SAMP[3], 2D 11: MAD TEMP[3].xy, TEMP[3].wyyy, IMM[0].xxxx, IMM[0].yyyy 12: MUL TEMP[3].xy, TEMP[3].xyyy, CONST[22].xxxx 13: DP2 TEMP[4].x, TEMP[3].xyyy, TEMP[3].xyyy 14: MOV_SAT TEMP[4].x, TEMP[4].xxxx 15: ADD TEMP[4].x, IMM[0].zzzz, -TEMP[4].xxxx 16: SQRT TEMP[4].x, TEMP[4].xxxx 17: MOV TEMP[3].z, TEMP[4].xxxx 18: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[0].xyzz 19: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[1].xyzz 20: MOV TEMP[0].y, TEMP[1].xxxx 21: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[2].xyzz 22: MOV TEMP[0].z, TEMP[1].xxxx 23: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 24: RSQ TEMP[1].x, TEMP[1].xxxx 25: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 26: DP3 TEMP[1].x, IN[6].yzww, IN[6].yzww 27: RSQ TEMP[1].x, TEMP[1].xxxx 28: MUL TEMP[1].xyz, IN[6].yzww, TEMP[1].xxxx 29: MOV TEMP[2].xy, IN[0].xyyy 30: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D 31: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[2].xyzz 32: LRP TEMP[3].xyz, CONST[23].xxxx, TEMP[2].xyzz, CONST[16].xyzz 33: MUL TEMP[4].x, CONST[23].xxxx, CONST[16].wwww 34: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx 35: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx 36: MOV TEMP[5].xy, IN[0].xyyy 37: TEX TEMP[5].y, TEMP[5], SAMP[4], 2D 38: ADD TEMP[6].x, IMM[0].zzzz, -CONST[26].xxxx 39: MAD TEMP[5].x, TEMP[5].yyyy, CONST[26].xxxx, TEMP[6].xxxx 40: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz 41: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx 42: MOV TEMP[7].xyz, IMM[0].wwww 43: MOV TEMP[8].w, IMM[0].zzzz 44: MOV TEMP[8].xyz, TEMP[0].xyzx 45: DP4 TEMP[9].x, CONST[1], TEMP[8] 46: DP4 TEMP[10].x, CONST[2], TEMP[8] 47: MOV TEMP[9].y, TEMP[10].xxxx 48: DP4 TEMP[8].x, CONST[3], TEMP[8] 49: MOV TEMP[9].z, TEMP[8].xxxx 50: ADD TEMP[8].xyz, IN[4].xyzz, TEMP[9].xyzz 51: MOV TEMP[9].xy, IN[5].xyyy 52: MOV TEMP[9].w, IN[5].wwww 53: TXP TEMP[9].x, TEMP[9], SAMP[5], 2D 54: MUL TEMP[9].xyz, CONST[17].xyzz, TEMP[9].xxxx 55: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx 56: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[1].xyzz 57: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[0].xyzz 58: MUL TEMP[10].xyz, IMM[0].xxxx, TEMP[10].xyzz 59: ADD TEMP[10].xyz, TEMP[1].xyzz, -TEMP[10].xyzz 60: MOV TEMP[11].xyz, TEMP[10].xyzx 61: FSLT TEMP[12].x, IMM[0].wwww, CONST[10].wwww 62: UIF TEMP[12].xxxx :0 63: DP3 TEMP[12].x, TEMP[10].xyzz, TEMP[10].xyzz 64: RSQ TEMP[12].x, TEMP[12].xxxx 65: MUL TEMP[12].xyz, TEMP[10].xyzz, TEMP[12].xxxx 66: MOV TEMP[13].xyz, -IN[7].xyzx 67: ADD TEMP[14].xyz, CONST[8].xyzz, TEMP[13].xyzz 68: RCP TEMP[15].x, TEMP[12].xxxx 69: RCP TEMP[15].y, TEMP[12].yyyy 70: RCP TEMP[15].z, TEMP[12].zzzz 71: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz 72: ADD TEMP[13].xyz, CONST[9].xyzz, TEMP[13].xyzz 73: RCP TEMP[15].x, TEMP[12].xxxx 74: RCP TEMP[15].y, TEMP[12].yyyy 75: RCP TEMP[15].z, TEMP[12].zzzz 76: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz 77: FSLT TEMP[15].xyz, IMM[0].wwww, TEMP[12].xyzz 78: UIF TEMP[15].xxxx :0 79: MOV TEMP[16].x, TEMP[14].xxxx 80: ELSE :0 81: MOV TEMP[16].x, TEMP[13].xxxx 82: ENDIF 83: UIF TEMP[15].yyyy :0 84: MOV TEMP[17].x, TEMP[14].yyyy 85: ELSE :0 86: MOV TEMP[17].x, TEMP[13].yyyy 87: ENDIF 88: UIF TEMP[15].zzzz :0 89: MOV TEMP[14].x, TEMP[14].zzzz 90: ELSE :0 91: MOV TEMP[14].x, TEMP[13].zzzz 92: ENDIF 93: ADD TEMP[13].xyz, CONST[8].xyzz, CONST[9].xyzz 94: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[1].xxxx 95: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx 96: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx 97: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[10].xyzz 98: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[7].xyzz 99: MAD TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xxxx, TEMP[15].xyzz 100: ADD TEMP[11].xyz, TEMP[12].xyzz, -TEMP[13].xyzz 101: ENDIF 102: ADD TEMP[12].x, IMM[0].zzzz, -CONST[24].xxxx 103: POW TEMP[12].x, TEMP[12].xxxx, IMM[1].yyyy 104: MUL TEMP[12].x, TEMP[12].xxxx, IMM[1].zzzz 105: MOV TEMP[11].xyz, TEMP[11].xyzz 106: MOV TEMP[11].w, TEMP[12].xxxx 107: TXL TEMP[11], TEMP[11], SAMP[0], CUBE 108: POW TEMP[12].x, TEMP[11].wwww, CONST[11].yyyy 109: MUL TEMP[12].x, CONST[11].xxxx, TEMP[12].xxxx 110: MUL TEMP[11].xyz, TEMP[12].xxxx, TEMP[11].xyzz 111: FSLT TEMP[12].x, CONST[9].wwww, IMM[1].wwww 112: UIF TEMP[12].xxxx :0 113: MOV TEMP[12].xyz, TEMP[10].xyzx 114: FSLT TEMP[13].x, IMM[0].wwww, CONST[14].wwww 115: UIF TEMP[13].xxxx :0 116: DP3 TEMP[13].x, TEMP[10].xyzz, TEMP[10].xyzz 117: RSQ TEMP[13].x, TEMP[13].xxxx 118: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[13].xxxx 119: MOV TEMP[13].xyz, -IN[7].xyzx 120: ADD TEMP[14].xyz, CONST[12].xyzz, TEMP[13].xyzz 121: RCP TEMP[15].x, TEMP[10].xxxx 122: RCP TEMP[15].y, TEMP[10].yyyy 123: RCP TEMP[15].z, TEMP[10].zzzz 124: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz 125: ADD TEMP[13].xyz, CONST[13].xyzz, TEMP[13].xyzz 126: RCP TEMP[15].x, TEMP[10].xxxx 127: RCP TEMP[15].y, TEMP[10].yyyy 128: RCP TEMP[15].z, TEMP[10].zzzz 129: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz 130: FSLT TEMP[15].xyz, IMM[0].wwww, TEMP[10].xyzz 131: UIF TEMP[15].xxxx :0 132: MOV TEMP[16].x, TEMP[14].xxxx 133: ELSE :0 134: MOV TEMP[16].x, TEMP[13].xxxx 135: ENDIF 136: UIF TEMP[15].yyyy :0 137: MOV TEMP[17].x, TEMP[14].yyyy 138: ELSE :0 139: MOV TEMP[17].x, TEMP[13].yyyy 140: ENDIF 141: UIF TEMP[15].zzzz :0 142: MOV TEMP[14].x, TEMP[14].zzzz 143: ELSE :0 144: MOV TEMP[14].x, TEMP[13].zzzz 145: ENDIF 146: ADD TEMP[13].xyz, CONST[12].xyzz, CONST[13].xyzz 147: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[1].xxxx 148: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx 149: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx 150: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[14].xyzz 151: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[7].xyzz 152: MAD TEMP[10].xyz, TEMP[10].xyzz, TEMP[14].xxxx, TEMP[15].xyzz 153: ADD TEMP[12].xyz, TEMP[10].xyzz, -TEMP[13].xyzz 154: ENDIF 155: ADD TEMP[10].x, IMM[0].zzzz, -CONST[24].xxxx 156: POW TEMP[10].x, TEMP[10].xxxx, IMM[1].yyyy 157: MUL TEMP[10].x, TEMP[10].xxxx, IMM[1].zzzz 158: MOV TEMP[12].xyz, TEMP[12].xyzz 159: MOV TEMP[12].w, TEMP[10].xxxx 160: TXL TEMP[10], TEMP[12], SAMP[1], CUBE 161: POW TEMP[12].x, TEMP[10].wwww, CONST[15].yyyy 162: MUL TEMP[12].x, CONST[15].xxxx, TEMP[12].xxxx 163: MUL TEMP[10].xyz, TEMP[12].xxxx, TEMP[10].xyzz 164: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[11].xyzz, TEMP[10].xyzz 165: ELSE :0 166: MOV TEMP[7].xyz, TEMP[11].xyzx 167: ENDIF 168: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx 169: MOV TEMP[1].xyz, -TEMP[1].xyzx 170: ADD TEMP[5].x, IMM[0].zzzz, -CONST[24].xxxx 171: ADD TEMP[10].xyz, CONST[0].xyzz, TEMP[1].xyzz 172: DP3 TEMP[11].x, TEMP[10].xyzz, TEMP[10].xyzz 173: RSQ TEMP[11].x, TEMP[11].xxxx 174: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[11].xxxx 175: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz 176: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx 177: DP3 TEMP[11].x, CONST[0].xyzz, TEMP[10].xyzz 178: MAX TEMP[11].x, IMM[0].wwww, TEMP[11].xxxx 179: MUL TEMP[12].x, TEMP[5].xxxx, TEMP[5].xxxx 180: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].wwww 181: ADD TEMP[13].x, IMM[0].zzzz, -TEMP[5].xxxx 182: MAD TEMP[13].x, TEMP[13].xxxx, IMM[2].yyyy, IMM[2].zzzz 183: LG2 TEMP[13].x, TEMP[13].xxxx 184: RCP TEMP[13].x, TEMP[13].xxxx 185: MUL TEMP[13].x, IMM[2].xxxx, TEMP[13].xxxx 186: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[13].xxxx 187: ADD TEMP[14].x, IMM[0].zzzz, -TEMP[6].xxxx 188: ADD TEMP[15].x, IMM[0].zzzz, -TEMP[1].xxxx 189: MUL TEMP[16].x, IMM[0].xxxx, TEMP[11].xxxx 190: MUL TEMP[5].x, TEMP[11].xxxx, TEMP[5].xxxx 191: MAD TEMP[5].x, TEMP[16].xxxx, TEMP[5].xxxx, IMM[1].xxxx 192: ADD TEMP[11].x, IMM[0].zzzz, -TEMP[11].xxxx 193: ADD TEMP[16].x, IMM[0].zzzz, -TEMP[1].xxxx 194: ADD TEMP[4].x, IMM[0].zzzz, -TEMP[4].xxxx 195: ADD TEMP[4].x, CONST[24].xxxx, TEMP[4].xxxx 196: MOV_SAT TEMP[4].x, TEMP[4].xxxx 197: MUL TEMP[17].x, TEMP[16].xxxx, TEMP[16].xxxx 198: MUL TEMP[18].x, TEMP[16].xxxx, TEMP[16].xxxx 199: MUL TEMP[16].x, TEMP[18].xxxx, TEMP[16].xxxx 200: MUL TEMP[16].x, TEMP[17].xxxx, TEMP[16].xxxx 201: LRP TEMP[4].xyz, TEMP[16].xxxx, TEMP[4].xxxx, TEMP[3].xyzz 202: LRP TEMP[16].x, TEMP[6].xxxx, IMM[0].zzzz, TEMP[12].xxxx 203: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, TEMP[12].xxxx 204: MAD TEMP[1].x, TEMP[16].xxxx, TEMP[1].xxxx, IMM[2].wwww 205: RCP TEMP[1].x, TEMP[1].xxxx 206: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[10].xyzz 207: MAX TEMP[10].x, IMM[0].wwww, TEMP[10].xxxx 208: POW TEMP[10].x, TEMP[10].xxxx, TEMP[13].xxxx 209: ADD TEMP[12].x, TEMP[13].xxxx, IMM[0].zzzz 210: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].yyyy 211: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[12].xxxx 212: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[10].xxxx 213: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx 214: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx 215: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx 216: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[9].xyzz 217: ADD TEMP[10].xyz, IMM[0].zzzz, -TEMP[3].xyzz 218: MUL TEMP[12].x, TEMP[11].xxxx, TEMP[11].xxxx 219: MUL TEMP[13].x, TEMP[11].xxxx, TEMP[11].xxxx 220: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[11].xxxx 221: MUL TEMP[11].x, TEMP[12].xxxx, TEMP[11].xxxx 222: MAD TEMP[3].xyz, TEMP[10].xyzz, TEMP[11].xxxx, TEMP[3].xyzz 223: ADD TEMP[10].x, TEMP[5].xxxx, IMM[0].yyyy 224: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx 225: MUL TEMP[12].x, TEMP[14].xxxx, TEMP[14].xxxx 226: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[14].xxxx 227: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx 228: MAD TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx, IMM[0].zzzz 229: ADD TEMP[5].x, TEMP[5].xxxx, IMM[0].yyyy 230: MUL TEMP[11].x, TEMP[15].xxxx, TEMP[15].xxxx 231: MUL TEMP[12].x, TEMP[15].xxxx, TEMP[15].xxxx 232: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[15].xxxx 233: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx 234: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[11].xxxx, IMM[0].zzzz 235: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx 236: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 237: MAD TEMP[5].xyz, TEMP[9].xyzz, TEMP[5].xxxx, TEMP[8].xyzz 238: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz 239: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz 240: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz 241: MOV TEMP[0].xyz, TEMP[0].xyzx 242: MAD TEMP[1].x, IN[6].xxxx, CONST[5].zzzz, CONST[5].wwww 243: MOV_SAT TEMP[1].x, TEMP[1].xxxx 244: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz 245: MOV TEMP[0].xyz, TEMP[0].xyzx 246: MOV TEMP[0].w, IMM[0].zzzz 247: MOV OUT[0], TEMP[0] 248: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300) %76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312) %79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368) %81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384) %82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 416) %83 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %84 = load <32 x i8>, <32 x i8> addrspace(2)* %83, align 32, !tbaa !0 %85 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0 %87 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %88 = bitcast <8 x i32> addrspace(2)* %87 to <32 x i8> addrspace(2)* %89 = load <32 x i8>, <32 x i8> addrspace(2)* %88, align 32, !tbaa !0 %90 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %91 = bitcast <4 x i32> addrspace(2)* %90 to <16 x i8> addrspace(2)* %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !tbaa !0 %93 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %94 = bitcast <8 x i32> addrspace(2)* %93 to <32 x i8> addrspace(2)* %95 = load <32 x i8>, <32 x i8> addrspace(2)* %94, align 32, !tbaa !0 %96 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %97 = bitcast <4 x i32> addrspace(2)* %96 to <16 x i8> addrspace(2)* %98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0 %99 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %100 = bitcast <8 x i32> addrspace(2)* %99 to <32 x i8> addrspace(2)* %101 = load <32 x i8>, <32 x i8> addrspace(2)* %100, align 32, !tbaa !0 %102 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %103 = bitcast <4 x i32> addrspace(2)* %102 to <16 x i8> addrspace(2)* %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0 %105 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %106 = bitcast <8 x i32> addrspace(2)* %105 to <32 x i8> addrspace(2)* %107 = load <32 x i8>, <32 x i8> addrspace(2)* %106, align 32, !tbaa !0 %108 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %109 = bitcast <4 x i32> addrspace(2)* %108 to <16 x i8> addrspace(2)* %110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0 %111 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %112 = bitcast <8 x i32> addrspace(2)* %111 to <32 x i8> addrspace(2)* %113 = load <32 x i8>, <32 x i8> addrspace(2)* %112, align 32, !tbaa !0 %114 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %115 = bitcast <4 x i32> addrspace(2)* %114 to <16 x i8> addrspace(2)* %116 = load <16 x i8>, <16 x i8> addrspace(2)* %115, align 16, !tbaa !0 %117 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %119 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %123 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %124 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %125 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %126 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %127 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %128 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %129 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %130 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %131 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %132 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %133 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7) %134 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %135 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %136 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %137 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %5, <2 x i32> %7) %138 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %5, <2 x i32> %7) %139 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %5, <2 x i32> %7) %140 = call float @llvm.SI.fs.interp(i32 2, i32 7, i32 %5, <2 x i32> %7) %141 = bitcast float %117 to i32 %142 = bitcast float %118 to i32 %143 = insertelement <2 x i32> undef, i32 %141, i32 0 %144 = insertelement <2 x i32> %143, i32 %142, i32 1 %145 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %144, <32 x i8> %101, <16 x i8> %104, i32 2) %146 = extractelement <4 x float> %145, i32 1 %147 = extractelement <4 x float> %145, i32 3 %148 = fmul float %147, 2.000000e+00 %149 = fadd float %148, -1.000000e+00 %150 = fmul float %146, 2.000000e+00 %151 = fadd float %150, -1.000000e+00 %152 = fmul float %149, %79 %153 = fmul float %151, %79 %154 = fmul float %152, %152 %155 = fmul float %153, %153 %156 = fadd float %154, %155 %157 = call float @llvm.AMDIL.clamp.(float %156, float 0.000000e+00, float 1.000000e+00) %158 = fsub float 1.000000e+00, %157 %159 = call float @llvm.sqrt.f32(float %158) %160 = fmul float %152, %119 %161 = fmul float %153, %122 %162 = fadd float %161, %160 %163 = fmul float %159, %125 %164 = fadd float %162, %163 %165 = fmul float %152, %120 %166 = fmul float %153, %123 %167 = fadd float %166, %165 %168 = fmul float %159, %126 %169 = fadd float %167, %168 %170 = fmul float %152, %121 %171 = fmul float %153, %124 %172 = fadd float %171, %170 %173 = fmul float %159, %127 %174 = fadd float %172, %173 %175 = fmul float %164, %164 %176 = fmul float %169, %169 %177 = fadd float %176, %175 %178 = fmul float %174, %174 %179 = fadd float %177, %178 %180 = call float @llvm.AMDGPU.rsq.clamped.f32(float %179) %181 = fmul float %164, %180 %182 = fmul float %169, %180 %183 = fmul float %174, %180 %184 = fmul float %135, %135 %185 = fmul float %136, %136 %186 = fadd float %185, %184 %187 = fmul float %137, %137 %188 = fadd float %186, %187 %189 = call float @llvm.AMDGPU.rsq.clamped.f32(float %188) %190 = fmul float %135, %189 %191 = fmul float %136, %189 %192 = fmul float %137, %189 %193 = bitcast float %117 to i32 %194 = bitcast float %118 to i32 %195 = insertelement <2 x i32> undef, i32 %193, i32 0 %196 = insertelement <2 x i32> %195, i32 %194, i32 1 %197 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %196, <32 x i8> %95, <16 x i8> %98, i32 2) %198 = extractelement <4 x float> %197, i32 0 %199 = extractelement <4 x float> %197, i32 1 %200 = extractelement <4 x float> %197, i32 2 %201 = fmul float %76, %198 %202 = fmul float %77, %199 %203 = fmul float %78, %200 %204 = call float @llvm.AMDGPU.lrp(float %80, float %201, float %66) %205 = call float @llvm.AMDGPU.lrp(float %80, float %202, float %67) %206 = call float @llvm.AMDGPU.lrp(float %80, float %203, float %68) %207 = fmul float %80, %69 %208 = fsub float %69, %207 %209 = fmul float %201, %208 %210 = fmul float %202, %208 %211 = fmul float %203, %208 %212 = bitcast float %117 to i32 %213 = bitcast float %118 to i32 %214 = insertelement <2 x i32> undef, i32 %212, i32 0 %215 = insertelement <2 x i32> %214, i32 %213, i32 1 %216 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %215, <32 x i8> %107, <16 x i8> %110, i32 2) %217 = extractelement <4 x float> %216, i32 1 %218 = fsub float 1.000000e+00, %82 %219 = fmul float %217, %82 %220 = fadd float %219, %218 %221 = fmul float %181, %24 %222 = fmul float %182, %25 %223 = fadd float %222, %221 %224 = fmul float %183, %26 %225 = fadd float %223, %224 %226 = call float @llvm.maxnum.f32(float %225, float 0.000000e+00) %227 = fmul float %27, %181 %228 = fmul float %28, %182 %229 = fadd float %227, %228 %230 = fmul float %29, %183 %231 = fadd float %229, %230 %232 = fadd float %231, %30 %233 = fmul float %31, %181 %234 = fmul float %32, %182 %235 = fadd float %233, %234 %236 = fmul float %33, %183 %237 = fadd float %235, %236 %238 = fadd float %237, %34 %239 = fmul float %35, %181 %240 = fmul float %36, %182 %241 = fadd float %239, %240 %242 = fmul float %37, %183 %243 = fadd float %241, %242 %244 = fadd float %243, %38 %245 = fadd float %128, %232 %246 = fadd float %129, %238 %247 = fadd float %130, %244 %248 = fdiv float %131, %133 %249 = fdiv float %132, %133 %250 = bitcast float %248 to i32 %251 = bitcast float %249 to i32 %252 = insertelement <2 x i32> undef, i32 %250, i32 0 %253 = insertelement <2 x i32> %252, i32 %251, i32 1 %254 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %253, <32 x i8> %113, <16 x i8> %116, i32 2) %255 = extractelement <4 x float> %254, i32 0 %256 = fmul float %70, %255 %257 = fmul float %71, %255 %258 = fmul float %72, %255 %259 = fmul float %245, %220 %260 = fmul float %246, %220 %261 = fmul float %247, %220 %262 = fmul float %181, %190 %263 = fmul float %182, %191 %264 = fadd float %263, %262 %265 = fmul float %183, %192 %266 = fadd float %264, %265 %267 = fmul float %266, %181 %268 = fmul float %266, %182 %269 = fmul float %266, %183 %270 = fmul float %267, 2.000000e+00 %271 = fmul float %268, 2.000000e+00 %272 = fmul float %269, 2.000000e+00 %273 = fsub float %190, %270 %274 = fsub float %191, %271 %275 = fsub float %192, %272 %276 = fcmp ogt float %51, 0.000000e+00 br i1 %276, label %IF, label %ENDIF IF: ; preds = %main_body %277 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %278 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %279 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %280 = fmul float %273, %273 %281 = fmul float %274, %274 %282 = fadd float %281, %280 %283 = fmul float %275, %275 %284 = fadd float %282, %283 %285 = call float @llvm.AMDGPU.rsq.clamped.f32(float %284) %286 = fmul float %273, %285 %287 = fmul float %274, %285 %288 = fmul float %275, %285 %289 = fsub float %44, %138 %290 = fsub float %45, %139 %291 = fsub float %46, %140 %292 = fdiv float 1.000000e+00, %286 %293 = fdiv float 1.000000e+00, %287 %294 = fdiv float 1.000000e+00, %288 %295 = fmul float %289, %292 %296 = fmul float %290, %293 %297 = fmul float %291, %294 %298 = fsub float %47, %138 %299 = fsub float %48, %139 %300 = fsub float %49, %140 %301 = fdiv float 1.000000e+00, %286 %302 = fdiv float 1.000000e+00, %287 %303 = fdiv float 1.000000e+00, %288 %304 = fmul float %298, %301 %305 = fmul float %299, %302 %306 = fmul float %300, %303 %307 = fcmp ogt float %286, 0.000000e+00 %308 = fcmp ogt float %287, 0.000000e+00 %309 = fcmp ogt float %288, 0.000000e+00 %. = select i1 %307, float %295, float %304 %temp68.0 = select i1 %308, float %296, float %305 %.100 = select i1 %309, float %297, float %306 %310 = fadd float %44, %47 %311 = fadd float %45, %48 %312 = fadd float %46, %49 %313 = fmul float %310, 5.000000e-01 %314 = fmul float %311, 5.000000e-01 %315 = fmul float %312, 5.000000e-01 %316 = call float @llvm.minnum.f32(float %., float %temp68.0) %317 = call float @llvm.minnum.f32(float %316, float %.100) %318 = fsub float %313, %279 %319 = fsub float %314, %278 %320 = fsub float %315, %277 %321 = fadd float %318, %138 %322 = fadd float %319, %139 %323 = fadd float %320, %140 %324 = fmul float %286, %317 %325 = fadd float %324, %321 %326 = fmul float %287, %317 %327 = fadd float %326, %322 %328 = fmul float %288, %317 %329 = fadd float %328, %323 %330 = fsub float %325, %313 %331 = fsub float %327, %314 %332 = fsub float %329, %315 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp44.0 = phi float [ %330, %IF ], [ %273, %main_body ] %temp45.0 = phi float [ %331, %IF ], [ %274, %main_body ] %temp46.0 = phi float [ %332, %IF ], [ %275, %main_body ] %333 = fsub float 1.000000e+00, %81 %334 = call float @llvm.pow.f32(float %333, float 7.500000e-01) %335 = fmul float %334, 7.000000e+00 %336 = insertelement <4 x float> undef, float %temp44.0, i32 0 %337 = insertelement <4 x float> %336, float %temp45.0, i32 1 %338 = insertelement <4 x float> %337, float %temp46.0, i32 2 %339 = insertelement <4 x float> %338, float %335, i32 3 %340 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %339) %341 = extractelement <4 x float> %340, i32 0 %342 = extractelement <4 x float> %340, i32 1 %343 = extractelement <4 x float> %340, i32 2 %344 = extractelement <4 x float> %340, i32 3 %345 = call float @llvm.fabs.f32(float %343) %346 = fdiv float 1.000000e+00, %345 %347 = fmul float %341, %346 %348 = fadd float %347, 1.500000e+00 %349 = fmul float %342, %346 %350 = fadd float %349, 1.500000e+00 %351 = bitcast float %350 to i32 %352 = bitcast float %348 to i32 %353 = bitcast float %344 to i32 %354 = bitcast float %335 to i32 %355 = insertelement <4 x i32> undef, i32 %351, i32 0 %356 = insertelement <4 x i32> %355, i32 %352, i32 1 %357 = insertelement <4 x i32> %356, i32 %353, i32 2 %358 = insertelement <4 x i32> %357, i32 %354, i32 3 %359 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %358, <32 x i8> %84, <16 x i8> %86, i32 4) %360 = extractelement <4 x float> %359, i32 0 %361 = extractelement <4 x float> %359, i32 1 %362 = extractelement <4 x float> %359, i32 2 %363 = extractelement <4 x float> %359, i32 3 %364 = call float @llvm.pow.f32(float %363, float %53) %365 = fmul float %52, %364 %366 = fmul float %365, %360 %367 = fmul float %365, %361 %368 = fmul float %365, %362 %369 = fcmp olt float %50, 0x3FEFFFEB00000000 br i1 %369, label %IF86, label %ENDIF85 IF86: ; preds = %ENDIF %370 = fcmp ogt float %63, 0.000000e+00 br i1 %370, label %IF89, label %ENDIF88 ENDIF85: ; preds = %ENDIF, %ENDIF88 %temp28.0 = phi float [ %598, %ENDIF88 ], [ %366, %ENDIF ] %temp29.0 = phi float [ %599, %ENDIF88 ], [ %367, %ENDIF ] %temp30.0 = phi float [ %600, %ENDIF88 ], [ %368, %ENDIF ] %371 = fmul float %temp28.0, %220 %372 = fmul float %temp29.0, %220 %373 = fmul float %temp30.0, %220 %374 = fsub float 1.000000e+00, %81 %375 = fsub float %24, %190 %376 = fsub float %25, %191 %377 = fsub float %26, %192 %378 = fmul float %375, %375 %379 = fmul float %376, %376 %380 = fadd float %379, %378 %381 = fmul float %377, %377 %382 = fadd float %380, %381 %383 = call float @llvm.AMDGPU.rsq.clamped.f32(float %382) %384 = fmul float %375, %383 %385 = fmul float %376, %383 %386 = fmul float %377, %383 %387 = fmul float %190, %181 %388 = fsub float -0.000000e+00, %387 %389 = fmul float %191, %182 %390 = fsub float %388, %389 %391 = fmul float %192, %183 %392 = fsub float %390, %391 %393 = call float @llvm.maxnum.f32(float %392, float 0.000000e+00) %394 = fmul float %24, %384 %395 = fmul float %25, %385 %396 = fadd float %395, %394 %397 = fmul float %26, %386 %398 = fadd float %396, %397 %399 = call float @llvm.maxnum.f32(float %398, float 0.000000e+00) %400 = fmul float %374, %374 %401 = fmul float %400, %75 %402 = fsub float 1.000000e+00, %374 %403 = fmul float %402, 0x3FEEF9DB20000000 %404 = fadd float %403, 0x3F9EB851E0000000 %405 = call float @llvm.log2.f32(float %404) %406 = fdiv float 1.000000e+00, %405 %407 = fmul float %406, 1.000000e+01 %408 = fmul float %407, %407 %409 = fsub float 1.000000e+00, %226 %410 = fsub float 1.000000e+00, %393 %411 = fmul float %399, 2.000000e+00 %412 = fmul float %399, %374 %413 = fmul float %411, %412 %414 = fadd float %413, 5.000000e-01 %415 = fsub float 1.000000e+00, %399 %416 = fsub float 1.000000e+00, %393 %417 = fsub float 1.000000e+00, %208 %418 = fadd float %81, %417 %419 = call float @llvm.AMDIL.clamp.(float %418, float 0.000000e+00, float 1.000000e+00) %420 = fmul float %416, %416 %421 = fmul float %416, %416 %422 = fmul float %421, %416 %423 = fmul float %420, %422 %424 = call float @llvm.AMDGPU.lrp(float %423, float %419, float %204) %425 = call float @llvm.AMDGPU.lrp(float %423, float %419, float %205) %426 = call float @llvm.AMDGPU.lrp(float %423, float %419, float %206) %427 = call float @llvm.AMDGPU.lrp(float %226, float 1.000000e+00, float %401) %428 = call float @llvm.AMDGPU.lrp(float %393, float 1.000000e+00, float %401) %429 = fmul float %427, %428 %430 = fadd float %429, 0x3F1A36E2E0000000 %431 = fdiv float 1.000000e+00, %430 %432 = fmul float %181, %384 %433 = fmul float %182, %385 %434 = fadd float %433, %432 %435 = fmul float %183, %386 %436 = fadd float %434, %435 %437 = call float @llvm.maxnum.f32(float %436, float 0.000000e+00) %438 = call float @llvm.pow.f32(float %437, float %408) %439 = fadd float %408, 1.000000e+00 %440 = fmul float %439, %74 %441 = fmul float %438, %440 %442 = fmul float %431, %441 %443 = fmul float %442, %226 %444 = fmul float %443, %73 %445 = call float @llvm.maxnum.f32(float %444, float 0.000000e+00) %446 = fmul float %445, %256 %447 = fmul float %445, %257 %448 = fmul float %445, %258 %449 = fsub float 1.000000e+00, %204 %450 = fsub float 1.000000e+00, %205 %451 = fsub float 1.000000e+00, %206 %452 = fmul float %415, %415 %453 = fmul float %415, %415 %454 = fmul float %453, %415 %455 = fmul float %452, %454 %456 = fmul float %449, %455 %457 = fadd float %456, %204 %458 = fmul float %450, %455 %459 = fadd float %458, %205 %460 = fmul float %451, %455 %461 = fadd float %460, %206 %462 = fadd float %414, -1.000000e+00 %463 = fmul float %409, %409 %464 = fmul float %409, %409 %465 = fmul float %464, %409 %466 = fmul float %463, %465 %467 = fmul float %462, %466 %468 = fadd float %467, 1.000000e+00 %469 = fadd float %414, -1.000000e+00 %470 = fmul float %410, %410 %471 = fmul float %410, %410 %472 = fmul float %471, %410 %473 = fmul float %470, %472 %474 = fmul float %469, %473 %475 = fadd float %474, 1.000000e+00 %476 = fmul float %468, %475 %477 = fmul float %476, %226 %478 = fmul float %256, %477 %479 = fadd float %478, %259 %480 = fmul float %257, %477 %481 = fadd float %480, %260 %482 = fmul float %258, %477 %483 = fadd float %482, %261 %484 = fmul float %209, %479 %485 = fmul float %210, %481 %486 = fmul float %211, %483 %487 = fmul float %446, %457 %488 = fadd float %487, %484 %489 = fmul float %447, %459 %490 = fadd float %489, %485 %491 = fmul float %448, %461 %492 = fadd float %491, %486 %493 = fmul float %371, %424 %494 = fadd float %493, %488 %495 = fmul float %372, %425 %496 = fadd float %495, %490 %497 = fmul float %373, %426 %498 = fadd float %497, %492 %499 = fmul float %134, %42 %500 = fadd float %499, %43 %501 = call float @llvm.AMDIL.clamp.(float %500, float 0.000000e+00, float 1.000000e+00) %502 = call float @llvm.AMDGPU.lrp(float %501, float %494, float %39) %503 = call float @llvm.AMDGPU.lrp(float %501, float %496, float %40) %504 = call float @llvm.AMDGPU.lrp(float %501, float %498, float %41) %505 = call i32 @llvm.SI.packf16(float %502, float %503) %506 = bitcast i32 %505 to float %507 = call i32 @llvm.SI.packf16(float %504, float 1.000000e+00) %508 = bitcast i32 %507 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %506, float %508, float %506, float %508) ret void IF89: ; preds = %IF86 %509 = fmul float %273, %273 %510 = fmul float %274, %274 %511 = fadd float %510, %509 %512 = fmul float %275, %275 %513 = fadd float %511, %512 %514 = call float @llvm.AMDGPU.rsq.clamped.f32(float %513) %515 = fmul float %273, %514 %516 = fmul float %274, %514 %517 = fmul float %275, %514 %518 = fsub float %54, %138 %519 = fsub float %55, %139 %520 = fsub float %56, %140 %521 = fdiv float 1.000000e+00, %515 %522 = fdiv float 1.000000e+00, %516 %523 = fdiv float 1.000000e+00, %517 %524 = fmul float %518, %521 %525 = fmul float %519, %522 %526 = fmul float %520, %523 %527 = fsub float %57, %138 %528 = fsub float %58, %139 %529 = fsub float %59, %140 %530 = fdiv float 1.000000e+00, %515 %531 = fdiv float 1.000000e+00, %516 %532 = fdiv float 1.000000e+00, %517 %533 = fmul float %527, %530 %534 = fmul float %528, %531 %535 = fmul float %529, %532 %536 = fcmp ogt float %515, 0.000000e+00 %537 = fcmp ogt float %516, 0.000000e+00 %538 = fcmp ogt float %517, 0.000000e+00 %.101 = select i1 %536, float %524, float %533 %temp68.1 = select i1 %537, float %525, float %534 %.102 = select i1 %538, float %526, float %535 %539 = fadd float %54, %57 %540 = fadd float %55, %58 %541 = fadd float %56, %59 %542 = fmul float %539, 5.000000e-01 %543 = fmul float %540, 5.000000e-01 %544 = fmul float %541, 5.000000e-01 %545 = call float @llvm.minnum.f32(float %.101, float %temp68.1) %546 = call float @llvm.minnum.f32(float %545, float %.102) %547 = fsub float %542, %60 %548 = fsub float %543, %61 %549 = fsub float %544, %62 %550 = fadd float %547, %138 %551 = fadd float %548, %139 %552 = fadd float %549, %140 %553 = fmul float %515, %546 %554 = fadd float %553, %550 %555 = fmul float %516, %546 %556 = fadd float %555, %551 %557 = fmul float %517, %546 %558 = fadd float %557, %552 %559 = fsub float %554, %542 %560 = fsub float %556, %543 %561 = fsub float %558, %544 br label %ENDIF88 ENDIF88: ; preds = %IF86, %IF89 %temp48.0 = phi float [ %559, %IF89 ], [ %273, %IF86 ] %temp49.0 = phi float [ %560, %IF89 ], [ %274, %IF86 ] %temp50.0 = phi float [ %561, %IF89 ], [ %275, %IF86 ] %562 = fsub float 1.000000e+00, %81 %563 = call float @llvm.pow.f32(float %562, float 7.500000e-01) %564 = fmul float %563, 7.000000e+00 %565 = insertelement <4 x float> undef, float %temp48.0, i32 0 %566 = insertelement <4 x float> %565, float %temp49.0, i32 1 %567 = insertelement <4 x float> %566, float %temp50.0, i32 2 %568 = insertelement <4 x float> %567, float %564, i32 3 %569 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %568) %570 = extractelement <4 x float> %569, i32 0 %571 = extractelement <4 x float> %569, i32 1 %572 = extractelement <4 x float> %569, i32 2 %573 = extractelement <4 x float> %569, i32 3 %574 = call float @llvm.fabs.f32(float %572) %575 = fdiv float 1.000000e+00, %574 %576 = fmul float %570, %575 %577 = fadd float %576, 1.500000e+00 %578 = fmul float %571, %575 %579 = fadd float %578, 1.500000e+00 %580 = bitcast float %579 to i32 %581 = bitcast float %577 to i32 %582 = bitcast float %573 to i32 %583 = bitcast float %564 to i32 %584 = insertelement <4 x i32> undef, i32 %580, i32 0 %585 = insertelement <4 x i32> %584, i32 %581, i32 1 %586 = insertelement <4 x i32> %585, i32 %582, i32 2 %587 = insertelement <4 x i32> %586, i32 %583, i32 3 %588 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %587, <32 x i8> %89, <16 x i8> %92, i32 4) %589 = extractelement <4 x float> %588, i32 0 %590 = extractelement <4 x float> %588, i32 1 %591 = extractelement <4 x float> %588, i32 2 %592 = extractelement <4 x float> %588, i32 3 %593 = call float @llvm.pow.f32(float %592, float %65) %594 = fmul float %64, %593 %595 = fmul float %594, %589 %596 = fmul float %594, %590 %597 = fmul float %594, %591 %598 = call float @llvm.AMDGPU.lrp(float %50, float %366, float %595) %599 = call float @llvm.AMDGPU.lrp(float %50, float %367, float %596) %600 = call float @llvm.AMDGPU.lrp(float %50, float %368, float %597) br label %ENDIF85 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_load_dwordx4 s[40:43], s[4:5], 0x8 ; C0940508 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v16, v0, 0, 0, [m0] ; C8400000 v_interp_p2_f32 v16, [v16], v1, 0, 0, [m0] ; C8410001 v_interp_p1_f32 v17, v0, 1, 0, [m0] ; C8440100 v_interp_p2_f32 v17, [v17], v1, 1, 0, [m0] ; C8450101 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s2, s[12:15], 0x58 ; C2010D58 s_buffer_load_dword s1, s[12:15], 0x5c ; C2008D5C s_buffer_load_dword s0, s[12:15], 0x60 ; C2000D60 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800 v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801 v_interp_p1_f32 v10, v0, 1, 2, [m0] ; C8280900 v_interp_p2_f32 v10, [v10], v1, 1, 2, [m0] ; C8290901 v_interp_p1_f32 v11, v0, 2, 2, [m0] ; C82C0A00 v_interp_p2_f32 v11, [v11], v1, 2, 2, [m0] ; C82D0A01 v_interp_p1_f32 v13, v0, 0, 3, [m0] ; C8340C00 v_interp_p2_f32 v13, [v13], v1, 0, 3, [m0] ; C8350C01 v_interp_p1_f32 v18, v0, 1, 3, [m0] ; C8480D00 v_interp_p2_f32 v18, [v18], v1, 1, 3, [m0] ; C8490D01 v_interp_p1_f32 v19, v0, 2, 3, [m0] ; C84C0E00 v_interp_p2_f32 v19, [v19], v1, 2, 3, [m0] ; C84D0E01 v_interp_p1_f32 v2, v0, 0, 4, [m0] ; C8081000 v_interp_p2_f32 v2, [v2], v1, 0, 4, [m0] ; C8091001 v_interp_p1_f32 v3, v0, 1, 4, [m0] ; C80C1100 v_interp_p2_f32 v3, [v3], v1, 1, 4, [m0] ; C80D1101 v_interp_p1_f32 v5, v0, 2, 4, [m0] ; C8141200 v_interp_p2_f32 v5, [v5], v1, 2, 4, [m0] ; C8151201 v_interp_p1_f32 v20, v0, 0, 5, [m0] ; C8501400 v_interp_p2_f32 v20, [v20], v1, 0, 5, [m0] ; C8511401 v_interp_p1_f32 v21, v0, 1, 5, [m0] ; C8541500 v_interp_p2_f32 v21, [v21], v1, 1, 5, [m0] ; C8551501 v_interp_p1_f32 v22, v0, 3, 5, [m0] ; C8581700 v_interp_p2_f32 v22, [v22], v1, 3, 5, [m0] ; C8591701 v_interp_p1_f32 v4, v0, 0, 6, [m0] ; C8101800 v_interp_p2_f32 v4, [v4], v1, 0, 6, [m0] ; C8111801 v_interp_p1_f32 v23, v0, 1, 6, [m0] ; C85C1900 v_interp_p2_f32 v23, [v23], v1, 1, 6, [m0] ; C85D1901 v_interp_p1_f32 v24, v0, 2, 6, [m0] ; C8601A00 v_interp_p2_f32 v24, [v24], v1, 2, 6, [m0] ; C8611A01 s_load_dwordx4 s[8:11], s[4:5], 0xc ; C084050C s_load_dwordx4 s[16:19], s[4:5], 0x10 ; C0880510 s_load_dwordx4 s[20:23], s[4:5], 0x14 ; C08A0514 s_load_dwordx8 s[44:51], s[6:7], 0x10 ; C0D60710 v_interp_p1_f32 v25, v0, 3, 6, [m0] ; C8641B00 v_interp_p2_f32 v25, [v25], v1, 3, 6, [m0] ; C8651B01 v_interp_p1_f32 v14, v0, 0, 7, [m0] ; C8381C00 v_interp_p2_f32 v14, [v14], v1, 0, 7, [m0] ; C8391C01 v_interp_p1_f32 v12, v0, 1, 7, [m0] ; C8301D00 v_interp_p2_f32 v12, [v12], v1, 1, 7, [m0] ; C8311D01 v_interp_p1_f32 v15, v0, 2, 7, [m0] ; C83C1E00 v_interp_p2_f32 v15, [v15], v1, 2, 7, [m0] ; C83D1E01 s_load_dwordx8 s[52:59], s[6:7], 0x18 ; C0DA0718 s_load_dwordx8 s[24:31], s[6:7], 0x20 ; C0CC0720 s_load_dwordx8 s[32:39], s[6:7], 0x28 ; C0D00728 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[52:59], s[8:11] ; F0800A00 004D0010 image_sample v[30:32], 7, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[44:51], s[40:43] ; F0800700 014B1E10 s_waitcnt vmcnt(1) ; BF8C0771 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4 v_mul_f32_e32 v1, s2, v1 ; 10020202 v_mul_f32_e32 v0, s2, v0 ; 10000002 v_mul_f32_e32 v6, v6, v1 ; 100C0306 v_mac_f32_e32 v6, v9, v0 ; 3E0C0109 v_mul_f32_e32 v7, v7, v1 ; 100E0307 v_mac_f32_e32 v7, v10, v0 ; 3E0E010A v_mul_f32_e32 v10, v8, v1 ; 10140308 v_mac_f32_e32 v10, v11, v0 ; 3E14010B v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mac_f32_e32 v0, v1, v1 ; 3E000301 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v0, 1.0, v0 ; 080000F2 v_sqrt_f32_e32 v0, v0 ; 7E006700 v_mac_f32_e32 v6, v13, v0 ; 3E0C010D v_mac_f32_e32 v7, v18, v0 ; 3E0E0112 v_mac_f32_e32 v10, v19, v0 ; 3E140113 v_mul_f32_e32 v0, v6, v6 ; 10000D06 v_mac_f32_e32 v0, v7, v7 ; 3E000F07 v_mac_f32_e32 v0, v10, v10 ; 3E00150A v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_mul_f32_e32 v1, v23, v23 ; 10022F17 v_mac_f32_e32 v1, v24, v24 ; 3E023118 v_mac_f32_e32 v1, v25, v25 ; 3E023319 v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 v_mul_f32_e32 v9, v0, v6 ; 10120D00 v_mul_f32_e32 v8, v0, v7 ; 10100F00 v_mul_f32_e32 v7, v0, v10 ; 100E1500 v_mul_f32_e32 v11, v1, v23 ; 10162F01 v_mul_f32_e32 v10, v1, v24 ; 10143101 v_mul_f32_e32 v0, v11, v9 ; 1000130B v_mac_f32_e32 v0, v10, v8 ; 3E00110A v_mul_f32_e32 v13, v1, v25 ; 101A3301 v_mac_f32_e32 v0, v13, v7 ; 3E000F0D v_mul_f32_e32 v6, v9, v0 ; 100C0109 v_mac_f32_e32 v6, v9, v0 ; 3E0C0109 v_mul_f32_e32 v18, v8, v0 ; 10240108 v_mac_f32_e32 v18, v8, v0 ; 3E240108 v_mad_f32 v27, v23, v1, -v6 ; D282001B 841A0317 v_mad_f32 v28, v24, v1, -v18 ; D282001C 844A0318 v_mul_f32_e32 v6, v7, v0 ; 100C0107 v_mac_f32_e32 v6, v7, v0 ; 3E0C0107 s_buffer_load_dword s2, s[12:15], 0x4c ; C2010D4C s_buffer_load_dword s3, s[12:15], 0x4d ; C2018D4D s_buffer_load_dword s8, s[12:15], 0x4e ; C2040D4E v_mad_f32 v29, v25, v1, -v6 ; D282001D 841A0319 v_mov_b32_e32 v0, 0x6f800000 ; 7E0002FF 6F800000 v_cmp_gt_f32_e64 vcc, |v22|, v0 ; D008016A 00020116 v_mov_b32_e32 v0, 0x2f800000 ; 7E0002FF 2F800000 v_cndmask_b32_e32 v0, 1.0, v0 ; 000000F2 v_mul_f32_e32 v1, v0, v22 ; 10022D00 v_rcp_f32_e32 v1, v1 ; 7E025501 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v24, s2, v30 ; 10303C02 v_mul_f32_e32 v25, s3, v31 ; 10323E03 v_mul_f32_e32 v26, s8, v32 ; 10344008 v_mul_f32_e32 v6, v1, v20 ; 100C2901 v_mul_f32_e32 v1, v1, v21 ; 10022B01 s_buffer_load_dword s2, s[12:15], 0x40 ; C2010D40 s_buffer_load_dword s3, s[12:15], 0x41 ; C2018D41 s_buffer_load_dword s8, s[12:15], 0x42 ; C2040D42 v_mul_f32_e32 v18, v6, v0 ; 10240106 v_mul_f32_e32 v19, v1, v0 ; 10260101 s_buffer_load_dword s9, s[12:15], 0x27 ; C2048D27 s_buffer_load_dword s10, s[12:15], 0x2b ; C2050D2B s_buffer_load_dword s40, s[12:15], 0x2c ; C2140D2C s_buffer_load_dword s41, s[12:15], 0x2d ; C2148D2D v_sub_f32_e64 v0, 1.0, s1 ; D2080000 000002F2 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s2, v0 ; 100C0002 v_mul_f32_e32 v1, s3, v0 ; 10020003 v_mul_f32_e32 v0, s8, v0 ; 10000008 v_mac_f32_e32 v6, s1, v24 ; 3E0C3001 v_mov_b32_e32 v30, v27 ; 7E3C031B v_mac_f32_e32 v1, s1, v25 ; 3E023201 v_mov_b32_e32 v31, v28 ; 7E3E031C v_mac_f32_e32 v0, s1, v26 ; 3E003401 v_mov_b32_e32 v32, v29 ; 7E40031D v_cmp_lt_f32_e64 s[2:3], 0, s10 ; D0020002 00001480 image_sample v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[24:31], s[16:19] ; F0800F00 00861410 image_sample v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[32:39], s[20:23] ; F0800F00 00A81012 s_waitcnt vmcnt(0) ; BF8C0770 s_and_saveexec_b64 s[18:19], s[2:3] ; BE922402 s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E s_cbranch_execz BB0_2 ; BF880000 s_buffer_load_dword s2, s[12:15], 0x20 ; C2010D20 s_buffer_load_dword s3, s[12:15], 0x21 ; C2018D21 s_buffer_load_dword s8, s[12:15], 0x22 ; C2040D22 s_buffer_load_dword s10, s[12:15], 0x24 ; C2050D24 s_buffer_load_dword s11, s[12:15], 0x25 ; C2058D25 v_mul_f32_e32 v17, v27, v27 ; 1022371B v_mac_f32_e32 v17, v28, v28 ; 3E22391C v_mac_f32_e32 v17, v29, v29 ; 3E223B1D v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 s_buffer_load_dword s16, s[12:15], 0x26 ; C2080D26 s_buffer_load_dword s17, s[12:15], 0x28 ; C2088D28 s_buffer_load_dword s20, s[12:15], 0x29 ; C20A0D29 s_buffer_load_dword s21, s[12:15], 0x2a ; C20A8D2A v_mul_f32_e32 v18, v17, v27 ; 10243711 v_mul_f32_e32 v19, v17, v28 ; 10263911 v_mul_f32_e32 v17, v17, v29 ; 10223B11 v_rcp_f32_e32 v20, v18 ; 7E285512 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v22, s2, v14 ; 082C1C02 v_sub_f32_e32 v23, s3, v12 ; 082E1803 v_rcp_f32_e32 v30, v19 ; 7E3C5513 v_mul_f32_e32 v22, v20, v22 ; 102C2D14 v_sub_f32_e32 v31, s10, v14 ; 083E1C0A v_mul_f32_e32 v20, v20, v31 ; 10283F14 v_cmp_lt_f32_e32 vcc, 0, v18 ; 7C022480 v_cndmask_b32_e32 v20, v20, v22 ; 00282D14 v_rcp_f32_e32 v22, v17 ; 7E2C5511 v_mul_f32_e32 v23, v30, v23 ; 102E2F1E v_sub_f32_e32 v31, s11, v12 ; 083E180B v_mul_f32_e32 v30, v30, v31 ; 103C3F1E v_cmp_lt_f32_e32 vcc, 0, v19 ; 7C022680 v_cndmask_b32_e32 v23, v30, v23 ; 002E2F1E v_sub_f32_e32 v30, s8, v15 ; 083C1E08 v_mul_f32_e32 v30, v22, v30 ; 103C3D16 v_sub_f32_e32 v31, s16, v15 ; 083E1E10 v_mul_f32_e32 v22, v22, v31 ; 102C3F16 v_cmp_lt_f32_e32 vcc, 0, v17 ; 7C022280 v_cndmask_b32_e32 v22, v22, v30 ; 002C3D16 v_min3_f32 v20, v20, v23, v22 ; D2A20014 045A2F14 v_mov_b32_e32 v22, s10 ; 7E2C020A v_add_f32_e32 v22, s2, v22 ; 062C2C02 v_mov_b32_e32 v23, s11 ; 7E2E020B v_add_f32_e32 v23, s3, v23 ; 062E2E03 v_mov_b32_e32 v30, s16 ; 7E3C0210 v_add_f32_e32 v32, s8, v30 ; 06403C08 v_mad_f32 v30, 0.5, v22, -s17 ; D282001E 80462CF0 v_add_f32_e32 v30, v14, v30 ; 063C3D0E v_mac_f32_e32 v30, v20, v18 ; 3E3C2514 v_mad_f32 v18, 0.5, v23, -s20 ; D2820012 80522EF0 v_add_f32_e32 v18, v12, v18 ; 0624250C v_mac_f32_e32 v18, v20, v19 ; 3E242714 v_mad_f32 v19, 0.5, v32, -s21 ; D2820013 805640F0 v_add_f32_e32 v19, v15, v19 ; 0626270F v_mac_f32_e32 v19, v20, v17 ; 3E262314 v_mad_f32 v30, 0.5, -v22, v30 ; D282001E 447A2CF0 v_mad_f32 v31, 0.5, -v23, v18 ; D282001F 444A2EF0 v_mad_f32 v32, 0.5, -v32, v19 ; D2820020 444E40F0 s_or_b64 exec, exec, s[18:19] ; 88FE127E s_buffer_load_dword s28, s[12:15], 0x17 ; C20E0D17 s_buffer_load_dword s29, s[12:15], 0x43 ; C20E8D43 s_buffer_load_dword s27, s[12:15], 0x44 ; C20D8D44 s_buffer_load_dword s20, s[12:15], 0x45 ; C20A0D45 s_buffer_load_dword s17, s[12:15], 0x46 ; C2088D46 s_buffer_load_dword s3, s[12:15], 0x0 ; C2018D00 s_buffer_load_dword s8, s[12:15], 0x1 ; C2040D01 s_buffer_load_dword s2, s[12:15], 0x2 ; C2010D02 s_buffer_load_dword s10, s[12:15], 0x4 ; C2050D04 s_buffer_load_dword s11, s[12:15], 0x5 ; C2058D05 s_buffer_load_dword s16, s[12:15], 0x6 ; C2080D06 s_buffer_load_dword s18, s[12:15], 0x7 ; C2090D07 s_buffer_load_dword s19, s[12:15], 0x8 ; C2098D08 s_buffer_load_dword s21, s[12:15], 0x9 ; C20A8D09 s_buffer_load_dword s22, s[12:15], 0xa ; C20B0D0A s_buffer_load_dword s23, s[12:15], 0xb ; C20B8D0B s_buffer_load_dword s24, s[12:15], 0xc ; C20C0D0C s_buffer_load_dword s25, s[12:15], 0xd ; C20C8D0D s_buffer_load_dword s26, s[12:15], 0xe ; C20D0D0E v_sub_f32_e64 v17, 1.0, s0 ; D2080011 000000F2 v_log_f32_e32 v17, v17 ; 7E224F11 v_mul_legacy_f32_e32 v17, 0x3f400000, v17 ; 0E2222FF 3F400000 v_exp_f32_e32 v17, v17 ; 7E224B11 v_mul_f32_e32 v33, 0x40e00000, v17 ; 104222FF 40E00000 v_cubeid_f32 v20, v30, v31, v32 ; D2880014 04823F1E v_cubema_f32 v19, v30, v31, v32 ; D28E0013 04823F1E s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx8 s[44:51], s[6:7], 0x0 ; C0D60700 v_cubesc_f32 v18, v30, v31, v32 ; D28A0012 04823F1E v_cubetc_f32 v17, v30, v31, v32 ; D28C0011 04823F1E v_rcp_f32_e64 v19, |v19| ; D3540113 00000113 v_mov_b32_e32 v30, 0x3fc00000 ; 7E3C02FF 3FC00000 v_mad_f32 v31, v19, v17, v30 ; D282001F 047A2313 v_mac_f32_e32 v30, v19, v18 ; 3E3C2513 v_mov_b32_e32 v32, v20 ; 7E400314 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[30:33], 15, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[44:51], s[32:35] ; F0900F00 010B1E1E s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v17, v33 ; 7E224F21 s_buffer_load_dword s30, s[12:15], 0xf ; C20F0D0F s_buffer_load_dword s31, s[12:15], 0x68 ; C20F8D68 v_mul_legacy_f32_e32 v17, s41, v17 ; 0E222229 v_exp_f32_e32 v17, v17 ; 7E224B11 v_mul_f32_e32 v17, s40, v17 ; 10222228 v_mul_f32_e32 v19, v30, v17 ; 1026231E v_mul_f32_e32 v18, v31, v17 ; 1024231F v_mul_f32_e32 v17, v32, v17 ; 10222320 v_mov_b32_e32 v20, s1 ; 7E280201 v_mov_b32_e32 v22, 0x3f7fff58 ; 7E2C02FF 3F7FFF58 v_cmp_lt_f32_e32 vcc, s9, v22 ; 7C022C09 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[32:33], vcc ; BEA0246A s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E s_cbranch_execz BB0_6 ; BF880000 s_buffer_load_dword s35, s[12:15], 0x3b ; C2118D3B s_buffer_load_dword s1, s[12:15], 0x3c ; C2008D3C s_buffer_load_dword s34, s[12:15], 0x3d ; C2110D3D s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[36:37], 0, s35 ; D0020024 00004680 s_and_saveexec_b64 s[36:37], s[36:37] ; BEA42424 s_xor_b64 s[36:37], exec, s[36:37] ; 89A4247E s_cbranch_execz BB0_7 ; BF880000 s_buffer_load_dword s35, s[12:15], 0x36 ; C2118D36 s_buffer_load_dword s38, s[12:15], 0x38 ; C2130D38 s_buffer_load_dword s39, s[12:15], 0x39 ; C2138D39 s_buffer_load_dword s40, s[12:15], 0x3a ; C2140D3A s_buffer_load_dword s41, s[12:15], 0x30 ; C2148D30 s_buffer_load_dword s42, s[12:15], 0x31 ; C2150D31 s_buffer_load_dword s43, s[12:15], 0x32 ; C2158D32 s_buffer_load_dword s44, s[12:15], 0x34 ; C2160D34 s_buffer_load_dword s45, s[12:15], 0x35 ; C2168D35 v_mul_f32_e32 v22, v27, v27 ; 102C371B v_mac_f32_e32 v22, v28, v28 ; 3E2C391C v_mac_f32_e32 v22, v29, v29 ; 3E2C3B1D v_rsq_clamp_f32_e32 v22, v22 ; 7E2C5916 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v23, s35, v15 ; 082E1E23 v_mov_b32_e32 v30, s35 ; 7E3C0223 v_sub_f32_e32 v31, s41, v14 ; 083E1C29 v_sub_f32_e32 v32, s42, v12 ; 0840182A v_add_f32_e32 v30, s43, v30 ; 063C3C2B v_sub_f32_e32 v33, s43, v15 ; 08421E2B v_mad_f32 v34, 0.5, v30, -s40 ; D2820022 80A23CF0 v_add_f32_e32 v15, v15, v34 ; 061E450F v_mul_f32_e32 v27, v22, v27 ; 10363716 v_mul_f32_e32 v28, v22, v28 ; 10383916 v_mul_f32_e32 v22, v22, v29 ; 102C3B16 v_rcp_f32_e32 v29, v27 ; 7E3A551B v_rcp_f32_e32 v34, v28 ; 7E44551C v_rcp_f32_e32 v35, v22 ; 7E465516 v_sub_f32_e32 v36, s44, v14 ; 08481C2C v_mov_b32_e32 v37, s44 ; 7E4A022C v_add_f32_e32 v37, s41, v37 ; 064A4A29 v_mul_f32_e32 v31, v29, v31 ; 103E3F1D v_mul_f32_e32 v29, v29, v36 ; 103A491D v_mul_f32_e32 v32, v34, v32 ; 10404122 v_mul_f32_e32 v33, v35, v33 ; 10424323 v_mul_f32_e32 v23, v35, v23 ; 102E2F23 v_mad_f32 v35, 0.5, v37, -s38 ; D2820023 809A4AF0 v_add_f32_e32 v14, v14, v35 ; 061C470E v_sub_f32_e32 v35, s45, v12 ; 0846182D v_mov_b32_e32 v36, s45 ; 7E48022D v_mul_f32_e32 v34, v34, v35 ; 10444722 v_add_f32_e32 v35, s42, v36 ; 0646482A v_cmp_lt_f32_e32 vcc, 0, v27 ; 7C023680 v_cndmask_b32_e32 v29, v29, v31 ; 003A3F1D v_cmp_lt_f32_e32 vcc, 0, v28 ; 7C023880 v_cndmask_b32_e32 v31, v34, v32 ; 003E4122 v_cmp_lt_f32_e32 vcc, 0, v22 ; 7C022C80 v_cndmask_b32_e32 v23, v23, v33 ; 002E4317 v_min3_f32 v23, v29, v31, v23 ; D2A20017 045E3F1D v_mad_f32 v29, 0.5, v35, -s39 ; D282001D 809E46F0 v_add_f32_e32 v12, v12, v29 ; 06183B0C v_mac_f32_e32 v14, v23, v27 ; 3E1C3717 v_mac_f32_e32 v12, v23, v28 ; 3E183917 v_mac_f32_e32 v15, v23, v22 ; 3E1E2D17 v_mad_f32 v27, 0.5, -v37, v14 ; D282001B 443A4AF0 v_mad_f32 v28, 0.5, -v35, v12 ; D282001C 443246F0 v_mad_f32 v29, 0.5, -v30, v15 ; D282001D 443E3CF0 s_or_b64 exec, exec, s[36:37] ; 88FE247E v_sub_f32_e64 v12, 1.0, s0 ; D208000C 000000F2 v_log_f32_e32 v12, v12 ; 7E184F0C s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 v_mul_legacy_f32_e32 v12, 0x3f400000, v12 ; 0E1818FF 3F400000 v_exp_f32_e32 v12, v12 ; 7E184B0C v_mul_f32_e32 v30, 0x40e00000, v12 ; 103C18FF 40E00000 v_cubeid_f32 v34, v27, v28, v29 ; D2880022 0476391B v_cubema_f32 v33, v27, v28, v29 ; D28E0021 0476391B s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 v_cubesc_f32 v32, v27, v28, v29 ; D28A0020 0476391B v_cubetc_f32 v31, v27, v28, v29 ; D28C001F 0476391B v_rcp_f32_e64 v12, |v33| ; D354010C 00000121 v_mov_b32_e32 v27, 0x3fc00000 ; 7E3602FF 3FC00000 v_mad_f32 v28, v12, v31, v27 ; D282001C 046E3F0C v_mac_f32_e32 v27, v12, v32 ; 3E36410C v_mov_b32_e32 v29, v34 ; 7E3A0322 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[40:47], s[36:39] ; F0900F00 012A1B1B s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v12, v30 ; 7E184F1E v_sub_f32_e64 v14, 1.0, s9 ; D208000E 000012F2 v_mul_legacy_f32_e32 v12, s34, v12 ; 0E181822 v_exp_f32_e32 v12, v12 ; 7E184B0C v_mul_f32_e32 v12, s1, v12 ; 10181801 v_mul_f32_e32 v15, v27, v12 ; 101E191B v_mul_f32_e32 v22, v28, v12 ; 102C191C v_mul_f32_e32 v12, v29, v12 ; 1018191D v_mul_f32_e32 v15, v15, v14 ; 101E1D0F v_mul_f32_e32 v22, v22, v14 ; 102C1D16 v_mul_f32_e32 v12, v12, v14 ; 10181D0C v_mac_f32_e32 v15, s9, v19 ; 3E1E2609 v_mac_f32_e32 v22, s9, v18 ; 3E2C2409 v_mac_f32_e32 v12, s9, v17 ; 3E182209 v_mov_b32_e32 v17, v12 ; 7E22030C v_mov_b32_e32 v18, v22 ; 7E240316 v_mov_b32_e32 v19, v15 ; 7E26030F s_or_b64 exec, exec, s[32:33] ; 88FE207E v_mad_f32 v27, -v20, s29, s29 ; D282001B 20743B14 v_mov_b32_e32 v12, s28 ; 7E18021C v_mul_f32_e32 v20, v27, v24 ; 1028311B v_mul_f32_e32 v15, v27, v25 ; 101E331B v_mul_f32_e32 v14, v27, v26 ; 101C351B v_mul_f32_e32 v22, s27, v16 ; 102C201B v_sub_f32_e64 v24, 1.0, s31 ; D2080018 00003EF2 v_mac_f32_e32 v24, s31, v21 ; 3E302A1F v_mul_f32_e32 v21, s20, v16 ; 102A2014 v_mul_f32_e32 v16, s17, v16 ; 10202011 s_buffer_load_dword s5, s[12:15], 0x10 ; C2028D10 s_buffer_load_dword s4, s[12:15], 0x11 ; C2020D11 s_buffer_load_dword s1, s[12:15], 0x12 ; C2008D12 s_buffer_load_dword s17, s[12:15], 0x16 ; C2088D16 s_buffer_load_dword s6, s[12:15], 0x48 ; C2030D48 s_buffer_load_dword s7, s[12:15], 0x49 ; C2038D49 s_buffer_load_dword s9, s[12:15], 0x4b ; C2048D4B v_mul_f32_e32 v23, s11, v8 ; 102E100B v_mac_f32_e32 v23, s10, v9 ; 3E2E120A v_mac_f32_e32 v23, s16, v7 ; 3E2E0E10 v_add_f32_e32 v23, s18, v23 ; 062E2E12 v_mul_f32_e32 v25, s21, v8 ; 10321015 v_mac_f32_e32 v25, s19, v9 ; 3E321213 v_mac_f32_e32 v25, s22, v7 ; 3E320E16 v_add_f32_e32 v25, s23, v25 ; 06323217 v_mul_f32_e32 v26, s25, v8 ; 10341019 v_mac_f32_e32 v26, s24, v9 ; 3E341218 v_mac_f32_e32 v26, s26, v7 ; 3E340E1A v_add_f32_e32 v26, s30, v26 ; 0634341E v_add_f32_e32 v2, v23, v2 ; 06040517 v_add_f32_e32 v3, v25, v3 ; 06060719 v_add_f32_e32 v25, v26, v5 ; 06320B1A v_mul_f32_e32 v5, s3, v9 ; 100A1203 v_mac_f32_e32 v5, s8, v8 ; 3E0A1008 v_mac_f32_e32 v5, s2, v7 ; 3E0A0E02 v_max_f32_e32 v23, 0, v5 ; 202E0A80 v_mul_f32_e32 v5, v24, v2 ; 100A0518 v_mul_f32_e32 v2, v24, v3 ; 10040718 v_mul_f32_e32 v3, v24, v25 ; 10063318 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v12, s17, v4 ; 3E180811 v_mul_f32_e32 v4, v24, v19 ; 10082718 v_mul_f32_e32 v18, v24, v18 ; 10242518 v_mul_f32_e32 v17, v24, v17 ; 10222318 v_sub_f32_e32 v19, 1.0, v27 ; 082636F2 v_add_f32_e32 v19, s0, v19 ; 06262600 v_add_f32_e64 v19, 0, v19 clamp ; D2060813 00022680 v_sub_f32_e32 v24, s3, v11 ; 08301603 v_sub_f32_e32 v25, s8, v10 ; 08321408 v_mul_f32_e32 v26, v24, v24 ; 10343118 v_mac_f32_e32 v26, v25, v25 ; 3E343319 v_sub_f32_e32 v27, s2, v13 ; 08361A02 v_mac_f32_e32 v26, v27, v27 ; 3E34371B v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A v_mul_f32_e32 v24, v26, v24 ; 1030311A v_mul_f32_e32 v25, v26, v25 ; 1032331A v_mul_f32_e32 v26, v26, v27 ; 1034371A v_mul_f32_e32 v11, v11, v9 ; 1016130B v_mad_f32 v10, -v10, v8, -v11 ; D282000A A42E110A v_mad_f32 v10, -v13, v7, v10 ; D282000A 242A0F0D v_mul_f32_e32 v9, v24, v9 ; 10121318 v_mac_f32_e32 v9, v25, v8 ; 3E121119 v_mul_f32_e32 v8, s3, v24 ; 10103003 v_mac_f32_e32 v8, s8, v25 ; 3E103208 v_mac_f32_e32 v9, v26, v7 ; 3E120F1A v_mac_f32_e32 v8, s2, v26 ; 3E103402 v_max_f32_e32 v7, 0, v8 ; 200E1080 v_sub_f32_e32 v8, 1.0, v7 ; 08100EF2 v_mul_f32_e32 v11, v8, v8 ; 10161108 v_mul_f32_e32 v8, v8, v11 ; 10101708 v_mul_f32_e32 v8, v8, v11 ; 10101708 v_max_f32_e32 v10, 0, v10 ; 20141480 v_sub_f32_e32 v11, 1.0, v10 ; 081614F2 v_mul_f32_e32 v13, v11, v11 ; 101A170B v_mul_f32_e32 v24, v11, v13 ; 10301B0B v_mad_f32 v25, -v13, v24, 1.0 ; D2820019 23CA310D v_mul_f32_e32 v26, v6, v25 ; 10343306 v_sub_f32_e32 v27, 1.0, v6 ; 08360CF2 v_mac_f32_e32 v6, v8, v27 ; 3E0C3708 v_mul_f32_e32 v27, v1, v25 ; 10363301 v_sub_f32_e32 v28, 1.0, v1 ; 083802F2 v_mac_f32_e32 v1, v8, v28 ; 3E023908 v_mul_f32_e32 v25, v0, v25 ; 10323300 v_sub_f32_e32 v28, 1.0, v0 ; 083800F2 v_mac_f32_e32 v0, v8, v28 ; 3E003908 v_sub_f32_e64 v8, 1.0, s0 ; D2080008 000000F2 v_sub_f32_e32 v28, 1.0, v8 ; 083810F2 v_mov_b32_e32 v29, 0x3cf5c28f ; 7E3A02FF 3CF5C28F v_madmk_f32_e32 v28, v28, v29, 0x3f77ced9 ; 40383B1C 3F77CED9 v_add_f32_e32 v29, v7, v7 ; 063A0F07 v_mul_f32_e32 v7, v8, v7 ; 100E0F08 v_mad_f32 v7, v29, v7, 0.5 ; D2820007 03C20F1D v_mul_f32_e32 v13, v24, v13 ; 101A1B18 v_mac_f32_e32 v26, v19, v13 ; 3E341B13 v_mac_f32_e32 v27, v19, v13 ; 3E361B13 v_mac_f32_e32 v25, v19, v13 ; 3E321B13 v_mul_f32_e32 v8, v8, v8 ; 10101108 v_log_f32_e32 v19, v28 ; 7E264F1C v_mul_f32_e32 v8, s9, v8 ; 10101009 v_mul_f32_e32 v11, v8, v11 ; 10161708 v_mac_f32_e32 v11, 1.0, v10 ; 3E1614F2 v_rcp_f32_e32 v10, v19 ; 7E145513 v_sub_f32_e32 v19, 1.0, v23 ; 08262EF2 v_mul_f32_e32 v8, v8, v19 ; 10102708 v_mac_f32_e32 v8, 1.0, v23 ; 3E102EF2 v_max_f32_e32 v9, 0, v9 ; 20121280 v_log_f32_e32 v9, v9 ; 7E124F09 v_madak_f32_e32 v8, v8, v11, 0x38d1b717 ; 42101708 38D1B717 v_mul_f32_e32 v10, 0x41200000, v10 ; 101414FF 41200000 v_mul_f32_e32 v11, v10, v10 ; 1016150A v_mul_legacy_f32_e32 v9, v11, v9 ; 0E12130B v_rcp_f32_e32 v8, v8 ; 7E105508 v_mad_f32 v10, v10, v10, 1.0 ; D282000A 03CA150A v_mul_f32_e32 v10, s7, v10 ; 10141407 v_exp_f32_e32 v9, v9 ; 7E124B09 v_mul_f32_e32 v9, v10, v9 ; 1012130A v_mul_f32_e32 v8, v9, v8 ; 10101109 v_mul_f32_e32 v9, v19, v19 ; 10122713 v_mul_f32_e32 v10, v19, v9 ; 10141313 v_mul_f32_e32 v9, v10, v9 ; 1012130A v_add_f32_e32 v7, -1.0, v7 ; 060E0EF3 v_mad_f32 v9, v7, v9, 1.0 ; D2820009 03CA1307 v_mad_f32 v7, v7, v13, 1.0 ; D2820007 03CA1B07 v_mul_f32_e32 v7, v7, v9 ; 100E1307 v_mul_f32_e32 v8, v23, v8 ; 10101117 v_mul_f32_e32 v8, s6, v8 ; 10101006 v_mul_f32_e32 v7, v23, v7 ; 100E0F17 v_mac_f32_e32 v5, v7, v22 ; 3E0A2D07 v_mul_f32_e32 v5, v5, v20 ; 100A2905 v_max_f32_e32 v8, 0, v8 ; 20101080 v_mul_f32_e32 v9, v22, v8 ; 10121116 v_mac_f32_e32 v5, v6, v9 ; 3E0A1306 v_mac_f32_e32 v2, v7, v21 ; 3E042B07 v_mac_f32_e32 v3, v7, v16 ; 3E062107 v_mul_f32_e32 v6, v21, v8 ; 100C1115 v_mul_f32_e32 v7, v16, v8 ; 100E1110 v_mul_f32_e32 v2, v2, v15 ; 10041F02 v_mul_f32_e32 v3, v3, v14 ; 10061D03 v_mac_f32_e32 v2, v1, v6 ; 3E040D01 v_mac_f32_e32 v3, v0, v7 ; 3E060F00 v_mac_f32_e32 v5, v26, v4 ; 3E0A091A v_mac_f32_e32 v2, v27, v18 ; 3E04251B v_mac_f32_e32 v3, v25, v17 ; 3E062319 v_add_f32_e64 v0, 0, v12 clamp ; D2060800 00021880 v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 v_mul_f32_e32 v4, s5, v1 ; 10080205 v_mac_f32_e32 v4, v5, v0 ; 3E080105 v_mul_f32_e32 v5, s4, v1 ; 100A0204 v_mac_f32_e32 v5, v2, v0 ; 3E0A0102 v_mul_f32_e32 v1, s1, v1 ; 10020201 v_mac_f32_e32 v1, v3, v0 ; 3E020103 v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04 v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 40 Code Size: 2380 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0..7] DCL CONST[9..12] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 0.2500, -1.0000, 10.0000, 0.4999} IMM[1] INT32 {256, 0, 1, 2} IMM[2] FLT32 { 1.0000, 0.0000, 0.1000, 0.0039} IMM[3] FLT32 { 16.0000, -8.0000, 4.0000, -2.0000} IMM[4] INT32 {4, 0, 0, 0} 0: MUL TEMP[0].x, IN[2].xxxx, IMM[0].xxxx 1: F2I TEMP[0].x, TEMP[0].xxxx 2: F2I TEMP[1].x, IN[2].yyyy 3: IDIV TEMP[2].x, TEMP[1].xxxx, IMM[1].xxxx 4: I2F TEMP[3].x, TEMP[0].xxxx 5: I2F TEMP[4].x, TEMP[2].xxxx 6: MOV TEMP[3].y, TEMP[4].xxxx 7: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx 8: INEG TEMP[2].x, TEMP[2].xxxx 9: UADD TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx 10: I2F TEMP[2].x, TEMP[2].xxxx 11: MOV TEMP[3].z, TEMP[2].xxxx 12: ADD TEMP[2].xyz, TEMP[3].xyzz, IMM[0].yyyy 13: I2F TEMP[1].x, TEMP[1].xxxx 14: ADD TEMP[1].x, IN[2].yyyy, -TEMP[1].xxxx 15: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].wwww 16: F2I TEMP[1].x, TEMP[1].xxxx 17: USEQ TEMP[4].x, TEMP[1].xxxx, IMM[1].yyyy 18: AND TEMP[4].x, TEMP[4].xxxx, IMM[2].xxxx 19: USEQ TEMP[5].x, TEMP[1].xxxx, IMM[1].zzzz 20: AND TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx 21: MOV TEMP[4].y, TEMP[5].xxxx 22: USEQ TEMP[1].x, TEMP[1].xxxx, IMM[1].wwww 23: AND TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx 24: MOV TEMP[4].z, TEMP[1].xxxx 25: MOV TEMP[1].xyz, TEMP[4].xyzx 26: MOV TEMP[4].w, IMM[2].yyyy 27: MOV TEMP[4].xyz, TEMP[3].xyzx 28: MOV TEMP[3].y, IMM[2].yzyy 29: DP4 TEMP[4].x, TEMP[1], TEMP[4] 30: MUL TEMP[3].x, TEMP[4].xxxx, IMM[2].wwww 31: MOV TEMP[3].xy, TEMP[3].xyyy 32: MOV TEMP[3].w, IMM[2].yyyy 33: TXL TEMP[3].xy, TEMP[3], SAMP[0], 2D 34: MAD TEMP[4].x, TEMP[3].xxxx, IMM[3].xxxx, IMM[3].yyyy 35: MOV TEMP[2].w, TEMP[4].xxxx 36: MUL TEMP[3].x, TEMP[3].yyyy, IMM[3].zzzz 37: MOV TEMP[1].w, TEMP[3].xxxx 38: UMUL TEMP[0].x, IMM[4].xxxx, TEMP[0].xxxx 39: I2F TEMP[0].x, TEMP[0].xxxx 40: ADD TEMP[0].x, IN[2].xxxx, -TEMP[0].xxxx 41: ADD TEMP[0].x, TEMP[0].xxxx, IMM[3].wwww 42: MUL TEMP[3], CONST[9], IN[0].xxxx 43: MAD TEMP[3], CONST[10], IN[0].yyyy, TEMP[3] 44: MAD TEMP[3], CONST[11], IN[0].zzzz, TEMP[3] 45: MAD TEMP[3], CONST[12], IN[0].wwww, TEMP[3] 46: MOV TEMP[4].x, CONST[4].xxxx 47: MOV TEMP[4].y, CONST[5].xxxx 48: MOV TEMP[4].z, CONST[6].xxxx 49: MOV TEMP[5].x, CONST[4].yyyy 50: MOV TEMP[5].y, CONST[5].yyyy 51: MOV TEMP[5].z, CONST[6].yyyy 52: MOV TEMP[6].x, CONST[4].zzzz 53: MOV TEMP[6].y, CONST[5].zzzz 54: MOV TEMP[6].z, CONST[6].zzzz 55: MUL TEMP[4].xyz, TEMP[4].xyzz, IN[1].xxxx 56: MAD TEMP[4].xyz, TEMP[5].xyzz, IN[1].yyyy, TEMP[4].xyzz 57: MAD TEMP[0].xyz, TEMP[6].xyzz, TEMP[0].xxxx, TEMP[4].xyzz 58: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[0].xyzz 59: RSQ TEMP[4].x, TEMP[4].xxxx 60: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xxxx 61: MOV TEMP[0].yzw, TEMP[0].yxyz 62: MUL TEMP[4], CONST[0], IN[0].xxxx 63: MAD TEMP[4], CONST[1], IN[0].yyyy, TEMP[4] 64: MAD TEMP[4], CONST[2], IN[0].zzzz, TEMP[4] 65: MAD TEMP[4].xyz, CONST[3], IN[0].wwww, TEMP[4] 66: MOV TEMP[4].xyz, TEMP[4].xyzx 67: MOV TEMP[0].x, TEMP[3].zzzz 68: MOV OUT[1], TEMP[2] 69: MOV OUT[4], TEMP[4] 70: MOV OUT[2], TEMP[1] 71: MOV OUT[3], TEMP[0] 72: MOV OUT[0], TEMP[3] 73: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %53 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %54 = load <32 x i8>, <32 x i8> addrspace(2)* %53, align 32, !tbaa !0 %55 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0 %57 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = add i32 %5, %7 %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59) %61 = extractelement <4 x float> %60, i32 0 %62 = extractelement <4 x float> %60, i32 1 %63 = extractelement <4 x float> %60, i32 2 %64 = extractelement <4 x float> %60, i32 3 %65 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = add i32 %5, %7 %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %67) %69 = extractelement <4 x float> %68, i32 0 %70 = extractelement <4 x float> %68, i32 1 %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = add i32 %5, %7 %74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %73) %75 = extractelement <4 x float> %74, i32 0 %76 = extractelement <4 x float> %74, i32 1 %77 = fmul float %75, 2.500000e-01 %78 = fptosi float %77 to i32 %79 = fptosi float %76 to i32 %80 = sdiv i32 %79, 256 %81 = sitofp i32 %78 to float %82 = sitofp i32 %80 to float %83 = shl nsw i32 %80, 8 %84 = sub i32 %79, %83 %85 = sitofp i32 %84 to float %86 = fadd float %81, -1.000000e+00 %87 = fadd float %82, -1.000000e+00 %88 = fadd float %85, -1.000000e+00 %89 = sitofp i32 %79 to float %90 = fsub float %76, %89 %91 = fmul float %90, 1.000000e+01 %92 = fadd float %91, 0x3FDFFE5CA0000000 %93 = fptosi float %92 to i32 %94 = icmp eq i32 %93, 0 %95 = select i1 %94, float 1.000000e+00, float 0.000000e+00 %96 = icmp eq i32 %93, 1 %97 = select i1 %96, float 1.000000e+00, float 0.000000e+00 %98 = icmp eq i32 %93, 2 %99 = select i1 %98, float 1.000000e+00, float 0.000000e+00 %100 = fmul float %95, %81 %101 = fmul float %97, %82 %102 = fadd float %100, %101 %103 = fmul float %99, %85 %104 = fadd float %102, %103 %105 = fadd float %104, 0.000000e+00 %106 = fmul float %105, 0x3F70101020000000 %107 = bitcast float %106 to i32 %108 = insertelement <4 x i32> undef, i32 %107, i32 0 %109 = insertelement <4 x i32> %108, i32 1036831949, i32 1 %110 = insertelement <4 x i32> %109, i32 0, i32 2 %111 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %110, <32 x i8> %54, <16 x i8> %56, i32 2) %112 = extractelement <4 x float> %111, i32 0 %113 = extractelement <4 x float> %111, i32 1 %114 = fmul float %112, 1.600000e+01 %115 = fadd float %114, -8.000000e+00 %116 = fmul float %113, 4.000000e+00 %117 = shl i32 %78, 2 %118 = sitofp i32 %117 to float %119 = fsub float %75, %118 %120 = fadd float %119, -2.000000e+00 %121 = fmul float %37, %61 %122 = fmul float %38, %61 %123 = fmul float %39, %61 %124 = fmul float %40, %61 %125 = fmul float %41, %62 %126 = fadd float %125, %121 %127 = fmul float %42, %62 %128 = fadd float %127, %122 %129 = fmul float %43, %62 %130 = fadd float %129, %123 %131 = fmul float %44, %62 %132 = fadd float %131, %124 %133 = fmul float %45, %63 %134 = fadd float %133, %126 %135 = fmul float %46, %63 %136 = fadd float %135, %128 %137 = fmul float %47, %63 %138 = fadd float %137, %130 %139 = fmul float %48, %63 %140 = fadd float %139, %132 %141 = fmul float %49, %64 %142 = fadd float %141, %134 %143 = fmul float %50, %64 %144 = fadd float %143, %136 %145 = fmul float %51, %64 %146 = fadd float %145, %138 %147 = fmul float %52, %64 %148 = fadd float %147, %140 %149 = fmul float %28, %69 %150 = fmul float %31, %69 %151 = fmul float %34, %69 %152 = fmul float %29, %70 %153 = fadd float %152, %149 %154 = fmul float %32, %70 %155 = fadd float %154, %150 %156 = fmul float %35, %70 %157 = fadd float %156, %151 %158 = fmul float %30, %120 %159 = fadd float %158, %153 %160 = fmul float %33, %120 %161 = fadd float %160, %155 %162 = fmul float %36, %120 %163 = fadd float %162, %157 %164 = fmul float %159, %159 %165 = fmul float %161, %161 %166 = fadd float %165, %164 %167 = fmul float %163, %163 %168 = fadd float %166, %167 %169 = call float @llvm.AMDGPU.rsq.clamped.f32(float %168) %170 = fmul float %159, %169 %171 = fmul float %161, %169 %172 = fmul float %163, %169 %173 = fmul float %13, %61 %174 = fmul float %14, %61 %175 = fmul float %15, %61 %176 = fmul float %16, %61 %177 = fmul float %17, %62 %178 = fadd float %177, %173 %179 = fmul float %18, %62 %180 = fadd float %179, %174 %181 = fmul float %19, %62 %182 = fadd float %181, %175 %183 = fmul float %20, %62 %184 = fadd float %183, %176 %185 = fmul float %21, %63 %186 = fadd float %185, %178 %187 = fmul float %22, %63 %188 = fadd float %187, %180 %189 = fmul float %23, %63 %190 = fadd float %189, %182 %191 = fmul float %24, %63 %192 = fadd float %191, %184 %193 = fmul float %25, %64 %194 = fadd float %193, %186 %195 = fmul float %26, %64 %196 = fadd float %195, %188 %197 = fmul float %27, %64 %198 = fadd float %197, %190 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %86, float %87, float %88, float %115) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %95, float %97, float %99, float %116) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %146, float %170, float %171, float %172) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %194, float %196, float %198, float %192) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %142, float %144, float %146, float %148) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0x3efff2e5 ; 7E0202FF 3EFFF2E5 v_mov_b32_e32 v2, 0xc1000000 ; 7E0402FF C1000000 v_mov_b32_e32 v5, 0 ; 7E0A0280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx4 s[16:19], s[8:9], 0x0 ; C0880900 s_load_dwordx4 s[20:23], s[8:9], 0x4 ; C08A0904 s_load_dwordx4 s[24:27], s[8:9], 0x8 ; C08C0908 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s28, s[0:3], 0x10 ; C20E0110 s_buffer_load_dword s29, s[0:3], 0x11 ; C20E8111 buffer_load_format_xyzw v[6:9], v0, s[16:19], 0 idxen ; E00C2000 80040600 buffer_load_format_xyzw v[10:13], v0, s[20:23], 0 idxen ; E00C2000 80050A00 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[12:15], v0, s[24:27], 0 idxen ; E00C2000 80060C00 s_buffer_load_dword s16, s[0:3], 0x12 ; C2080112 s_buffer_load_dword s17, s[0:3], 0x14 ; C2088114 s_buffer_load_dword s18, s[0:3], 0x15 ; C2090115 s_buffer_load_dword s19, s[0:3], 0x16 ; C2098116 s_buffer_load_dword s20, s[0:3], 0x18 ; C20A0118 s_buffer_load_dword s21, s[0:3], 0x19 ; C20A8119 s_buffer_load_dword s22, s[0:3], 0x1a ; C20B011A s_buffer_load_dword s23, s[0:3], 0x24 ; C20B8124 s_buffer_load_dword s24, s[0:3], 0x25 ; C20C0125 s_buffer_load_dword s25, s[0:3], 0x26 ; C20C8126 s_buffer_load_dword s26, s[0:3], 0x27 ; C20D0127 s_buffer_load_dword s27, s[0:3], 0x28 ; C20D8128 s_buffer_load_dword s30, s[0:3], 0x29 ; C20F0129 s_buffer_load_dword s31, s[0:3], 0x0 ; C20F8100 s_buffer_load_dword s32, s[0:3], 0x1 ; C2100101 s_buffer_load_dword s33, s[0:3], 0x2 ; C2108102 s_buffer_load_dword s34, s[0:3], 0x3 ; C2110103 s_buffer_load_dword s35, s[0:3], 0x4 ; C2118104 s_buffer_load_dword s36, s[0:3], 0x2a ; C212012A s_buffer_load_dword s37, s[0:3], 0x2b ; C212812B s_buffer_load_dword s38, s[0:3], 0x2c ; C213012C s_buffer_load_dword s39, s[0:3], 0x2d ; C213812D s_buffer_load_dword s40, s[0:3], 0x2e ; C214012E s_buffer_load_dword s41, s[0:3], 0x5 ; C2148105 s_buffer_load_dword s42, s[0:3], 0x6 ; C2150106 s_buffer_load_dword s43, s[0:3], 0x7 ; C2158107 s_buffer_load_dword s44, s[0:3], 0x8 ; C2160108 s_buffer_load_dword s45, s[0:3], 0x9 ; C2168109 s_buffer_load_dword s46, s[0:3], 0xa ; C217010A s_buffer_load_dword s47, s[0:3], 0xb ; C217810B s_buffer_load_dword s48, s[0:3], 0xc ; C218010C s_buffer_load_dword s49, s[0:3], 0xd ; C218810D s_buffer_load_dword s50, s[0:3], 0xe ; C219010E s_buffer_load_dword s51, s[0:3], 0x2f ; C219812F s_buffer_load_dword s52, s[0:3], 0x30 ; C21A0130 s_buffer_load_dword s53, s[0:3], 0x31 ; C21A8131 s_buffer_load_dword s54, s[0:3], 0x32 ; C21B0132 s_buffer_load_dword s0, s[0:3], 0x33 ; C2000133 v_mul_f32_e32 v0, s28, v10 ; 1000141C s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v14, s17, v10 ; 101C1411 v_mul_f32_e32 v10, s20, v10 ; 10141414 v_mac_f32_e32 v0, s29, v11 ; 3E00161D v_mac_f32_e32 v14, s18, v11 ; 3E1C1612 v_mac_f32_e32 v10, s21, v11 ; 3E141615 v_mul_f32_e32 v11, s23, v6 ; 10160C17 v_mac_f32_e32 v11, s27, v7 ; 3E160E1B v_mul_f32_e32 v15, s24, v6 ; 101E0C18 v_mac_f32_e32 v15, s30, v7 ; 3E1E0E1E v_mul_f32_e32 v16, s25, v6 ; 10200C19 v_mac_f32_e32 v16, s36, v7 ; 3E200E24 v_mul_f32_e32 v17, s26, v6 ; 10220C1A v_mac_f32_e32 v17, s37, v7 ; 3E220E25 v_mul_f32_e32 v18, s31, v6 ; 10240C1F v_mac_f32_e32 v18, s35, v7 ; 3E240E23 v_mul_f32_e32 v19, s32, v6 ; 10260C20 v_mac_f32_e32 v19, s41, v7 ; 3E260E29 v_mul_f32_e32 v20, s33, v6 ; 10280C21 v_mac_f32_e32 v20, s42, v7 ; 3E280E2A v_mul_f32_e32 v6, s34, v6 ; 100C0C22 v_mac_f32_e32 v6, s43, v7 ; 3E0C0E2B v_mac_f32_e32 v11, s38, v8 ; 3E161026 v_mac_f32_e32 v15, s39, v8 ; 3E1E1027 v_mac_f32_e32 v16, s40, v8 ; 3E201028 v_mac_f32_e32 v17, s51, v8 ; 3E221033 v_mac_f32_e32 v18, s44, v8 ; 3E24102C v_mac_f32_e32 v19, s45, v8 ; 3E26102D v_mac_f32_e32 v20, s46, v8 ; 3E28102E v_mac_f32_e32 v6, s47, v8 ; 3E0C102F v_mac_f32_e32 v11, s52, v9 ; 3E161234 v_mac_f32_e32 v15, s53, v9 ; 3E1E1235 v_mac_f32_e32 v16, s54, v9 ; 3E201236 v_mul_f32_e32 v3, 0x3e800000, v12 ; 100618FF 3E800000 v_cvt_i32_f32_e32 v4, v13 ; 7E08110D v_cvt_i32_f32_e32 v3, v3 ; 7E061103 v_mac_f32_e32 v17, s0, v9 ; 3E221200 v_mac_f32_e32 v18, s48, v9 ; 3E241230 v_cvt_f32_i32_e32 v7, v4 ; 7E0E0B04 v_lshlrev_b32_e32 v8, 2, v3 ; 34100682 v_cvt_f32_i32_e32 v8, v8 ; 7E100B08 v_mac_f32_e32 v19, s49, v9 ; 3E261231 v_mac_f32_e32 v20, s50, v9 ; 3E281232 v_subrev_f32_e32 v7, v7, v13 ; 0A0E1B07 v_subrev_f32_e32 v8, v8, v12 ; 0A101908 v_madmk_f32_e32 v1, v7, v1, 0x41200000 ; 40020307 41200000 v_ashrrev_i32_e32 v7, 31, v4 ; 300E089F v_lshrrev_b32_e32 v7, 24, v7 ; 2C0E0E98 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_add_i32_e32 v7, v4, v7 ; 4A0E0F04 v_and_b32_e32 v9, 0xffffff00, v7 ; 36120EFF FFFFFF00 v_sub_i32_e32 v4, v4, v9 ; 4C081304 v_cmp_eq_i32_e32 vcc, 0, v1 ; 7D040280 v_cndmask_b32_e64 v9, 0, 1.0, vcc ; D2000009 01A9E480 v_cvt_f32_i32_e32 v12, v3 ; 7E180B03 v_ashrrev_i32_e32 v3, 8, v7 ; 30060E88 v_cvt_f32_i32_e32 v7, v3 ; 7E0E0B03 v_cmp_eq_i32_e32 vcc, 1, v1 ; 7D040281 v_cmp_eq_i32_e64 s[0:1], 2, v1 ; D1040000 00020282 v_cndmask_b32_e64 v1, 0, 1.0, vcc ; D2000001 01A9E480 v_cvt_f32_i32_e32 v13, v4 ; 7E1A0B04 v_mul_f32_e32 v3, v7, v1 ; 10060307 v_mac_f32_e32 v3, v12, v9 ; 3E06130C v_cndmask_b32_e64 v21, 0, 1.0, s[0:1] ; D2000015 0001E480 v_mac_f32_e32 v3, v13, v21 ; 3E062B0D v_add_f32_e32 v3, 0, v3 ; 06060680 v_mul_f32_e32 v3, 0x3b808081, v3 ; 100606FF 3B808081 v_mov_b32_e32 v4, 0x3dcccccd ; 7E0802FF 3DCCCCCD image_sample_l v[3:4], 3, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[4:11], s[12:15] ; F0900300 00610303 v_add_f32_e32 v5, -2.0, v8 ; 060A10F5 v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mac_f32_e32 v14, s19, v5 ; 3E1C0A13 v_mac_f32_e32 v10, s22, v5 ; 3E140A16 v_add_f32_e32 v5, -1.0, v12 ; 060A18F3 v_add_f32_e32 v7, -1.0, v7 ; 060E0EF3 v_mul_f32_e32 v8, v0, v0 ; 10100100 v_mac_f32_e32 v8, v14, v14 ; 3E101D0E v_mac_f32_e32 v8, v10, v10 ; 3E10150A v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_add_f32_e32 v12, -1.0, v13 ; 06181AF3 s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v2, v3, v2, 0x41800000 ; 40040503 41800000 exp 15, 32, 0, 0, 0, v5, v7, v12, v2 ; F800020F 020C0705 v_mul_f32_e32 v0, v8, v0 ; 10000108 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, v8, v14 ; 10041D08 v_mul_f32_e32 v3, 4.0, v4 ; 100608F6 v_mul_f32_e32 v4, v8, v10 ; 10081508 exp 15, 33, 0, 0, 0, v9, v1, v21, v3 ; F800021F 03150109 exp 15, 34, 0, 0, 0, v16, v0, v2, v4 ; F800022F 04020010 exp 15, 35, 0, 0, 0, v18, v19, v20, v6 ; F800023F 06141312 exp 15, 12, 0, 1, 0, v11, v15, v16, v17 ; F80008CF 11100F0B s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 24 Code Size: 680 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL SAMP[10] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL SVIEW[6], 2D, FLOAT DCL SVIEW[7], 2D, FLOAT DCL SVIEW[8], 2D, FLOAT DCL SVIEW[9], 2D, FLOAT DCL SVIEW[10], 2D, FLOAT DCL CONST[0..3] DCL CONST[5..8] DCL CONST[19..27] DCL TEMP[0..36], LOCAL IMM[0] FLT32 { -0.2000, 7.0000, 0.0100, 0.5000} IMM[1] FLT32 { 64.0000, -64.0000, 4.0000, 0.6931} IMM[2] FLT32 { 0.0039, 0.0020, 1.0000, 2.0000} IMM[3] FLT32 { 3.0000, 0.0000, -1.0000, 0.0001} IMM[4] FLT32 { 32.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xyz, -IN[3].xyzx 1: ADD TEMP[1].xyz, CONST[1].xyzz, TEMP[0].xyzz 2: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 3: RSQ TEMP[2].x, TEMP[2].xxxx 4: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 5: ADD TEMP[0].xyz, CONST[0].xyzz, TEMP[0].xyzz 6: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz 7: RSQ TEMP[2].x, TEMP[2].xxxx 8: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx 9: ABS TEMP[2].xyz, IN[2].yzww 10: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 11: RSQ TEMP[3].x, TEMP[3].xxxx 12: MAD TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx, IMM[0].xxxx 13: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[0].yyyy 14: MAX TEMP[2].xyz, TEMP[2].xyzz, IMM[0].zzzz 15: ADD TEMP[3].x, TEMP[2].xxxx, TEMP[2].yyyy 16: ADD TEMP[3].x, TEMP[3].xxxx, TEMP[2].zzzz 17: RCP TEMP[3].xyz, TEMP[3].xxxx 18: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xyzz 19: ADD TEMP[3], IN[0], IMM[0].wwww 20: FLR TEMP[3].xyz, TEMP[3] 21: MOV TEMP[4].x, CONST[19].xxxx 22: MUL TEMP[5].x, TEMP[3].xxxx, CONST[19].xxxx 23: MOV TEMP[6].x, TEMP[5].xxxx 24: FLR TEMP[5].x, TEMP[5].xxxx 25: MUL TEMP[5].x, TEMP[5].xxxx, CONST[19].xxxx 26: FSGE TEMP[7].x, TEMP[3].xxxx, IMM[1].xxxx 27: UIF TEMP[7].xxxx :0 28: MOV TEMP[4].x, CONST[20].xxxx 29: ADD TEMP[7].x, TEMP[3].xxxx, IMM[1].yyyy 30: MUL TEMP[7].x, TEMP[7].xxxx, CONST[20].xxxx 31: MOV TEMP[6].x, TEMP[7].xxxx 32: FLR TEMP[8].x, TEMP[7].xxxx 33: MUL TEMP[8].x, TEMP[8].xxxx, CONST[20].xxxx 34: MOV TEMP[5].x, TEMP[8].xxxx 35: FRC TEMP[7].x, TEMP[7].xxxx 36: FRC TEMP[9].x, TEMP[8].xxxx 37: MOV TEMP[7].y, TEMP[9].xxxx 38: FLR TEMP[8].x, TEMP[8].xxxx 39: ADD TEMP[8].x, TEMP[8].xxxx, IMM[1].zzzz 40: MOV TEMP[7].z, TEMP[8].xxxx 41: MOV TEMP[7].xyz, TEMP[7].xyzx 42: ELSE :0 43: FRC TEMP[6].x, TEMP[6].xxxx 44: FRC TEMP[8].x, TEMP[5].xxxx 45: MOV TEMP[6].y, TEMP[8].xxxx 46: FLR TEMP[5].x, TEMP[5].xxxx 47: MOV TEMP[6].z, TEMP[5].xxxx 48: MOV TEMP[7].xyz, TEMP[6].xyzx 49: ENDIF 50: MOV TEMP[5].x, CONST[19].xxxx 51: MUL TEMP[6].x, TEMP[3].yyyy, CONST[19].xxxx 52: MOV TEMP[8].x, TEMP[6].xxxx 53: FLR TEMP[6].x, TEMP[6].xxxx 54: MUL TEMP[6].x, TEMP[6].xxxx, CONST[19].xxxx 55: FSGE TEMP[9].x, TEMP[3].yyyy, IMM[1].xxxx 56: UIF TEMP[9].xxxx :0 57: MOV TEMP[5].x, CONST[20].xxxx 58: ADD TEMP[9].x, TEMP[3].yyyy, IMM[1].yyyy 59: MUL TEMP[9].x, TEMP[9].xxxx, CONST[20].xxxx 60: MOV TEMP[8].x, TEMP[9].xxxx 61: FLR TEMP[10].x, TEMP[9].xxxx 62: MUL TEMP[10].x, TEMP[10].xxxx, CONST[20].xxxx 63: MOV TEMP[6].x, TEMP[10].xxxx 64: FRC TEMP[9].x, TEMP[9].xxxx 65: FRC TEMP[11].x, TEMP[10].xxxx 66: MOV TEMP[9].y, TEMP[11].xxxx 67: FLR TEMP[10].x, TEMP[10].xxxx 68: ADD TEMP[10].x, TEMP[10].xxxx, IMM[1].zzzz 69: MOV TEMP[9].z, TEMP[10].xxxx 70: MOV TEMP[9].xyz, TEMP[9].xyzx 71: ELSE :0 72: FRC TEMP[8].x, TEMP[8].xxxx 73: FRC TEMP[10].x, TEMP[6].xxxx 74: MOV TEMP[8].y, TEMP[10].xxxx 75: FLR TEMP[6].x, TEMP[6].xxxx 76: MOV TEMP[8].z, TEMP[6].xxxx 77: MOV TEMP[9].xyz, TEMP[8].xyzx 78: ENDIF 79: MOV TEMP[6].x, CONST[19].xxxx 80: MUL TEMP[8].x, TEMP[3].zzzz, CONST[19].xxxx 81: MOV TEMP[10].x, TEMP[8].xxxx 82: FLR TEMP[8].x, TEMP[8].xxxx 83: MUL TEMP[8].x, TEMP[8].xxxx, CONST[19].xxxx 84: FSGE TEMP[11].x, TEMP[3].zzzz, IMM[1].xxxx 85: UIF TEMP[11].xxxx :0 86: MOV TEMP[6].x, CONST[20].xxxx 87: ADD TEMP[3].x, TEMP[3].zzzz, IMM[1].yyyy 88: MUL TEMP[3].x, TEMP[3].xxxx, CONST[20].xxxx 89: MOV TEMP[10].x, TEMP[3].xxxx 90: FLR TEMP[11].x, TEMP[3].xxxx 91: MUL TEMP[11].x, TEMP[11].xxxx, CONST[20].xxxx 92: MOV TEMP[8].x, TEMP[11].xxxx 93: FRC TEMP[3].x, TEMP[3].xxxx 94: FRC TEMP[12].x, TEMP[11].xxxx 95: MOV TEMP[3].y, TEMP[12].xxxx 96: FLR TEMP[11].x, TEMP[11].xxxx 97: ADD TEMP[11].x, TEMP[11].xxxx, IMM[1].zzzz 98: MOV TEMP[3].z, TEMP[11].xxxx 99: MOV TEMP[3].xyz, TEMP[3].xyzx 100: ELSE :0 101: FRC TEMP[10].x, TEMP[10].xxxx 102: FRC TEMP[11].x, TEMP[8].xxxx 103: MOV TEMP[10].y, TEMP[11].xxxx 104: FLR TEMP[8].x, TEMP[8].xxxx 105: MOV TEMP[10].z, TEMP[8].xxxx 106: MOV TEMP[3].xyz, TEMP[10].xyzx 107: ENDIF 108: ADD TEMP[8].xyz, IN[3].xyzz, -CONST[0].xyzz 109: DP3 TEMP[8].x, TEMP[8].xyzz, TEMP[8].xyzz 110: MUL TEMP[8].x, CONST[25].xxxx, TEMP[8].xxxx 111: LG2 TEMP[8].x, TEMP[8].xxxx 112: MUL TEMP[8].x, TEMP[8].xxxx, IMM[1].wwww 113: MUL TEMP[8].x, TEMP[8].xxxx, CONST[24].xxxx 114: MOV TEMP[10].xy, IN[3].xyxx 115: MOV TEMP[11].x, IMM[2].xxxx 116: FSNE TEMP[12].x, CONST[19].xxxx, TEMP[4].xxxx 117: UIF TEMP[12].xxxx :0 118: MOV TEMP[11].x, IMM[2].yyyy 119: RCP TEMP[12].x, CONST[22].xxxx 120: MUL TEMP[10].xy, IN[3].xyyy, TEMP[12].xxxx 121: ELSE :0 122: RCP TEMP[12].x, CONST[21].xxxx 123: MUL TEMP[10].xy, TEMP[10].xyyy, TEMP[12].xxxx 124: ENDIF 125: FRC TEMP[10].xy, TEMP[10].xyyy 126: MUL TEMP[12].x, CONST[23].xxxx, IMM[2].wwww 127: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx 128: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx 129: MUL TEMP[11].x, TEMP[11].xxxx, CONST[23].xxxx 130: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[12].xxxx, TEMP[11].xxxx 131: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[4].xxxx, TEMP[7].xyyy 132: MOV TEMP[11].xy, TEMP[10].xyyy 133: MOV TEMP[11].w, TEMP[8].xxxx 134: TXL TEMP[11], TEMP[11], SAMP[9], 2D 135: FSEQ TEMP[12].x, TEMP[7].zzzz, IMM[1].zzzz 136: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz 137: MOV TEMP[13].xy, TEMP[10].xyyy 138: MOV TEMP[13].w, TEMP[8].xxxx 139: TXL TEMP[13], TEMP[13], SAMP[7], 2D 140: FSEQ TEMP[14].x, TEMP[7].zzzz, IMM[3].xxxx 141: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 142: MOV TEMP[15].xy, TEMP[10].xyyy 143: MOV TEMP[15].w, TEMP[8].xxxx 144: TXL TEMP[15], TEMP[15], SAMP[5], 2D 145: FSEQ TEMP[16].x, TEMP[7].zzzz, IMM[2].wwww 146: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 147: MOV TEMP[17].xy, TEMP[10].xyyy 148: MOV TEMP[17].w, TEMP[8].xxxx 149: TXL TEMP[17], TEMP[17], SAMP[3], 2D 150: FSEQ TEMP[18].x, TEMP[7].zzzz, IMM[2].zzzz 151: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 152: MOV TEMP[10].xy, TEMP[10].xyyy 153: MOV TEMP[10].w, TEMP[8].xxxx 154: TXL TEMP[10], TEMP[10], SAMP[1], 2D 155: FSEQ TEMP[19].x, TEMP[7].zzzz, IMM[3].yyyy 156: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 157: MUL TEMP[10], TEMP[10], TEMP[19].xxxx 158: MAD TEMP[10], TEMP[17], TEMP[18].xxxx, TEMP[10] 159: MAD TEMP[10], TEMP[15], TEMP[16].xxxx, TEMP[10] 160: MAD TEMP[10], TEMP[13], TEMP[14].xxxx, TEMP[10] 161: MAD TEMP[10], TEMP[11], TEMP[12].xxxx, TEMP[10] 162: MOV TEMP[11].xy, IN[3].zyzz 163: MOV TEMP[12].x, IMM[2].xxxx 164: FSNE TEMP[13].x, CONST[19].xxxx, TEMP[4].xxxx 165: UIF TEMP[13].xxxx :0 166: MOV TEMP[12].x, IMM[2].yyyy 167: RCP TEMP[13].x, CONST[22].xxxx 168: MUL TEMP[11].xy, IN[3].zyyy, TEMP[13].xxxx 169: ELSE :0 170: RCP TEMP[13].x, CONST[21].xxxx 171: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx 172: ENDIF 173: FRC TEMP[11].xy, TEMP[11].xyyy 174: MUL TEMP[13].x, CONST[23].xxxx, IMM[2].wwww 175: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx 176: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx 177: MUL TEMP[12].x, TEMP[12].xxxx, CONST[23].xxxx 178: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx 179: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[4].xxxx, TEMP[7].xyyy 180: MOV TEMP[12].xy, TEMP[11].xyyy 181: MOV TEMP[12].w, TEMP[8].xxxx 182: TXL TEMP[12], TEMP[12], SAMP[9], 2D 183: FSEQ TEMP[13].x, TEMP[7].zzzz, IMM[1].zzzz 184: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 185: MOV TEMP[14].xy, TEMP[11].xyyy 186: MOV TEMP[14].w, TEMP[8].xxxx 187: TXL TEMP[14], TEMP[14], SAMP[7], 2D 188: FSEQ TEMP[15].x, TEMP[7].zzzz, IMM[3].xxxx 189: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 190: MOV TEMP[16].xy, TEMP[11].xyyy 191: MOV TEMP[16].w, TEMP[8].xxxx 192: TXL TEMP[16], TEMP[16], SAMP[5], 2D 193: FSEQ TEMP[17].x, TEMP[7].zzzz, IMM[2].wwww 194: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 195: MOV TEMP[18].xy, TEMP[11].xyyy 196: MOV TEMP[18].w, TEMP[8].xxxx 197: TXL TEMP[18], TEMP[18], SAMP[3], 2D 198: FSEQ TEMP[19].x, TEMP[7].zzzz, IMM[2].zzzz 199: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 200: MOV TEMP[11].xy, TEMP[11].xyyy 201: MOV TEMP[11].w, TEMP[8].xxxx 202: TXL TEMP[11], TEMP[11], SAMP[1], 2D 203: FSEQ TEMP[20].x, TEMP[7].zzzz, IMM[3].yyyy 204: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 205: MUL TEMP[11], TEMP[11], TEMP[20].xxxx 206: MAD TEMP[11], TEMP[18], TEMP[19].xxxx, TEMP[11] 207: MAD TEMP[11], TEMP[16], TEMP[17].xxxx, TEMP[11] 208: MAD TEMP[11], TEMP[14], TEMP[15].xxxx, TEMP[11] 209: MAD TEMP[11], TEMP[12], TEMP[13].xxxx, TEMP[11] 210: MOV TEMP[12].xy, IN[3].zxzz 211: MOV TEMP[13].x, IMM[2].xxxx 212: FSNE TEMP[14].x, CONST[19].xxxx, TEMP[4].xxxx 213: UIF TEMP[14].xxxx :0 214: MOV TEMP[13].x, IMM[2].yyyy 215: RCP TEMP[14].x, CONST[22].xxxx 216: MUL TEMP[12].xy, IN[3].zxxx, TEMP[14].xxxx 217: ELSE :0 218: RCP TEMP[14].x, CONST[21].xxxx 219: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx 220: ENDIF 221: FRC TEMP[12].xy, TEMP[12].xyyy 222: MUL TEMP[14].x, CONST[23].xxxx, IMM[2].wwww 223: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx 224: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx 225: MUL TEMP[13].x, TEMP[13].xxxx, CONST[23].xxxx 226: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx 227: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[4].xxxx, TEMP[7].xyyy 228: MOV TEMP[13].xy, TEMP[12].xyyy 229: MOV TEMP[13].w, TEMP[8].xxxx 230: TXL TEMP[13], TEMP[13], SAMP[9], 2D 231: FSEQ TEMP[14].x, TEMP[7].zzzz, IMM[1].zzzz 232: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 233: MOV TEMP[15].xy, TEMP[12].xyyy 234: MOV TEMP[15].w, TEMP[8].xxxx 235: TXL TEMP[15], TEMP[15], SAMP[7], 2D 236: FSEQ TEMP[16].x, TEMP[7].zzzz, IMM[3].xxxx 237: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 238: MOV TEMP[17].xy, TEMP[12].xyyy 239: MOV TEMP[17].w, TEMP[8].xxxx 240: TXL TEMP[17], TEMP[17], SAMP[5], 2D 241: FSEQ TEMP[18].x, TEMP[7].zzzz, IMM[2].wwww 242: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 243: MOV TEMP[19].xy, TEMP[12].xyyy 244: MOV TEMP[19].w, TEMP[8].xxxx 245: TXL TEMP[19], TEMP[19], SAMP[3], 2D 246: FSEQ TEMP[20].x, TEMP[7].zzzz, IMM[2].zzzz 247: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 248: MOV TEMP[12].xy, TEMP[12].xyyy 249: MOV TEMP[12].w, TEMP[8].xxxx 250: TXL TEMP[12], TEMP[12], SAMP[1], 2D 251: FSEQ TEMP[21].x, TEMP[7].zzzz, IMM[3].yyyy 252: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 253: MUL TEMP[12], TEMP[12], TEMP[21].xxxx 254: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12] 255: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12] 256: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12] 257: MAD TEMP[12], TEMP[13], TEMP[14].xxxx, TEMP[12] 258: MOV TEMP[13].xy, IN[3].xyxx 259: MOV TEMP[14].x, IMM[2].xxxx 260: FSNE TEMP[15].x, CONST[19].xxxx, TEMP[5].xxxx 261: UIF TEMP[15].xxxx :0 262: MOV TEMP[14].x, IMM[2].yyyy 263: RCP TEMP[15].x, CONST[22].xxxx 264: MUL TEMP[13].xy, IN[3].xyyy, TEMP[15].xxxx 265: ELSE :0 266: RCP TEMP[15].x, CONST[21].xxxx 267: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx 268: ENDIF 269: FRC TEMP[13].xy, TEMP[13].xyyy 270: MUL TEMP[15].x, CONST[23].xxxx, IMM[2].wwww 271: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx 272: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx 273: MUL TEMP[14].x, TEMP[14].xxxx, CONST[23].xxxx 274: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx 275: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[5].xxxx, TEMP[9].xyyy 276: MOV TEMP[14].xy, TEMP[13].xyyy 277: MOV TEMP[14].w, TEMP[8].xxxx 278: TXL TEMP[14], TEMP[14], SAMP[9], 2D 279: FSEQ TEMP[15].x, TEMP[9].zzzz, IMM[1].zzzz 280: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 281: MOV TEMP[16].xy, TEMP[13].xyyy 282: MOV TEMP[16].w, TEMP[8].xxxx 283: TXL TEMP[16], TEMP[16], SAMP[7], 2D 284: FSEQ TEMP[17].x, TEMP[9].zzzz, IMM[3].xxxx 285: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 286: MOV TEMP[18].xy, TEMP[13].xyyy 287: MOV TEMP[18].w, TEMP[8].xxxx 288: TXL TEMP[18], TEMP[18], SAMP[5], 2D 289: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[2].wwww 290: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 291: MOV TEMP[20].xy, TEMP[13].xyyy 292: MOV TEMP[20].w, TEMP[8].xxxx 293: TXL TEMP[20], TEMP[20], SAMP[3], 2D 294: FSEQ TEMP[21].x, TEMP[9].zzzz, IMM[2].zzzz 295: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 296: MOV TEMP[13].xy, TEMP[13].xyyy 297: MOV TEMP[13].w, TEMP[8].xxxx 298: TXL TEMP[13], TEMP[13], SAMP[1], 2D 299: FSEQ TEMP[22].x, TEMP[9].zzzz, IMM[3].yyyy 300: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 301: MUL TEMP[13], TEMP[13], TEMP[22].xxxx 302: MAD TEMP[13], TEMP[20], TEMP[21].xxxx, TEMP[13] 303: MAD TEMP[13], TEMP[18], TEMP[19].xxxx, TEMP[13] 304: MAD TEMP[13], TEMP[16], TEMP[17].xxxx, TEMP[13] 305: MAD TEMP[13], TEMP[14], TEMP[15].xxxx, TEMP[13] 306: MOV TEMP[14].xy, IN[3].zyzz 307: MOV TEMP[15].x, IMM[2].xxxx 308: FSNE TEMP[16].x, CONST[19].xxxx, TEMP[5].xxxx 309: UIF TEMP[16].xxxx :0 310: MOV TEMP[15].x, IMM[2].yyyy 311: RCP TEMP[16].x, CONST[22].xxxx 312: MUL TEMP[14].xy, IN[3].zyyy, TEMP[16].xxxx 313: ELSE :0 314: RCP TEMP[16].x, CONST[21].xxxx 315: MUL TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx 316: ENDIF 317: FRC TEMP[14].xy, TEMP[14].xyyy 318: MUL TEMP[16].x, CONST[23].xxxx, IMM[2].wwww 319: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[15].xxxx 320: ADD TEMP[16].x, IMM[2].zzzz, -TEMP[16].xxxx 321: MUL TEMP[15].x, TEMP[15].xxxx, CONST[23].xxxx 322: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx, TEMP[15].xxxx 323: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[5].xxxx, TEMP[9].xyyy 324: MOV TEMP[15].xy, TEMP[14].xyyy 325: MOV TEMP[15].w, TEMP[8].xxxx 326: TXL TEMP[15], TEMP[15], SAMP[9], 2D 327: FSEQ TEMP[16].x, TEMP[9].zzzz, IMM[1].zzzz 328: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 329: MOV TEMP[17].xy, TEMP[14].xyyy 330: MOV TEMP[17].w, TEMP[8].xxxx 331: TXL TEMP[17], TEMP[17], SAMP[7], 2D 332: FSEQ TEMP[18].x, TEMP[9].zzzz, IMM[3].xxxx 333: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 334: MOV TEMP[19].xy, TEMP[14].xyyy 335: MOV TEMP[19].w, TEMP[8].xxxx 336: TXL TEMP[19], TEMP[19], SAMP[5], 2D 337: FSEQ TEMP[20].x, TEMP[9].zzzz, IMM[2].wwww 338: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 339: MOV TEMP[21].xy, TEMP[14].xyyy 340: MOV TEMP[21].w, TEMP[8].xxxx 341: TXL TEMP[21], TEMP[21], SAMP[3], 2D 342: FSEQ TEMP[22].x, TEMP[9].zzzz, IMM[2].zzzz 343: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 344: MOV TEMP[14].xy, TEMP[14].xyyy 345: MOV TEMP[14].w, TEMP[8].xxxx 346: TXL TEMP[14], TEMP[14], SAMP[1], 2D 347: FSEQ TEMP[23].x, TEMP[9].zzzz, IMM[3].yyyy 348: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz 349: MUL TEMP[14], TEMP[14], TEMP[23].xxxx 350: MAD TEMP[14], TEMP[21], TEMP[22].xxxx, TEMP[14] 351: MAD TEMP[14], TEMP[19], TEMP[20].xxxx, TEMP[14] 352: MAD TEMP[14], TEMP[17], TEMP[18].xxxx, TEMP[14] 353: MAD TEMP[14], TEMP[15], TEMP[16].xxxx, TEMP[14] 354: MOV TEMP[15].xy, IN[3].zxzz 355: MOV TEMP[16].x, IMM[2].xxxx 356: FSNE TEMP[17].x, CONST[19].xxxx, TEMP[5].xxxx 357: UIF TEMP[17].xxxx :0 358: MOV TEMP[16].x, IMM[2].yyyy 359: RCP TEMP[17].x, CONST[22].xxxx 360: MUL TEMP[15].xy, IN[3].zxxx, TEMP[17].xxxx 361: ELSE :0 362: RCP TEMP[17].x, CONST[21].xxxx 363: MUL TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx 364: ENDIF 365: FRC TEMP[15].xy, TEMP[15].xyyy 366: MUL TEMP[17].x, CONST[23].xxxx, IMM[2].wwww 367: MUL TEMP[17].x, TEMP[17].xxxx, TEMP[16].xxxx 368: ADD TEMP[17].x, IMM[2].zzzz, -TEMP[17].xxxx 369: MUL TEMP[16].x, TEMP[16].xxxx, CONST[23].xxxx 370: MAD TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx, TEMP[16].xxxx 371: MAD TEMP[15].xy, TEMP[15].xyyy, TEMP[5].xxxx, TEMP[9].xyyy 372: MOV TEMP[16].xy, TEMP[15].xyyy 373: MOV TEMP[16].w, TEMP[8].xxxx 374: TXL TEMP[16], TEMP[16], SAMP[9], 2D 375: FSEQ TEMP[17].x, TEMP[9].zzzz, IMM[1].zzzz 376: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 377: MOV TEMP[18].xy, TEMP[15].xyyy 378: MOV TEMP[18].w, TEMP[8].xxxx 379: TXL TEMP[18], TEMP[18], SAMP[7], 2D 380: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[3].xxxx 381: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 382: MOV TEMP[20].xy, TEMP[15].xyyy 383: MOV TEMP[20].w, TEMP[8].xxxx 384: TXL TEMP[20], TEMP[20], SAMP[5], 2D 385: FSEQ TEMP[21].x, TEMP[9].zzzz, IMM[2].wwww 386: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 387: MOV TEMP[22].xy, TEMP[15].xyyy 388: MOV TEMP[22].w, TEMP[8].xxxx 389: TXL TEMP[22], TEMP[22], SAMP[3], 2D 390: FSEQ TEMP[23].x, TEMP[9].zzzz, IMM[2].zzzz 391: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz 392: MOV TEMP[15].xy, TEMP[15].xyyy 393: MOV TEMP[15].w, TEMP[8].xxxx 394: TXL TEMP[15], TEMP[15], SAMP[1], 2D 395: FSEQ TEMP[24].x, TEMP[9].zzzz, IMM[3].yyyy 396: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz 397: MUL TEMP[15], TEMP[15], TEMP[24].xxxx 398: MAD TEMP[15], TEMP[22], TEMP[23].xxxx, TEMP[15] 399: MAD TEMP[15], TEMP[20], TEMP[21].xxxx, TEMP[15] 400: MAD TEMP[15], TEMP[18], TEMP[19].xxxx, TEMP[15] 401: MAD TEMP[15], TEMP[16], TEMP[17].xxxx, TEMP[15] 402: MOV TEMP[16].xy, IN[3].xyxx 403: MOV TEMP[17].x, IMM[2].xxxx 404: FSNE TEMP[18].x, CONST[19].xxxx, TEMP[6].xxxx 405: UIF TEMP[18].xxxx :0 406: MOV TEMP[17].x, IMM[2].yyyy 407: RCP TEMP[18].x, CONST[22].xxxx 408: MUL TEMP[16].xy, IN[3].xyyy, TEMP[18].xxxx 409: ELSE :0 410: RCP TEMP[18].x, CONST[21].xxxx 411: MUL TEMP[16].xy, TEMP[16].xyyy, TEMP[18].xxxx 412: ENDIF 413: FRC TEMP[16].xy, TEMP[16].xyyy 414: MUL TEMP[18].x, CONST[23].xxxx, IMM[2].wwww 415: MUL TEMP[18].x, TEMP[18].xxxx, TEMP[17].xxxx 416: ADD TEMP[18].x, IMM[2].zzzz, -TEMP[18].xxxx 417: MUL TEMP[17].x, TEMP[17].xxxx, CONST[23].xxxx 418: MAD TEMP[16].xy, TEMP[16].xyyy, TEMP[18].xxxx, TEMP[17].xxxx 419: MAD TEMP[16].xy, TEMP[16].xyyy, TEMP[6].xxxx, TEMP[3].xyyy 420: MOV TEMP[17].xy, TEMP[16].xyyy 421: MOV TEMP[17].w, TEMP[8].xxxx 422: TXL TEMP[17], TEMP[17], SAMP[9], 2D 423: FSEQ TEMP[18].x, TEMP[3].zzzz, IMM[1].zzzz 424: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 425: MOV TEMP[19].xy, TEMP[16].xyyy 426: MOV TEMP[19].w, TEMP[8].xxxx 427: TXL TEMP[19], TEMP[19], SAMP[7], 2D 428: FSEQ TEMP[20].x, TEMP[3].zzzz, IMM[3].xxxx 429: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 430: MOV TEMP[21].xy, TEMP[16].xyyy 431: MOV TEMP[21].w, TEMP[8].xxxx 432: TXL TEMP[21], TEMP[21], SAMP[5], 2D 433: FSEQ TEMP[22].x, TEMP[3].zzzz, IMM[2].wwww 434: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 435: MOV TEMP[23].xy, TEMP[16].xyyy 436: MOV TEMP[23].w, TEMP[8].xxxx 437: TXL TEMP[23], TEMP[23], SAMP[3], 2D 438: FSEQ TEMP[24].x, TEMP[3].zzzz, IMM[2].zzzz 439: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz 440: MOV TEMP[16].xy, TEMP[16].xyyy 441: MOV TEMP[16].w, TEMP[8].xxxx 442: TXL TEMP[16], TEMP[16], SAMP[1], 2D 443: FSEQ TEMP[25].x, TEMP[3].zzzz, IMM[3].yyyy 444: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz 445: MUL TEMP[16], TEMP[16], TEMP[25].xxxx 446: MAD TEMP[16], TEMP[23], TEMP[24].xxxx, TEMP[16] 447: MAD TEMP[16], TEMP[21], TEMP[22].xxxx, TEMP[16] 448: MAD TEMP[16], TEMP[19], TEMP[20].xxxx, TEMP[16] 449: MAD TEMP[16], TEMP[17], TEMP[18].xxxx, TEMP[16] 450: MOV TEMP[17].xy, IN[3].zyzz 451: MOV TEMP[18].x, IMM[2].xxxx 452: FSNE TEMP[19].x, CONST[19].xxxx, TEMP[6].xxxx 453: UIF TEMP[19].xxxx :0 454: MOV TEMP[18].x, IMM[2].yyyy 455: RCP TEMP[19].x, CONST[22].xxxx 456: MUL TEMP[17].xy, IN[3].zyyy, TEMP[19].xxxx 457: ELSE :0 458: RCP TEMP[19].x, CONST[21].xxxx 459: MUL TEMP[17].xy, TEMP[17].xyyy, TEMP[19].xxxx 460: ENDIF 461: FRC TEMP[17].xy, TEMP[17].xyyy 462: MUL TEMP[19].x, CONST[23].xxxx, IMM[2].wwww 463: MUL TEMP[19].x, TEMP[19].xxxx, TEMP[18].xxxx 464: ADD TEMP[19].x, IMM[2].zzzz, -TEMP[19].xxxx 465: MUL TEMP[18].x, TEMP[18].xxxx, CONST[23].xxxx 466: MAD TEMP[17].xy, TEMP[17].xyyy, TEMP[19].xxxx, TEMP[18].xxxx 467: MAD TEMP[17].xy, TEMP[17].xyyy, TEMP[6].xxxx, TEMP[3].xyyy 468: MOV TEMP[18].xy, TEMP[17].xyyy 469: MOV TEMP[18].w, TEMP[8].xxxx 470: TXL TEMP[18], TEMP[18], SAMP[9], 2D 471: FSEQ TEMP[19].x, TEMP[3].zzzz, IMM[1].zzzz 472: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 473: MOV TEMP[20].xy, TEMP[17].xyyy 474: MOV TEMP[20].w, TEMP[8].xxxx 475: TXL TEMP[20], TEMP[20], SAMP[7], 2D 476: FSEQ TEMP[21].x, TEMP[3].zzzz, IMM[3].xxxx 477: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 478: MOV TEMP[22].xy, TEMP[17].xyyy 479: MOV TEMP[22].w, TEMP[8].xxxx 480: TXL TEMP[22], TEMP[22], SAMP[5], 2D 481: FSEQ TEMP[23].x, TEMP[3].zzzz, IMM[2].wwww 482: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz 483: MOV TEMP[24].xy, TEMP[17].xyyy 484: MOV TEMP[24].w, TEMP[8].xxxx 485: TXL TEMP[24], TEMP[24], SAMP[3], 2D 486: FSEQ TEMP[25].x, TEMP[3].zzzz, IMM[2].zzzz 487: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz 488: MOV TEMP[17].xy, TEMP[17].xyyy 489: MOV TEMP[17].w, TEMP[8].xxxx 490: TXL TEMP[17], TEMP[17], SAMP[1], 2D 491: FSEQ TEMP[26].x, TEMP[3].zzzz, IMM[3].yyyy 492: AND TEMP[26].x, TEMP[26].xxxx, IMM[2].zzzz 493: MUL TEMP[17], TEMP[17], TEMP[26].xxxx 494: MAD TEMP[17], TEMP[24], TEMP[25].xxxx, TEMP[17] 495: MAD TEMP[17], TEMP[22], TEMP[23].xxxx, TEMP[17] 496: MAD TEMP[17], TEMP[20], TEMP[21].xxxx, TEMP[17] 497: MAD TEMP[17], TEMP[18], TEMP[19].xxxx, TEMP[17] 498: MOV TEMP[18].xy, IN[3].zxzz 499: MOV TEMP[19].x, IMM[2].xxxx 500: FSNE TEMP[20].x, CONST[19].xxxx, TEMP[6].xxxx 501: UIF TEMP[20].xxxx :0 502: MOV TEMP[19].x, IMM[2].yyyy 503: RCP TEMP[20].x, CONST[22].xxxx 504: MUL TEMP[18].xy, IN[3].zxxx, TEMP[20].xxxx 505: ELSE :0 506: RCP TEMP[20].x, CONST[21].xxxx 507: MUL TEMP[18].xy, TEMP[18].xyyy, TEMP[20].xxxx 508: ENDIF 509: FRC TEMP[18].xy, TEMP[18].xyyy 510: MUL TEMP[20].x, CONST[23].xxxx, IMM[2].wwww 511: MUL TEMP[20].x, TEMP[20].xxxx, TEMP[19].xxxx 512: ADD TEMP[20].x, IMM[2].zzzz, -TEMP[20].xxxx 513: MUL TEMP[19].x, TEMP[19].xxxx, CONST[23].xxxx 514: MAD TEMP[18].xy, TEMP[18].xyyy, TEMP[20].xxxx, TEMP[19].xxxx 515: MAD TEMP[18].xy, TEMP[18].xyyy, TEMP[6].xxxx, TEMP[3].xyyy 516: MOV TEMP[19].xy, TEMP[18].xyyy 517: MOV TEMP[19].w, TEMP[8].xxxx 518: TXL TEMP[19], TEMP[19], SAMP[9], 2D 519: FSEQ TEMP[20].x, TEMP[3].zzzz, IMM[1].zzzz 520: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 521: MOV TEMP[21].xy, TEMP[18].xyyy 522: MOV TEMP[21].w, TEMP[8].xxxx 523: TXL TEMP[21], TEMP[21], SAMP[7], 2D 524: FSEQ TEMP[22].x, TEMP[3].zzzz, IMM[3].xxxx 525: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 526: MOV TEMP[23].xy, TEMP[18].xyyy 527: MOV TEMP[23].w, TEMP[8].xxxx 528: TXL TEMP[23], TEMP[23], SAMP[5], 2D 529: FSEQ TEMP[24].x, TEMP[3].zzzz, IMM[2].wwww 530: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz 531: MOV TEMP[25].xy, TEMP[18].xyyy 532: MOV TEMP[25].w, TEMP[8].xxxx 533: TXL TEMP[25], TEMP[25], SAMP[3], 2D 534: FSEQ TEMP[26].x, TEMP[3].zzzz, IMM[2].zzzz 535: AND TEMP[26].x, TEMP[26].xxxx, IMM[2].zzzz 536: MOV TEMP[18].xy, TEMP[18].xyyy 537: MOV TEMP[18].w, TEMP[8].xxxx 538: TXL TEMP[18], TEMP[18], SAMP[1], 2D 539: FSEQ TEMP[27].x, TEMP[3].zzzz, IMM[3].yyyy 540: AND TEMP[27].x, TEMP[27].xxxx, IMM[2].zzzz 541: MUL TEMP[18], TEMP[18], TEMP[27].xxxx 542: MAD TEMP[18], TEMP[25], TEMP[26].xxxx, TEMP[18] 543: MAD TEMP[18], TEMP[23], TEMP[24].xxxx, TEMP[18] 544: MAD TEMP[18], TEMP[21], TEMP[22].xxxx, TEMP[18] 545: MAD TEMP[18], TEMP[19], TEMP[20].xxxx, TEMP[18] 546: MUL TEMP[16], TEMP[16], TEMP[2].zzzz 547: MAD TEMP[16], TEMP[17], TEMP[2].xxxx, TEMP[16] 548: MAD TEMP[16], TEMP[18], TEMP[2].yyyy, TEMP[16] 549: MUL TEMP[13], TEMP[13], TEMP[2].zzzz 550: MAD TEMP[13], TEMP[14], TEMP[2].xxxx, TEMP[13] 551: MAD TEMP[13], TEMP[15], TEMP[2].yyyy, TEMP[13] 552: MUL TEMP[10], TEMP[10], TEMP[2].zzzz 553: MAD TEMP[10], TEMP[11], TEMP[2].xxxx, TEMP[10] 554: MAD TEMP[10], TEMP[12], TEMP[2].yyyy, TEMP[10] 555: MUL TEMP[10], IN[1].xxxx, TEMP[10] 556: MAD TEMP[10], IN[1].yyyy, TEMP[13], TEMP[10] 557: MAD TEMP[10].xyz, IN[1].zzzz, TEMP[16], TEMP[10] 558: MOV TEMP[11].xy, IN[3].zyzz 559: MOV TEMP[12].x, IMM[2].xxxx 560: FSNE TEMP[13].x, CONST[19].xxxx, TEMP[4].xxxx 561: UIF TEMP[13].xxxx :0 562: MOV TEMP[12].x, IMM[2].yyyy 563: RCP TEMP[13].x, CONST[22].xxxx 564: MUL TEMP[11].xy, IN[3].zyyy, TEMP[13].xxxx 565: ELSE :0 566: RCP TEMP[13].x, CONST[21].xxxx 567: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx 568: ENDIF 569: FRC TEMP[11].xy, TEMP[11].xyyy 570: MUL TEMP[13].x, CONST[23].xxxx, IMM[2].wwww 571: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx 572: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx 573: MUL TEMP[12].x, TEMP[12].xxxx, CONST[23].xxxx 574: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx 575: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[4].xxxx, TEMP[7].xyyy 576: MOV TEMP[12].xy, TEMP[11].xyyy 577: MOV TEMP[12].w, TEMP[8].xxxx 578: TXL TEMP[12], TEMP[12], SAMP[10], 2D 579: FSEQ TEMP[13].x, TEMP[7].zzzz, IMM[1].zzzz 580: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 581: MOV TEMP[14].xy, TEMP[11].xyyy 582: MOV TEMP[14].w, TEMP[8].xxxx 583: TXL TEMP[14], TEMP[14], SAMP[8], 2D 584: FSEQ TEMP[15].x, TEMP[7].zzzz, IMM[3].xxxx 585: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 586: MOV TEMP[16].xy, TEMP[11].xyyy 587: MOV TEMP[16].w, TEMP[8].xxxx 588: TXL TEMP[16], TEMP[16], SAMP[6], 2D 589: FSEQ TEMP[17].x, TEMP[7].zzzz, IMM[2].wwww 590: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 591: MOV TEMP[18].xy, TEMP[11].xyyy 592: MOV TEMP[18].w, TEMP[8].xxxx 593: TXL TEMP[18], TEMP[18], SAMP[4], 2D 594: FSEQ TEMP[19].x, TEMP[7].zzzz, IMM[2].zzzz 595: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 596: MOV TEMP[11].xy, TEMP[11].xyyy 597: MOV TEMP[11].w, TEMP[8].xxxx 598: TXL TEMP[11], TEMP[11], SAMP[2], 2D 599: FSEQ TEMP[20].x, TEMP[7].zzzz, IMM[3].yyyy 600: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 601: MUL TEMP[11], TEMP[11], TEMP[20].xxxx 602: MAD TEMP[11], TEMP[18], TEMP[19].xxxx, TEMP[11] 603: MAD TEMP[11], TEMP[16], TEMP[17].xxxx, TEMP[11] 604: MAD TEMP[11], TEMP[14], TEMP[15].xxxx, TEMP[11] 605: MAD TEMP[11].yw, TEMP[12], TEMP[13].xxxx, TEMP[11] 606: MAD TEMP[11].xy, TEMP[11].wyyy, IMM[2].wwww, IMM[3].zzzz 607: DP2 TEMP[12].x, TEMP[11].xyyy, TEMP[11].xyyy 608: MOV_SAT TEMP[28].x, TEMP[12].xxxx 609: MOV TEMP[12].xy, IN[3].zxzz 610: MOV TEMP[13].x, IMM[2].xxxx 611: FSNE TEMP[14].x, CONST[19].xxxx, TEMP[4].xxxx 612: UIF TEMP[14].xxxx :0 613: MOV TEMP[13].x, IMM[2].yyyy 614: RCP TEMP[14].x, CONST[22].xxxx 615: MUL TEMP[12].xy, IN[3].zxxx, TEMP[14].xxxx 616: ELSE :0 617: RCP TEMP[14].x, CONST[21].xxxx 618: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx 619: ENDIF 620: FRC TEMP[12].xy, TEMP[12].xyyy 621: MUL TEMP[14].x, CONST[23].xxxx, IMM[2].wwww 622: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx 623: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx 624: MUL TEMP[13].x, TEMP[13].xxxx, CONST[23].xxxx 625: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx 626: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[4].xxxx, TEMP[7].xyyy 627: MOV TEMP[13].xy, TEMP[12].xyyy 628: MOV TEMP[13].w, TEMP[8].xxxx 629: TXL TEMP[13], TEMP[13], SAMP[10], 2D 630: FSEQ TEMP[14].x, TEMP[7].zzzz, IMM[1].zzzz 631: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 632: MOV TEMP[15].xy, TEMP[12].xyyy 633: MOV TEMP[15].w, TEMP[8].xxxx 634: TXL TEMP[15], TEMP[15], SAMP[8], 2D 635: FSEQ TEMP[16].x, TEMP[7].zzzz, IMM[3].xxxx 636: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 637: MOV TEMP[17].xy, TEMP[12].xyyy 638: MOV TEMP[17].w, TEMP[8].xxxx 639: TXL TEMP[17], TEMP[17], SAMP[6], 2D 640: FSEQ TEMP[18].x, TEMP[7].zzzz, IMM[2].wwww 641: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 642: MOV TEMP[19].xy, TEMP[12].xyyy 643: MOV TEMP[19].w, TEMP[8].xxxx 644: TXL TEMP[19], TEMP[19], SAMP[4], 2D 645: FSEQ TEMP[20].x, TEMP[7].zzzz, IMM[2].zzzz 646: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 647: MOV TEMP[12].xy, TEMP[12].xyyy 648: MOV TEMP[12].w, TEMP[8].xxxx 649: TXL TEMP[12], TEMP[12], SAMP[2], 2D 650: FSEQ TEMP[21].x, TEMP[7].zzzz, IMM[3].yyyy 651: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 652: MUL TEMP[12], TEMP[12], TEMP[21].xxxx 653: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12] 654: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12] 655: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12] 656: MAD TEMP[12].yw, TEMP[13], TEMP[14].xxxx, TEMP[12] 657: MAD TEMP[12].xy, TEMP[12].wyyy, IMM[2].wwww, IMM[3].zzzz 658: DP2 TEMP[13].x, TEMP[12].xyyy, TEMP[12].xyyy 659: MOV_SAT TEMP[29].x, TEMP[13].xxxx 660: MOV TEMP[13].xy, IN[3].xyxx 661: MOV TEMP[14].x, IMM[2].xxxx 662: FSNE TEMP[15].x, CONST[19].xxxx, TEMP[4].xxxx 663: UIF TEMP[15].xxxx :0 664: MOV TEMP[14].x, IMM[2].yyyy 665: RCP TEMP[15].x, CONST[22].xxxx 666: MUL TEMP[13].xy, IN[3].xyyy, TEMP[15].xxxx 667: ELSE :0 668: RCP TEMP[15].x, CONST[21].xxxx 669: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx 670: ENDIF 671: FRC TEMP[13].xy, TEMP[13].xyyy 672: MUL TEMP[15].x, CONST[23].xxxx, IMM[2].wwww 673: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx 674: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx 675: MUL TEMP[14].x, TEMP[14].xxxx, CONST[23].xxxx 676: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx 677: MAD TEMP[4].xy, TEMP[13].xyyy, TEMP[4].xxxx, TEMP[7].xyyy 678: MOV TEMP[13].xy, TEMP[4].xyyy 679: MOV TEMP[13].w, TEMP[8].xxxx 680: TXL TEMP[13], TEMP[13], SAMP[10], 2D 681: FSEQ TEMP[14].x, TEMP[7].zzzz, IMM[1].zzzz 682: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 683: MOV TEMP[15].xy, TEMP[4].xyyy 684: MOV TEMP[15].w, TEMP[8].xxxx 685: TXL TEMP[15], TEMP[15], SAMP[8], 2D 686: FSEQ TEMP[16].x, TEMP[7].zzzz, IMM[3].xxxx 687: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 688: MOV TEMP[17].xy, TEMP[4].xyyy 689: MOV TEMP[17].w, TEMP[8].xxxx 690: TXL TEMP[17], TEMP[17], SAMP[6], 2D 691: FSEQ TEMP[18].x, TEMP[7].zzzz, IMM[2].wwww 692: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 693: MOV TEMP[19].xy, TEMP[4].xyyy 694: MOV TEMP[19].w, TEMP[8].xxxx 695: TXL TEMP[19], TEMP[19], SAMP[4], 2D 696: FSEQ TEMP[20].x, TEMP[7].zzzz, IMM[2].zzzz 697: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 698: MOV TEMP[4].xy, TEMP[4].xyyy 699: MOV TEMP[4].w, TEMP[8].xxxx 700: TXL TEMP[4], TEMP[4], SAMP[2], 2D 701: FSEQ TEMP[7].x, TEMP[7].zzzz, IMM[3].yyyy 702: AND TEMP[7].x, TEMP[7].xxxx, IMM[2].zzzz 703: MUL TEMP[4], TEMP[4], TEMP[7].xxxx 704: MAD TEMP[4], TEMP[19], TEMP[20].xxxx, TEMP[4] 705: MAD TEMP[4], TEMP[17], TEMP[18].xxxx, TEMP[4] 706: MAD TEMP[4], TEMP[15], TEMP[16].xxxx, TEMP[4] 707: MAD TEMP[4].yw, TEMP[13], TEMP[14].xxxx, TEMP[4] 708: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[2].wwww, IMM[3].zzzz 709: DP2 TEMP[7].x, TEMP[4].xyyy, TEMP[4].xyyy 710: MOV_SAT TEMP[30].x, TEMP[7].xxxx 711: MOV TEMP[7].x, IMM[3].yyyy 712: MOV TEMP[7].y, TEMP[11].xxxx 713: MOV TEMP[7].z, TEMP[11].yyyy 714: MOV TEMP[11].y, IMM[3].yyyy 715: MOV TEMP[11].x, TEMP[12].yyyy 716: MOV TEMP[11].z, TEMP[12].xxxx 717: MOV TEMP[12].z, IMM[3].yyyy 718: MOV TEMP[12].xy, TEMP[4].xyxx 719: MUL TEMP[4].xyz, TEMP[7].xyzz, TEMP[2].xxxx 720: MAD TEMP[4].xyz, TEMP[11].xyzz, TEMP[2].yyyy, TEMP[4].xyzz 721: MAD TEMP[4].xyz, TEMP[12].xyzz, TEMP[2].zzzz, TEMP[4].xyzz 722: MOV TEMP[7].xy, IN[3].zyzz 723: MOV TEMP[11].x, IMM[2].xxxx 724: FSNE TEMP[12].x, CONST[19].xxxx, TEMP[5].xxxx 725: UIF TEMP[12].xxxx :0 726: MOV TEMP[11].x, IMM[2].yyyy 727: RCP TEMP[12].x, CONST[22].xxxx 728: MUL TEMP[7].xy, IN[3].zyyy, TEMP[12].xxxx 729: ELSE :0 730: RCP TEMP[12].x, CONST[21].xxxx 731: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[12].xxxx 732: ENDIF 733: FRC TEMP[7].xy, TEMP[7].xyyy 734: MUL TEMP[12].x, CONST[23].xxxx, IMM[2].wwww 735: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx 736: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx 737: MUL TEMP[11].x, TEMP[11].xxxx, CONST[23].xxxx 738: MAD TEMP[7].xy, TEMP[7].xyyy, TEMP[12].xxxx, TEMP[11].xxxx 739: MAD TEMP[7].xy, TEMP[7].xyyy, TEMP[5].xxxx, TEMP[9].xyyy 740: MOV TEMP[11].xy, TEMP[7].xyyy 741: MOV TEMP[11].w, TEMP[8].xxxx 742: TXL TEMP[11], TEMP[11], SAMP[10], 2D 743: FSEQ TEMP[12].x, TEMP[9].zzzz, IMM[1].zzzz 744: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz 745: MOV TEMP[13].xy, TEMP[7].xyyy 746: MOV TEMP[13].w, TEMP[8].xxxx 747: TXL TEMP[13], TEMP[13], SAMP[8], 2D 748: FSEQ TEMP[14].x, TEMP[9].zzzz, IMM[3].xxxx 749: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 750: MOV TEMP[15].xy, TEMP[7].xyyy 751: MOV TEMP[15].w, TEMP[8].xxxx 752: TXL TEMP[15], TEMP[15], SAMP[6], 2D 753: FSEQ TEMP[16].x, TEMP[9].zzzz, IMM[2].wwww 754: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 755: MOV TEMP[17].xy, TEMP[7].xyyy 756: MOV TEMP[17].w, TEMP[8].xxxx 757: TXL TEMP[17], TEMP[17], SAMP[4], 2D 758: FSEQ TEMP[18].x, TEMP[9].zzzz, IMM[2].zzzz 759: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 760: MOV TEMP[7].xy, TEMP[7].xyyy 761: MOV TEMP[7].w, TEMP[8].xxxx 762: TXL TEMP[7], TEMP[7], SAMP[2], 2D 763: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[3].yyyy 764: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 765: MUL TEMP[7], TEMP[7], TEMP[19].xxxx 766: MAD TEMP[7], TEMP[17], TEMP[18].xxxx, TEMP[7] 767: MAD TEMP[7], TEMP[15], TEMP[16].xxxx, TEMP[7] 768: MAD TEMP[7], TEMP[13], TEMP[14].xxxx, TEMP[7] 769: MAD TEMP[7].yw, TEMP[11], TEMP[12].xxxx, TEMP[7] 770: MAD TEMP[7].xy, TEMP[7].wyyy, IMM[2].wwww, IMM[3].zzzz 771: DP2 TEMP[11].x, TEMP[7].xyyy, TEMP[7].xyyy 772: MOV_SAT TEMP[31].x, TEMP[11].xxxx 773: MOV TEMP[11].xy, IN[3].zxzz 774: MOV TEMP[12].x, IMM[2].xxxx 775: FSNE TEMP[13].x, CONST[19].xxxx, TEMP[5].xxxx 776: UIF TEMP[13].xxxx :0 777: MOV TEMP[12].x, IMM[2].yyyy 778: RCP TEMP[13].x, CONST[22].xxxx 779: MUL TEMP[11].xy, IN[3].zxxx, TEMP[13].xxxx 780: ELSE :0 781: RCP TEMP[13].x, CONST[21].xxxx 782: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx 783: ENDIF 784: FRC TEMP[11].xy, TEMP[11].xyyy 785: MUL TEMP[13].x, CONST[23].xxxx, IMM[2].wwww 786: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx 787: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx 788: MUL TEMP[12].x, TEMP[12].xxxx, CONST[23].xxxx 789: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx 790: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[5].xxxx, TEMP[9].xyyy 791: MOV TEMP[12].xy, TEMP[11].xyyy 792: MOV TEMP[12].w, TEMP[8].xxxx 793: TXL TEMP[12], TEMP[12], SAMP[10], 2D 794: FSEQ TEMP[13].x, TEMP[9].zzzz, IMM[1].zzzz 795: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 796: MOV TEMP[14].xy, TEMP[11].xyyy 797: MOV TEMP[14].w, TEMP[8].xxxx 798: TXL TEMP[14], TEMP[14], SAMP[8], 2D 799: FSEQ TEMP[15].x, TEMP[9].zzzz, IMM[3].xxxx 800: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 801: MOV TEMP[16].xy, TEMP[11].xyyy 802: MOV TEMP[16].w, TEMP[8].xxxx 803: TXL TEMP[16], TEMP[16], SAMP[6], 2D 804: FSEQ TEMP[17].x, TEMP[9].zzzz, IMM[2].wwww 805: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 806: MOV TEMP[18].xy, TEMP[11].xyyy 807: MOV TEMP[18].w, TEMP[8].xxxx 808: TXL TEMP[18], TEMP[18], SAMP[4], 2D 809: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[2].zzzz 810: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 811: MOV TEMP[11].xy, TEMP[11].xyyy 812: MOV TEMP[11].w, TEMP[8].xxxx 813: TXL TEMP[11], TEMP[11], SAMP[2], 2D 814: FSEQ TEMP[20].x, TEMP[9].zzzz, IMM[3].yyyy 815: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 816: MUL TEMP[11], TEMP[11], TEMP[20].xxxx 817: MAD TEMP[11], TEMP[18], TEMP[19].xxxx, TEMP[11] 818: MAD TEMP[11], TEMP[16], TEMP[17].xxxx, TEMP[11] 819: MAD TEMP[11], TEMP[14], TEMP[15].xxxx, TEMP[11] 820: MAD TEMP[11].yw, TEMP[12], TEMP[13].xxxx, TEMP[11] 821: MAD TEMP[11].xy, TEMP[11].wyyy, IMM[2].wwww, IMM[3].zzzz 822: DP2 TEMP[12].x, TEMP[11].xyyy, TEMP[11].xyyy 823: MOV_SAT TEMP[32].x, TEMP[12].xxxx 824: MOV TEMP[12].xy, IN[3].xyxx 825: MOV TEMP[13].x, IMM[2].xxxx 826: FSNE TEMP[14].x, CONST[19].xxxx, TEMP[5].xxxx 827: UIF TEMP[14].xxxx :0 828: MOV TEMP[13].x, IMM[2].yyyy 829: RCP TEMP[14].x, CONST[22].xxxx 830: MUL TEMP[12].xy, IN[3].xyyy, TEMP[14].xxxx 831: ELSE :0 832: RCP TEMP[14].x, CONST[21].xxxx 833: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx 834: ENDIF 835: FRC TEMP[12].xy, TEMP[12].xyyy 836: MUL TEMP[14].x, CONST[23].xxxx, IMM[2].wwww 837: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx 838: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx 839: MUL TEMP[13].x, TEMP[13].xxxx, CONST[23].xxxx 840: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx 841: MAD TEMP[5].xy, TEMP[12].xyyy, TEMP[5].xxxx, TEMP[9].xyyy 842: MOV TEMP[12].xy, TEMP[5].xyyy 843: MOV TEMP[12].w, TEMP[8].xxxx 844: TXL TEMP[12], TEMP[12], SAMP[10], 2D 845: FSEQ TEMP[13].x, TEMP[9].zzzz, IMM[1].zzzz 846: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 847: MOV TEMP[14].xy, TEMP[5].xyyy 848: MOV TEMP[14].w, TEMP[8].xxxx 849: TXL TEMP[14], TEMP[14], SAMP[8], 2D 850: FSEQ TEMP[15].x, TEMP[9].zzzz, IMM[3].xxxx 851: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 852: MOV TEMP[16].xy, TEMP[5].xyyy 853: MOV TEMP[16].w, TEMP[8].xxxx 854: TXL TEMP[16], TEMP[16], SAMP[6], 2D 855: FSEQ TEMP[17].x, TEMP[9].zzzz, IMM[2].wwww 856: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 857: MOV TEMP[18].xy, TEMP[5].xyyy 858: MOV TEMP[18].w, TEMP[8].xxxx 859: TXL TEMP[18], TEMP[18], SAMP[4], 2D 860: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[2].zzzz 861: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 862: MOV TEMP[5].xy, TEMP[5].xyyy 863: MOV TEMP[5].w, TEMP[8].xxxx 864: TXL TEMP[5], TEMP[5], SAMP[2], 2D 865: FSEQ TEMP[9].x, TEMP[9].zzzz, IMM[3].yyyy 866: AND TEMP[9].x, TEMP[9].xxxx, IMM[2].zzzz 867: MUL TEMP[5], TEMP[5], TEMP[9].xxxx 868: MAD TEMP[5], TEMP[18], TEMP[19].xxxx, TEMP[5] 869: MAD TEMP[5], TEMP[16], TEMP[17].xxxx, TEMP[5] 870: MAD TEMP[5], TEMP[14], TEMP[15].xxxx, TEMP[5] 871: MAD TEMP[5].yw, TEMP[12], TEMP[13].xxxx, TEMP[5] 872: MAD TEMP[5].xy, TEMP[5].wyyy, IMM[2].wwww, IMM[3].zzzz 873: DP2 TEMP[9].x, TEMP[5].xyyy, TEMP[5].xyyy 874: MOV_SAT TEMP[33].x, TEMP[9].xxxx 875: MOV TEMP[9].x, IMM[3].yyyy 876: MOV TEMP[9].y, TEMP[7].xxxx 877: MOV TEMP[9].z, TEMP[7].yyyy 878: MOV TEMP[7].y, IMM[3].yyyy 879: MOV TEMP[7].x, TEMP[11].yyyy 880: MOV TEMP[7].z, TEMP[11].xxxx 881: MOV TEMP[11].z, IMM[3].yyyy 882: MOV TEMP[11].xy, TEMP[5].xyxx 883: MUL TEMP[5].xyz, TEMP[9].xyzz, TEMP[2].xxxx 884: MAD TEMP[5].xyz, TEMP[7].xyzz, TEMP[2].yyyy, TEMP[5].xyzz 885: MAD TEMP[5].xyz, TEMP[11].xyzz, TEMP[2].zzzz, TEMP[5].xyzz 886: MOV TEMP[7].xy, IN[3].zyzz 887: MOV TEMP[9].x, IMM[2].xxxx 888: FSNE TEMP[11].x, CONST[19].xxxx, TEMP[6].xxxx 889: UIF TEMP[11].xxxx :0 890: MOV TEMP[9].x, IMM[2].yyyy 891: RCP TEMP[11].x, CONST[22].xxxx 892: MUL TEMP[7].xy, IN[3].zyyy, TEMP[11].xxxx 893: ELSE :0 894: RCP TEMP[11].x, CONST[21].xxxx 895: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[11].xxxx 896: ENDIF 897: FRC TEMP[7].xy, TEMP[7].xyyy 898: MUL TEMP[11].x, CONST[23].xxxx, IMM[2].wwww 899: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[9].xxxx 900: ADD TEMP[11].x, IMM[2].zzzz, -TEMP[11].xxxx 901: MUL TEMP[9].x, TEMP[9].xxxx, CONST[23].xxxx 902: MAD TEMP[7].xy, TEMP[7].xyyy, TEMP[11].xxxx, TEMP[9].xxxx 903: MAD TEMP[7].xy, TEMP[7].xyyy, TEMP[6].xxxx, TEMP[3].xyyy 904: MOV TEMP[9].xy, TEMP[7].xyyy 905: MOV TEMP[9].w, TEMP[8].xxxx 906: TXL TEMP[9], TEMP[9], SAMP[10], 2D 907: FSEQ TEMP[11].x, TEMP[3].zzzz, IMM[1].zzzz 908: AND TEMP[11].x, TEMP[11].xxxx, IMM[2].zzzz 909: MOV TEMP[12].xy, TEMP[7].xyyy 910: MOV TEMP[12].w, TEMP[8].xxxx 911: TXL TEMP[12], TEMP[12], SAMP[8], 2D 912: FSEQ TEMP[13].x, TEMP[3].zzzz, IMM[3].xxxx 913: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 914: MOV TEMP[14].xy, TEMP[7].xyyy 915: MOV TEMP[14].w, TEMP[8].xxxx 916: TXL TEMP[14], TEMP[14], SAMP[6], 2D 917: FSEQ TEMP[15].x, TEMP[3].zzzz, IMM[2].wwww 918: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 919: MOV TEMP[16].xy, TEMP[7].xyyy 920: MOV TEMP[16].w, TEMP[8].xxxx 921: TXL TEMP[16], TEMP[16], SAMP[4], 2D 922: FSEQ TEMP[17].x, TEMP[3].zzzz, IMM[2].zzzz 923: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 924: MOV TEMP[7].xy, TEMP[7].xyyy 925: MOV TEMP[7].w, TEMP[8].xxxx 926: TXL TEMP[7], TEMP[7], SAMP[2], 2D 927: FSEQ TEMP[18].x, TEMP[3].zzzz, IMM[3].yyyy 928: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 929: MUL TEMP[7], TEMP[7], TEMP[18].xxxx 930: MAD TEMP[7], TEMP[16], TEMP[17].xxxx, TEMP[7] 931: MAD TEMP[7], TEMP[14], TEMP[15].xxxx, TEMP[7] 932: MAD TEMP[7], TEMP[12], TEMP[13].xxxx, TEMP[7] 933: MAD TEMP[7].yw, TEMP[9], TEMP[11].xxxx, TEMP[7] 934: MAD TEMP[7].xy, TEMP[7].wyyy, IMM[2].wwww, IMM[3].zzzz 935: DP2 TEMP[9].x, TEMP[7].xyyy, TEMP[7].xyyy 936: MOV_SAT TEMP[34].x, TEMP[9].xxxx 937: MOV TEMP[9].xy, IN[3].zxzz 938: MOV TEMP[11].x, IMM[2].xxxx 939: FSNE TEMP[12].x, CONST[19].xxxx, TEMP[6].xxxx 940: UIF TEMP[12].xxxx :0 941: MOV TEMP[11].x, IMM[2].yyyy 942: RCP TEMP[12].x, CONST[22].xxxx 943: MUL TEMP[9].xy, IN[3].zxxx, TEMP[12].xxxx 944: ELSE :0 945: RCP TEMP[12].x, CONST[21].xxxx 946: MUL TEMP[9].xy, TEMP[9].xyyy, TEMP[12].xxxx 947: ENDIF 948: FRC TEMP[9].xy, TEMP[9].xyyy 949: MUL TEMP[12].x, CONST[23].xxxx, IMM[2].wwww 950: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx 951: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx 952: MUL TEMP[11].x, TEMP[11].xxxx, CONST[23].xxxx 953: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[12].xxxx, TEMP[11].xxxx 954: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[6].xxxx, TEMP[3].xyyy 955: MOV TEMP[11].xy, TEMP[9].xyyy 956: MOV TEMP[11].w, TEMP[8].xxxx 957: TXL TEMP[11], TEMP[11], SAMP[10], 2D 958: FSEQ TEMP[12].x, TEMP[3].zzzz, IMM[1].zzzz 959: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz 960: MOV TEMP[13].xy, TEMP[9].xyyy 961: MOV TEMP[13].w, TEMP[8].xxxx 962: TXL TEMP[13], TEMP[13], SAMP[8], 2D 963: FSEQ TEMP[14].x, TEMP[3].zzzz, IMM[3].xxxx 964: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 965: MOV TEMP[15].xy, TEMP[9].xyyy 966: MOV TEMP[15].w, TEMP[8].xxxx 967: TXL TEMP[15], TEMP[15], SAMP[6], 2D 968: FSEQ TEMP[16].x, TEMP[3].zzzz, IMM[2].wwww 969: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 970: MOV TEMP[17].xy, TEMP[9].xyyy 971: MOV TEMP[17].w, TEMP[8].xxxx 972: TXL TEMP[17], TEMP[17], SAMP[4], 2D 973: FSEQ TEMP[18].x, TEMP[3].zzzz, IMM[2].zzzz 974: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 975: MOV TEMP[9].xy, TEMP[9].xyyy 976: MOV TEMP[9].w, TEMP[8].xxxx 977: TXL TEMP[9], TEMP[9], SAMP[2], 2D 978: FSEQ TEMP[19].x, TEMP[3].zzzz, IMM[3].yyyy 979: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 980: MUL TEMP[9], TEMP[9], TEMP[19].xxxx 981: MAD TEMP[9], TEMP[17], TEMP[18].xxxx, TEMP[9] 982: MAD TEMP[9], TEMP[15], TEMP[16].xxxx, TEMP[9] 983: MAD TEMP[9], TEMP[13], TEMP[14].xxxx, TEMP[9] 984: MAD TEMP[9].yw, TEMP[11], TEMP[12].xxxx, TEMP[9] 985: MAD TEMP[9].xy, TEMP[9].wyyy, IMM[2].wwww, IMM[3].zzzz 986: DP2 TEMP[11].x, TEMP[9].xyyy, TEMP[9].xyyy 987: MOV_SAT TEMP[35].x, TEMP[11].xxxx 988: MOV TEMP[11].xy, IN[3].xyxx 989: MOV TEMP[12].x, IMM[2].xxxx 990: FSNE TEMP[13].x, CONST[19].xxxx, TEMP[6].xxxx 991: UIF TEMP[13].xxxx :0 992: MOV TEMP[12].x, IMM[2].yyyy 993: RCP TEMP[13].x, CONST[22].xxxx 994: MUL TEMP[11].xy, IN[3].xyyy, TEMP[13].xxxx 995: ELSE :0 996: RCP TEMP[13].x, CONST[21].xxxx 997: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx 998: ENDIF 999: FRC TEMP[11].xy, TEMP[11].xyyy 1000: MUL TEMP[13].x, CONST[23].xxxx, IMM[2].wwww 1001: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx 1002: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx 1003: MUL TEMP[12].x, TEMP[12].xxxx, CONST[23].xxxx 1004: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx 1005: MAD TEMP[6].xy, TEMP[11].xyyy, TEMP[6].xxxx, TEMP[3].xyyy 1006: MOV TEMP[11].xy, TEMP[6].xyyy 1007: MOV TEMP[11].w, TEMP[8].xxxx 1008: TXL TEMP[11], TEMP[11], SAMP[10], 2D 1009: FSEQ TEMP[12].x, TEMP[3].zzzz, IMM[1].zzzz 1010: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz 1011: MOV TEMP[13].xy, TEMP[6].xyyy 1012: MOV TEMP[13].w, TEMP[8].xxxx 1013: TXL TEMP[13], TEMP[13], SAMP[8], 2D 1014: FSEQ TEMP[14].x, TEMP[3].zzzz, IMM[3].xxxx 1015: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 1016: MOV TEMP[15].xy, TEMP[6].xyyy 1017: MOV TEMP[15].w, TEMP[8].xxxx 1018: TXL TEMP[15], TEMP[15], SAMP[6], 2D 1019: FSEQ TEMP[16].x, TEMP[3].zzzz, IMM[2].wwww 1020: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 1021: MOV TEMP[17].xy, TEMP[6].xyyy 1022: MOV TEMP[17].w, TEMP[8].xxxx 1023: TXL TEMP[17], TEMP[17], SAMP[4], 2D 1024: FSEQ TEMP[18].x, TEMP[3].zzzz, IMM[2].zzzz 1025: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 1026: MOV TEMP[6].xy, TEMP[6].xyyy 1027: MOV TEMP[6].w, TEMP[8].xxxx 1028: TXL TEMP[6], TEMP[6], SAMP[2], 2D 1029: FSEQ TEMP[3].x, TEMP[3].zzzz, IMM[3].yyyy 1030: AND TEMP[3].x, TEMP[3].xxxx, IMM[2].zzzz 1031: MUL TEMP[3], TEMP[6], TEMP[3].xxxx 1032: MAD TEMP[3], TEMP[17], TEMP[18].xxxx, TEMP[3] 1033: MAD TEMP[3], TEMP[15], TEMP[16].xxxx, TEMP[3] 1034: MAD TEMP[3], TEMP[13], TEMP[14].xxxx, TEMP[3] 1035: MAD TEMP[3].yw, TEMP[11], TEMP[12].xxxx, TEMP[3] 1036: MAD TEMP[3].xy, TEMP[3].wyyy, IMM[2].wwww, IMM[3].zzzz 1037: DP2 TEMP[6].x, TEMP[3].xyyy, TEMP[3].xyyy 1038: MOV_SAT TEMP[36].x, TEMP[6].xxxx 1039: MOV TEMP[6].x, IMM[3].yyyy 1040: MOV TEMP[6].y, TEMP[7].xxxx 1041: MOV TEMP[6].z, TEMP[7].yyyy 1042: MOV TEMP[7].y, IMM[3].yyyy 1043: MOV TEMP[7].x, TEMP[9].yyyy 1044: MOV TEMP[7].z, TEMP[9].xxxx 1045: MOV TEMP[8].z, IMM[3].yyyy 1046: MOV TEMP[8].xy, TEMP[3].xyxx 1047: MOV TEMP[3].w, IMM[2].zzzz 1048: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[2].xxxx 1049: MAD TEMP[6].xyz, TEMP[7].xyzz, TEMP[2].yyyy, TEMP[6].xyzz 1050: MAD TEMP[2].xyz, TEMP[8].xyzz, TEMP[2].zzzz, TEMP[6].xyzz 1051: MUL TEMP[4].xyz, IN[1].xxxx, TEMP[4].xyzz 1052: MAD TEMP[4].xyz, IN[1].yyyy, TEMP[5].xyzz, TEMP[4].xyzz 1053: MAD TEMP[3].xyz, IN[1].zzzz, TEMP[2].xyzz, TEMP[4].xyzz 1054: DP4 TEMP[2].x, TEMP[3], TEMP[3] 1055: RSQ TEMP[2].x, TEMP[2].xxxx 1056: MUL TEMP[2].xyz, TEMP[3], TEMP[2].xxxx 1057: MUL TEMP[2].xyz, TEMP[2].xyzz, IN[0].wwww 1058: ADD TEMP[2].xyz, IN[2].yzww, -TEMP[2].xyzz 1059: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 1060: RSQ TEMP[3].x, TEMP[3].xxxx 1061: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 1062: MUL TEMP[3], CONST[5], IN[3].xxxx 1063: MAD TEMP[3], CONST[6], IN[3].yyyy, TEMP[3] 1064: MAD TEMP[3], CONST[7], IN[3].zzzz, TEMP[3] 1065: ADD TEMP[3].xyz, TEMP[3], CONST[8] 1066: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[3].xyzz 1067: MOV TEMP[3].xy, TEMP[3].xxxx 1068: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D 1069: ADD TEMP[0].xyz, TEMP[1].xyzz, TEMP[0].xyzz 1070: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[0].xyzz 1071: RSQ TEMP[4].x, TEMP[4].xxxx 1072: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xxxx 1073: DP3 TEMP[0].x, TEMP[2].xyzz, TEMP[0].xyzz 1074: MAX TEMP[0].x, IMM[3].wwww, TEMP[0].xxxx 1075: MUL TEMP[4].x, IMM[4].xxxx, IN[1].wwww 1076: POW TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx 1077: MOV_SAT TEMP[0].x, TEMP[0].xxxx 1078: MOV TEMP[4].w, IMM[3].yyyy 1079: MOV TEMP[4].xyz, CONST[26].xyzx 1080: MUL TEMP[5].x, IMM[2].wwww, TEMP[0].xxxx 1081: ADD TEMP[5].x, IMM[3].xxxx, -TEMP[5].xxxx 1082: MUL TEMP[5].x, TEMP[0].xxxx, TEMP[5].xxxx 1083: MUL TEMP[5].x, TEMP[0].xxxx, TEMP[5].xxxx 1084: MUL TEMP[5].x, TEMP[5].xxxx, IN[1].wwww 1085: MUL TEMP[6].xyz, TEMP[10].xyzz, CONST[3].xyzz 1086: DP3 TEMP[1].x, TEMP[2].xyzz, TEMP[1].xyzz 1087: MOV_SAT TEMP[1].x, TEMP[1].xxxx 1088: MUL TEMP[2], CONST[27], IMM[2].wwww 1089: MUL TEMP[2], TEMP[2], TEMP[3].wwww 1090: MAX TEMP[2], TEMP[2], TEMP[4] 1091: MIN TEMP[2].xyz, TEMP[2], IMM[4].yyyz 1092: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[10].xyzz 1093: MAD TEMP[1].xyz, TEMP[6].xyzz, TEMP[1].xxxx, TEMP[2].xyzz 1094: MAD TEMP[1].xyz, CONST[3].xyzz, TEMP[5].xxxx, TEMP[1].xyzz 1095: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].wwww 1096: MUL TEMP[0].xyz, TEMP[1].xyzz, IMM[0].wwww 1097: MAD TEMP[1].x, IN[2].xxxx, CONST[2].zzzz, CONST[2].wwww 1098: MOV_SAT TEMP[1].x, TEMP[1].xxxx 1099: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 1100: MOV TEMP[0].w, IMM[2].zzzz 1101: MOV OUT[0], TEMP[0] 1102: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 400) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 416) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 420) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 424) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 432) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 436) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 440) %60 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %61 = load <32 x i8>, <32 x i8> addrspace(2)* %60, align 32, !tbaa !0 %62 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 %64 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %65 = load <8 x i32>, <8 x i32> addrspace(2)* %64, align 32, !tbaa !0 %66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %67 = load <4 x i32>, <4 x i32> addrspace(2)* %66, align 16, !tbaa !0 %68 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %69 = load <8 x i32>, <8 x i32> addrspace(2)* %68, align 32, !tbaa !0 %70 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %71 = load <4 x i32>, <4 x i32> addrspace(2)* %70, align 16, !tbaa !0 %72 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %73 = load <8 x i32>, <8 x i32> addrspace(2)* %72, align 32, !tbaa !0 %74 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %75 = load <4 x i32>, <4 x i32> addrspace(2)* %74, align 16, !tbaa !0 %76 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %77 = load <8 x i32>, <8 x i32> addrspace(2)* %76, align 32, !tbaa !0 %78 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %79 = load <4 x i32>, <4 x i32> addrspace(2)* %78, align 16, !tbaa !0 %80 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %81 = load <8 x i32>, <8 x i32> addrspace(2)* %80, align 32, !tbaa !0 %82 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %83 = load <4 x i32>, <4 x i32> addrspace(2)* %82, align 16, !tbaa !0 %84 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %85 = load <8 x i32>, <8 x i32> addrspace(2)* %84, align 32, !tbaa !0 %86 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %87 = load <4 x i32>, <4 x i32> addrspace(2)* %86, align 16, !tbaa !0 %88 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 7 %89 = load <8 x i32>, <8 x i32> addrspace(2)* %88, align 32, !tbaa !0 %90 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 7 %91 = load <4 x i32>, <4 x i32> addrspace(2)* %90, align 16, !tbaa !0 %92 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 8 %93 = load <8 x i32>, <8 x i32> addrspace(2)* %92, align 32, !tbaa !0 %94 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 8 %95 = load <4 x i32>, <4 x i32> addrspace(2)* %94, align 16, !tbaa !0 %96 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 9 %97 = load <8 x i32>, <8 x i32> addrspace(2)* %96, align 32, !tbaa !0 %98 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 9 %99 = load <4 x i32>, <4 x i32> addrspace(2)* %98, align 16, !tbaa !0 %100 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 10 %101 = load <8 x i32>, <8 x i32> addrspace(2)* %100, align 32, !tbaa !0 %102 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 10 %103 = load <4 x i32>, <4 x i32> addrspace(2)* %102, align 16, !tbaa !0 %104 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %110 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %111 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %113 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %114 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %115 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %116 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %117 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %119 = fsub float %27, %116 %120 = fsub float %28, %117 %121 = fsub float %29, %118 %122 = fmul float %119, %119 %123 = fmul float %120, %120 %124 = fadd float %123, %122 %125 = fmul float %121, %121 %126 = fadd float %124, %125 %127 = call float @llvm.AMDGPU.rsq.clamped.f32(float %126) %128 = fmul float %119, %127 %129 = fmul float %120, %127 %130 = fmul float %121, %127 %131 = fsub float %24, %116 %132 = fsub float %25, %117 %133 = fsub float %26, %118 %134 = fmul float %131, %131 %135 = fmul float %132, %132 %136 = fadd float %135, %134 %137 = fmul float %133, %133 %138 = fadd float %136, %137 %139 = call float @llvm.AMDGPU.rsq.clamped.f32(float %138) %140 = fmul float %131, %139 %141 = fmul float %132, %139 %142 = fmul float %133, %139 %143 = call float @llvm.fabs.f32(float %113) %144 = call float @llvm.fabs.f32(float %114) %145 = call float @llvm.fabs.f32(float %115) %146 = fmul float %143, %143 %147 = fmul float %144, %144 %148 = fadd float %147, %146 %149 = fmul float %145, %145 %150 = fadd float %148, %149 %151 = call float @llvm.AMDGPU.rsq.clamped.f32(float %150) %152 = fmul float %143, %151 %153 = fadd float %152, 0xBFC99999A0000000 %154 = fmul float %144, %151 %155 = fadd float %154, 0xBFC99999A0000000 %156 = fmul float %145, %151 %157 = fadd float %156, 0xBFC99999A0000000 %158 = fmul float %153, 7.000000e+00 %159 = fmul float %155, 7.000000e+00 %160 = fmul float %157, 7.000000e+00 %161 = call float @llvm.maxnum.f32(float %158, float 0x3F847AE140000000) %162 = call float @llvm.maxnum.f32(float %159, float 0x3F847AE140000000) %163 = call float @llvm.maxnum.f32(float %160, float 0x3F847AE140000000) %164 = fadd float %161, %162 %165 = fadd float %164, %163 %166 = fdiv float 1.000000e+00, %165 %167 = fmul float %161, %166 %168 = fmul float %162, %166 %169 = fmul float %163, %166 %170 = fadd float %104, 5.000000e-01 %171 = fadd float %105, 5.000000e-01 %172 = fadd float %106, 5.000000e-01 %173 = call float @llvm.floor.f32(float %170) %174 = call float @llvm.floor.f32(float %171) %175 = call float @llvm.floor.f32(float %172) %176 = fmul float %173, %47 %177 = call float @llvm.floor.f32(float %176) %178 = fmul float %177, %47 %179 = fcmp ult float %173, 6.400000e+01 br i1 %179, label %ELSE, label %IF IF: ; preds = %main_body %180 = fadd float %173, -6.400000e+01 %181 = fmul float %180, %48 %182 = call float @llvm.floor.f32(float %181) %183 = fmul float %182, %48 %184 = call float @llvm.floor.f32(float %181) %185 = fsub float %181, %184 %186 = call float @llvm.floor.f32(float %183) %187 = fsub float %183, %186 %188 = call float @llvm.floor.f32(float %183) %189 = fadd float %188, 4.000000e+00 br label %ENDIF ELSE: ; preds = %main_body %190 = call float @llvm.floor.f32(float %176) %191 = fsub float %176, %190 %192 = call float @llvm.floor.f32(float %178) %193 = fsub float %178, %192 %194 = call float @llvm.floor.f32(float %178) br label %ENDIF ENDIF: ; preds = %ELSE, %IF %temp30.0 = phi float [ %189, %IF ], [ %194, %ELSE ] %temp29.0 = phi float [ %187, %IF ], [ %193, %ELSE ] %temp28.0 = phi float [ %185, %IF ], [ %191, %ELSE ] %temp16.0 = phi float [ %48, %IF ], [ %47, %ELSE ] %195 = fmul float %174, %47 %196 = call float @llvm.floor.f32(float %195) %197 = fmul float %196, %47 %198 = fcmp ult float %174, 6.400000e+01 br i1 %198, label %ELSE150, label %IF149 IF149: ; preds = %ENDIF %199 = fadd float %174, -6.400000e+01 %200 = fmul float %199, %48 %201 = call float @llvm.floor.f32(float %200) %202 = fmul float %201, %48 %203 = call float @llvm.floor.f32(float %200) %204 = fsub float %200, %203 %205 = call float @llvm.floor.f32(float %202) %206 = fsub float %202, %205 %207 = call float @llvm.floor.f32(float %202) %208 = fadd float %207, 4.000000e+00 br label %ENDIF148 ELSE150: ; preds = %ENDIF %209 = call float @llvm.floor.f32(float %195) %210 = fsub float %195, %209 %211 = call float @llvm.floor.f32(float %197) %212 = fsub float %197, %211 %213 = call float @llvm.floor.f32(float %197) br label %ENDIF148 ENDIF148: ; preds = %ELSE150, %IF149 %temp36.0 = phi float [ %204, %IF149 ], [ %210, %ELSE150 ] %temp37.0 = phi float [ %206, %IF149 ], [ %212, %ELSE150 ] %temp38.0 = phi float [ %208, %IF149 ], [ %213, %ELSE150 ] %temp20.0 = phi float [ %48, %IF149 ], [ %47, %ELSE150 ] %214 = fmul float %175, %47 %215 = call float @llvm.floor.f32(float %214) %216 = fmul float %215, %47 %217 = fcmp ult float %175, 6.400000e+01 br i1 %217, label %ELSE153, label %IF152 IF152: ; preds = %ENDIF148 %218 = fadd float %175, -6.400000e+01 %219 = fmul float %218, %48 %220 = call float @llvm.floor.f32(float %219) %221 = fmul float %220, %48 %222 = call float @llvm.floor.f32(float %219) %223 = fsub float %219, %222 %224 = call float @llvm.floor.f32(float %221) %225 = fsub float %221, %224 %226 = call float @llvm.floor.f32(float %221) %227 = fadd float %226, 4.000000e+00 br label %ENDIF151 ELSE153: ; preds = %ENDIF148 %228 = call float @llvm.floor.f32(float %214) %229 = fsub float %214, %228 %230 = call float @llvm.floor.f32(float %216) %231 = fsub float %216, %230 %232 = call float @llvm.floor.f32(float %216) br label %ENDIF151 ENDIF151: ; preds = %ELSE153, %IF152 %temp24.0 = phi float [ %48, %IF152 ], [ %47, %ELSE153 ] %temp14.0 = phi float [ %227, %IF152 ], [ %232, %ELSE153 ] %temp13.0 = phi float [ %225, %IF152 ], [ %231, %ELSE153 ] %temp12.0 = phi float [ %223, %IF152 ], [ %229, %ELSE153 ] %233 = fsub float %116, %24 %234 = fsub float %117, %25 %235 = fsub float %118, %26 %236 = fmul float %233, %233 %237 = fmul float %234, %234 %238 = fadd float %237, %236 %239 = fmul float %235, %235 %240 = fadd float %238, %239 %241 = fmul float %53, %240 %242 = call float @llvm.log2.f32(float %241) %243 = fmul float %242, 0x3FE62E4300000000 %244 = fmul float %243, %52 %245 = fcmp une float %47, %temp16.0 %.sink208 = select i1 %245, float %50, float %49 %temp44.0 = select i1 %245, float 1.953125e-03, float 3.906250e-03 %246 = fdiv float 1.000000e+00, %.sink208 %247 = fmul float %116, %246 %248 = fmul float %117, %246 %249 = call float @llvm.floor.f32(float %247) %250 = fsub float %247, %249 %251 = call float @llvm.floor.f32(float %248) %252 = fsub float %248, %251 %253 = fmul float %51, 2.000000e+00 %254 = fmul float %253, %temp44.0 %255 = fsub float 1.000000e+00, %254 %256 = fmul float %temp44.0, %51 %257 = fmul float %250, %255 %258 = fadd float %257, %256 %259 = fmul float %252, %255 %260 = fadd float %259, %256 %261 = fmul float %258, %temp16.0 %262 = fadd float %261, %temp28.0 %263 = fmul float %260, %temp16.0 %264 = fadd float %263, %temp29.0 %265 = bitcast float %262 to i32 %266 = bitcast float %264 to i32 %267 = bitcast float %244 to i32 %268 = insertelement <4 x i32> undef, i32 %265, i32 0 %269 = insertelement <4 x i32> %268, i32 %266, i32 1 %270 = insertelement <4 x i32> %269, i32 %267, i32 2 %271 = bitcast <8 x i32> %97 to <32 x i8> %272 = bitcast <4 x i32> %99 to <16 x i8> %273 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %270, <32 x i8> %271, <16 x i8> %272, i32 2) %274 = extractelement <4 x float> %273, i32 0 %275 = extractelement <4 x float> %273, i32 1 %276 = extractelement <4 x float> %273, i32 2 %277 = fcmp oeq float %temp30.0, 4.000000e+00 %278 = select i1 %277, float 1.000000e+00, float 0.000000e+00 %279 = bitcast float %262 to i32 %280 = bitcast float %264 to i32 %281 = bitcast float %244 to i32 %282 = insertelement <4 x i32> undef, i32 %279, i32 0 %283 = insertelement <4 x i32> %282, i32 %280, i32 1 %284 = insertelement <4 x i32> %283, i32 %281, i32 2 %285 = bitcast <8 x i32> %89 to <32 x i8> %286 = bitcast <4 x i32> %91 to <16 x i8> %287 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %284, <32 x i8> %285, <16 x i8> %286, i32 2) %288 = extractelement <4 x float> %287, i32 0 %289 = extractelement <4 x float> %287, i32 1 %290 = extractelement <4 x float> %287, i32 2 %291 = fcmp oeq float %temp30.0, 3.000000e+00 %292 = select i1 %291, float 1.000000e+00, float 0.000000e+00 %293 = bitcast float %262 to i32 %294 = bitcast float %264 to i32 %295 = bitcast float %244 to i32 %296 = insertelement <4 x i32> undef, i32 %293, i32 0 %297 = insertelement <4 x i32> %296, i32 %294, i32 1 %298 = insertelement <4 x i32> %297, i32 %295, i32 2 %299 = bitcast <8 x i32> %81 to <32 x i8> %300 = bitcast <4 x i32> %83 to <16 x i8> %301 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %298, <32 x i8> %299, <16 x i8> %300, i32 2) %302 = extractelement <4 x float> %301, i32 0 %303 = extractelement <4 x float> %301, i32 1 %304 = extractelement <4 x float> %301, i32 2 %305 = fcmp oeq float %temp30.0, 2.000000e+00 %306 = select i1 %305, float 1.000000e+00, float 0.000000e+00 %307 = bitcast float %262 to i32 %308 = bitcast float %264 to i32 %309 = bitcast float %244 to i32 %310 = insertelement <4 x i32> undef, i32 %307, i32 0 %311 = insertelement <4 x i32> %310, i32 %308, i32 1 %312 = insertelement <4 x i32> %311, i32 %309, i32 2 %313 = bitcast <8 x i32> %73 to <32 x i8> %314 = bitcast <4 x i32> %75 to <16 x i8> %315 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %312, <32 x i8> %313, <16 x i8> %314, i32 2) %316 = extractelement <4 x float> %315, i32 0 %317 = extractelement <4 x float> %315, i32 1 %318 = extractelement <4 x float> %315, i32 2 %319 = fcmp oeq float %temp30.0, 1.000000e+00 %320 = select i1 %319, float 1.000000e+00, float 0.000000e+00 %321 = bitcast float %262 to i32 %322 = bitcast float %264 to i32 %323 = bitcast float %244 to i32 %324 = insertelement <4 x i32> undef, i32 %321, i32 0 %325 = insertelement <4 x i32> %324, i32 %322, i32 1 %326 = insertelement <4 x i32> %325, i32 %323, i32 2 %327 = bitcast <8 x i32> %65 to <32 x i8> %328 = bitcast <4 x i32> %67 to <16 x i8> %329 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %326, <32 x i8> %327, <16 x i8> %328, i32 2) %330 = extractelement <4 x float> %329, i32 0 %331 = extractelement <4 x float> %329, i32 1 %332 = extractelement <4 x float> %329, i32 2 %333 = fcmp oeq float %temp30.0, 0.000000e+00 %334 = select i1 %333, float 1.000000e+00, float 0.000000e+00 %335 = fmul float %330, %334 %336 = fmul float %331, %334 %337 = fmul float %332, %334 %338 = fmul float %316, %320 %339 = fadd float %338, %335 %340 = fmul float %317, %320 %341 = fadd float %340, %336 %342 = fmul float %318, %320 %343 = fadd float %342, %337 %344 = fmul float %302, %306 %345 = fadd float %344, %339 %346 = fmul float %303, %306 %347 = fadd float %346, %341 %348 = fmul float %304, %306 %349 = fadd float %348, %343 %350 = fmul float %288, %292 %351 = fadd float %350, %345 %352 = fmul float %289, %292 %353 = fadd float %352, %347 %354 = fmul float %290, %292 %355 = fadd float %354, %349 %356 = fmul float %274, %278 %357 = fadd float %356, %351 %358 = fmul float %275, %278 %359 = fadd float %358, %353 %360 = fmul float %276, %278 %361 = fadd float %360, %355 %362 = fcmp une float %47, %temp16.0 %.sink209 = select i1 %362, float %50, float %49 %temp48.0 = select i1 %362, float 1.953125e-03, float 3.906250e-03 %363 = fdiv float 1.000000e+00, %.sink209 %364 = fmul float %118, %363 %365 = fmul float %117, %363 %366 = call float @llvm.floor.f32(float %364) %367 = fsub float %364, %366 %368 = call float @llvm.floor.f32(float %365) %369 = fsub float %365, %368 %370 = fmul float %51, 2.000000e+00 %371 = fmul float %370, %temp48.0 %372 = fsub float 1.000000e+00, %371 %373 = fmul float %temp48.0, %51 %374 = fmul float %367, %372 %375 = fadd float %374, %373 %376 = fmul float %369, %372 %377 = fadd float %376, %373 %378 = fmul float %375, %temp16.0 %379 = fadd float %378, %temp28.0 %380 = fmul float %377, %temp16.0 %381 = fadd float %380, %temp29.0 %382 = bitcast float %379 to i32 %383 = bitcast float %381 to i32 %384 = bitcast float %244 to i32 %385 = insertelement <4 x i32> undef, i32 %382, i32 0 %386 = insertelement <4 x i32> %385, i32 %383, i32 1 %387 = insertelement <4 x i32> %386, i32 %384, i32 2 %388 = bitcast <8 x i32> %97 to <32 x i8> %389 = bitcast <4 x i32> %99 to <16 x i8> %390 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %387, <32 x i8> %388, <16 x i8> %389, i32 2) %391 = extractelement <4 x float> %390, i32 0 %392 = extractelement <4 x float> %390, i32 1 %393 = extractelement <4 x float> %390, i32 2 %394 = fcmp oeq float %temp30.0, 4.000000e+00 %395 = select i1 %394, float 1.000000e+00, float 0.000000e+00 %396 = bitcast float %379 to i32 %397 = bitcast float %381 to i32 %398 = bitcast float %244 to i32 %399 = insertelement <4 x i32> undef, i32 %396, i32 0 %400 = insertelement <4 x i32> %399, i32 %397, i32 1 %401 = insertelement <4 x i32> %400, i32 %398, i32 2 %402 = bitcast <8 x i32> %89 to <32 x i8> %403 = bitcast <4 x i32> %91 to <16 x i8> %404 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %401, <32 x i8> %402, <16 x i8> %403, i32 2) %405 = extractelement <4 x float> %404, i32 0 %406 = extractelement <4 x float> %404, i32 1 %407 = extractelement <4 x float> %404, i32 2 %408 = fcmp oeq float %temp30.0, 3.000000e+00 %409 = select i1 %408, float 1.000000e+00, float 0.000000e+00 %410 = bitcast float %379 to i32 %411 = bitcast float %381 to i32 %412 = bitcast float %244 to i32 %413 = insertelement <4 x i32> undef, i32 %410, i32 0 %414 = insertelement <4 x i32> %413, i32 %411, i32 1 %415 = insertelement <4 x i32> %414, i32 %412, i32 2 %416 = bitcast <8 x i32> %81 to <32 x i8> %417 = bitcast <4 x i32> %83 to <16 x i8> %418 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %415, <32 x i8> %416, <16 x i8> %417, i32 2) %419 = extractelement <4 x float> %418, i32 0 %420 = extractelement <4 x float> %418, i32 1 %421 = extractelement <4 x float> %418, i32 2 %422 = fcmp oeq float %temp30.0, 2.000000e+00 %423 = select i1 %422, float 1.000000e+00, float 0.000000e+00 %424 = bitcast float %379 to i32 %425 = bitcast float %381 to i32 %426 = bitcast float %244 to i32 %427 = insertelement <4 x i32> undef, i32 %424, i32 0 %428 = insertelement <4 x i32> %427, i32 %425, i32 1 %429 = insertelement <4 x i32> %428, i32 %426, i32 2 %430 = bitcast <8 x i32> %73 to <32 x i8> %431 = bitcast <4 x i32> %75 to <16 x i8> %432 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %429, <32 x i8> %430, <16 x i8> %431, i32 2) %433 = extractelement <4 x float> %432, i32 0 %434 = extractelement <4 x float> %432, i32 1 %435 = extractelement <4 x float> %432, i32 2 %436 = fcmp oeq float %temp30.0, 1.000000e+00 %437 = select i1 %436, float 1.000000e+00, float 0.000000e+00 %438 = bitcast float %379 to i32 %439 = bitcast float %381 to i32 %440 = bitcast float %244 to i32 %441 = insertelement <4 x i32> undef, i32 %438, i32 0 %442 = insertelement <4 x i32> %441, i32 %439, i32 1 %443 = insertelement <4 x i32> %442, i32 %440, i32 2 %444 = bitcast <8 x i32> %65 to <32 x i8> %445 = bitcast <4 x i32> %67 to <16 x i8> %446 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %443, <32 x i8> %444, <16 x i8> %445, i32 2) %447 = extractelement <4 x float> %446, i32 0 %448 = extractelement <4 x float> %446, i32 1 %449 = extractelement <4 x float> %446, i32 2 %450 = fcmp oeq float %temp30.0, 0.000000e+00 %451 = select i1 %450, float 1.000000e+00, float 0.000000e+00 %452 = fmul float %447, %451 %453 = fmul float %448, %451 %454 = fmul float %449, %451 %455 = fmul float %433, %437 %456 = fadd float %455, %452 %457 = fmul float %434, %437 %458 = fadd float %457, %453 %459 = fmul float %435, %437 %460 = fadd float %459, %454 %461 = fmul float %419, %423 %462 = fadd float %461, %456 %463 = fmul float %420, %423 %464 = fadd float %463, %458 %465 = fmul float %421, %423 %466 = fadd float %465, %460 %467 = fmul float %405, %409 %468 = fadd float %467, %462 %469 = fmul float %406, %409 %470 = fadd float %469, %464 %471 = fmul float %407, %409 %472 = fadd float %471, %466 %473 = fmul float %391, %395 %474 = fadd float %473, %468 %475 = fmul float %392, %395 %476 = fadd float %475, %470 %477 = fmul float %393, %395 %478 = fadd float %477, %472 %479 = fcmp une float %47, %temp16.0 %.sink210 = select i1 %479, float %50, float %49 %temp52.0 = select i1 %479, float 1.953125e-03, float 3.906250e-03 %480 = fdiv float 1.000000e+00, %.sink210 %481 = fmul float %118, %480 %482 = fmul float %116, %480 %483 = call float @llvm.floor.f32(float %481) %484 = fsub float %481, %483 %485 = call float @llvm.floor.f32(float %482) %486 = fsub float %482, %485 %487 = fmul float %51, 2.000000e+00 %488 = fmul float %487, %temp52.0 %489 = fsub float 1.000000e+00, %488 %490 = fmul float %temp52.0, %51 %491 = fmul float %484, %489 %492 = fadd float %491, %490 %493 = fmul float %486, %489 %494 = fadd float %493, %490 %495 = fmul float %492, %temp16.0 %496 = fadd float %495, %temp28.0 %497 = fmul float %494, %temp16.0 %498 = fadd float %497, %temp29.0 %499 = bitcast float %496 to i32 %500 = bitcast float %498 to i32 %501 = bitcast float %244 to i32 %502 = insertelement <4 x i32> undef, i32 %499, i32 0 %503 = insertelement <4 x i32> %502, i32 %500, i32 1 %504 = insertelement <4 x i32> %503, i32 %501, i32 2 %505 = bitcast <8 x i32> %97 to <32 x i8> %506 = bitcast <4 x i32> %99 to <16 x i8> %507 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %504, <32 x i8> %505, <16 x i8> %506, i32 2) %508 = extractelement <4 x float> %507, i32 0 %509 = extractelement <4 x float> %507, i32 1 %510 = extractelement <4 x float> %507, i32 2 %511 = fcmp oeq float %temp30.0, 4.000000e+00 %512 = select i1 %511, float 1.000000e+00, float 0.000000e+00 %513 = bitcast float %496 to i32 %514 = bitcast float %498 to i32 %515 = bitcast float %244 to i32 %516 = insertelement <4 x i32> undef, i32 %513, i32 0 %517 = insertelement <4 x i32> %516, i32 %514, i32 1 %518 = insertelement <4 x i32> %517, i32 %515, i32 2 %519 = bitcast <8 x i32> %89 to <32 x i8> %520 = bitcast <4 x i32> %91 to <16 x i8> %521 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %518, <32 x i8> %519, <16 x i8> %520, i32 2) %522 = extractelement <4 x float> %521, i32 0 %523 = extractelement <4 x float> %521, i32 1 %524 = extractelement <4 x float> %521, i32 2 %525 = fcmp oeq float %temp30.0, 3.000000e+00 %526 = select i1 %525, float 1.000000e+00, float 0.000000e+00 %527 = bitcast float %496 to i32 %528 = bitcast float %498 to i32 %529 = bitcast float %244 to i32 %530 = insertelement <4 x i32> undef, i32 %527, i32 0 %531 = insertelement <4 x i32> %530, i32 %528, i32 1 %532 = insertelement <4 x i32> %531, i32 %529, i32 2 %533 = bitcast <8 x i32> %81 to <32 x i8> %534 = bitcast <4 x i32> %83 to <16 x i8> %535 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %532, <32 x i8> %533, <16 x i8> %534, i32 2) %536 = extractelement <4 x float> %535, i32 0 %537 = extractelement <4 x float> %535, i32 1 %538 = extractelement <4 x float> %535, i32 2 %539 = fcmp oeq float %temp30.0, 2.000000e+00 %540 = select i1 %539, float 1.000000e+00, float 0.000000e+00 %541 = bitcast float %496 to i32 %542 = bitcast float %498 to i32 %543 = bitcast float %244 to i32 %544 = insertelement <4 x i32> undef, i32 %541, i32 0 %545 = insertelement <4 x i32> %544, i32 %542, i32 1 %546 = insertelement <4 x i32> %545, i32 %543, i32 2 %547 = bitcast <8 x i32> %73 to <32 x i8> %548 = bitcast <4 x i32> %75 to <16 x i8> %549 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %546, <32 x i8> %547, <16 x i8> %548, i32 2) %550 = extractelement <4 x float> %549, i32 0 %551 = extractelement <4 x float> %549, i32 1 %552 = extractelement <4 x float> %549, i32 2 %553 = fcmp oeq float %temp30.0, 1.000000e+00 %554 = select i1 %553, float 1.000000e+00, float 0.000000e+00 %555 = bitcast float %496 to i32 %556 = bitcast float %498 to i32 %557 = bitcast float %244 to i32 %558 = insertelement <4 x i32> undef, i32 %555, i32 0 %559 = insertelement <4 x i32> %558, i32 %556, i32 1 %560 = insertelement <4 x i32> %559, i32 %557, i32 2 %561 = bitcast <8 x i32> %65 to <32 x i8> %562 = bitcast <4 x i32> %67 to <16 x i8> %563 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %560, <32 x i8> %561, <16 x i8> %562, i32 2) %564 = extractelement <4 x float> %563, i32 0 %565 = extractelement <4 x float> %563, i32 1 %566 = extractelement <4 x float> %563, i32 2 %567 = fcmp oeq float %temp30.0, 0.000000e+00 %568 = select i1 %567, float 1.000000e+00, float 0.000000e+00 %569 = fmul float %564, %568 %570 = fmul float %565, %568 %571 = fmul float %566, %568 %572 = fmul float %550, %554 %573 = fadd float %572, %569 %574 = fmul float %551, %554 %575 = fadd float %574, %570 %576 = fmul float %552, %554 %577 = fadd float %576, %571 %578 = fmul float %536, %540 %579 = fadd float %578, %573 %580 = fmul float %537, %540 %581 = fadd float %580, %575 %582 = fmul float %538, %540 %583 = fadd float %582, %577 %584 = fmul float %522, %526 %585 = fadd float %584, %579 %586 = fmul float %523, %526 %587 = fadd float %586, %581 %588 = fmul float %524, %526 %589 = fadd float %588, %583 %590 = fmul float %508, %512 %591 = fadd float %590, %585 %592 = fmul float %509, %512 %593 = fadd float %592, %587 %594 = fmul float %510, %512 %595 = fadd float %594, %589 %596 = fcmp une float %47, %temp20.0 %.sink211 = select i1 %596, float %50, float %49 %temp56.0 = select i1 %596, float 1.953125e-03, float 3.906250e-03 %597 = fdiv float 1.000000e+00, %.sink211 %598 = fmul float %116, %597 %599 = fmul float %117, %597 %600 = call float @llvm.floor.f32(float %598) %601 = fsub float %598, %600 %602 = call float @llvm.floor.f32(float %599) %603 = fsub float %599, %602 %604 = fmul float %51, 2.000000e+00 %605 = fmul float %604, %temp56.0 %606 = fsub float 1.000000e+00, %605 %607 = fmul float %temp56.0, %51 %608 = fmul float %601, %606 %609 = fadd float %608, %607 %610 = fmul float %603, %606 %611 = fadd float %610, %607 %612 = fmul float %609, %temp20.0 %613 = fadd float %612, %temp36.0 %614 = fmul float %611, %temp20.0 %615 = fadd float %614, %temp37.0 %616 = bitcast float %613 to i32 %617 = bitcast float %615 to i32 %618 = bitcast float %244 to i32 %619 = insertelement <4 x i32> undef, i32 %616, i32 0 %620 = insertelement <4 x i32> %619, i32 %617, i32 1 %621 = insertelement <4 x i32> %620, i32 %618, i32 2 %622 = bitcast <8 x i32> %97 to <32 x i8> %623 = bitcast <4 x i32> %99 to <16 x i8> %624 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %621, <32 x i8> %622, <16 x i8> %623, i32 2) %625 = extractelement <4 x float> %624, i32 0 %626 = extractelement <4 x float> %624, i32 1 %627 = extractelement <4 x float> %624, i32 2 %628 = fcmp oeq float %temp38.0, 4.000000e+00 %629 = select i1 %628, float 1.000000e+00, float 0.000000e+00 %630 = bitcast float %613 to i32 %631 = bitcast float %615 to i32 %632 = bitcast float %244 to i32 %633 = insertelement <4 x i32> undef, i32 %630, i32 0 %634 = insertelement <4 x i32> %633, i32 %631, i32 1 %635 = insertelement <4 x i32> %634, i32 %632, i32 2 %636 = bitcast <8 x i32> %89 to <32 x i8> %637 = bitcast <4 x i32> %91 to <16 x i8> %638 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %635, <32 x i8> %636, <16 x i8> %637, i32 2) %639 = extractelement <4 x float> %638, i32 0 %640 = extractelement <4 x float> %638, i32 1 %641 = extractelement <4 x float> %638, i32 2 %642 = fcmp oeq float %temp38.0, 3.000000e+00 %643 = select i1 %642, float 1.000000e+00, float 0.000000e+00 %644 = bitcast float %613 to i32 %645 = bitcast float %615 to i32 %646 = bitcast float %244 to i32 %647 = insertelement <4 x i32> undef, i32 %644, i32 0 %648 = insertelement <4 x i32> %647, i32 %645, i32 1 %649 = insertelement <4 x i32> %648, i32 %646, i32 2 %650 = bitcast <8 x i32> %81 to <32 x i8> %651 = bitcast <4 x i32> %83 to <16 x i8> %652 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %649, <32 x i8> %650, <16 x i8> %651, i32 2) %653 = extractelement <4 x float> %652, i32 0 %654 = extractelement <4 x float> %652, i32 1 %655 = extractelement <4 x float> %652, i32 2 %656 = fcmp oeq float %temp38.0, 2.000000e+00 %657 = select i1 %656, float 1.000000e+00, float 0.000000e+00 %658 = bitcast float %613 to i32 %659 = bitcast float %615 to i32 %660 = bitcast float %244 to i32 %661 = insertelement <4 x i32> undef, i32 %658, i32 0 %662 = insertelement <4 x i32> %661, i32 %659, i32 1 %663 = insertelement <4 x i32> %662, i32 %660, i32 2 %664 = bitcast <8 x i32> %73 to <32 x i8> %665 = bitcast <4 x i32> %75 to <16 x i8> %666 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %663, <32 x i8> %664, <16 x i8> %665, i32 2) %667 = extractelement <4 x float> %666, i32 0 %668 = extractelement <4 x float> %666, i32 1 %669 = extractelement <4 x float> %666, i32 2 %670 = fcmp oeq float %temp38.0, 1.000000e+00 %671 = select i1 %670, float 1.000000e+00, float 0.000000e+00 %672 = bitcast float %613 to i32 %673 = bitcast float %615 to i32 %674 = bitcast float %244 to i32 %675 = insertelement <4 x i32> undef, i32 %672, i32 0 %676 = insertelement <4 x i32> %675, i32 %673, i32 1 %677 = insertelement <4 x i32> %676, i32 %674, i32 2 %678 = bitcast <8 x i32> %65 to <32 x i8> %679 = bitcast <4 x i32> %67 to <16 x i8> %680 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %677, <32 x i8> %678, <16 x i8> %679, i32 2) %681 = extractelement <4 x float> %680, i32 0 %682 = extractelement <4 x float> %680, i32 1 %683 = extractelement <4 x float> %680, i32 2 %684 = fcmp oeq float %temp38.0, 0.000000e+00 %685 = select i1 %684, float 1.000000e+00, float 0.000000e+00 %686 = fmul float %681, %685 %687 = fmul float %682, %685 %688 = fmul float %683, %685 %689 = fmul float %667, %671 %690 = fadd float %689, %686 %691 = fmul float %668, %671 %692 = fadd float %691, %687 %693 = fmul float %669, %671 %694 = fadd float %693, %688 %695 = fmul float %653, %657 %696 = fadd float %695, %690 %697 = fmul float %654, %657 %698 = fadd float %697, %692 %699 = fmul float %655, %657 %700 = fadd float %699, %694 %701 = fmul float %639, %643 %702 = fadd float %701, %696 %703 = fmul float %640, %643 %704 = fadd float %703, %698 %705 = fmul float %641, %643 %706 = fadd float %705, %700 %707 = fmul float %625, %629 %708 = fadd float %707, %702 %709 = fmul float %626, %629 %710 = fadd float %709, %704 %711 = fmul float %627, %629 %712 = fadd float %711, %706 %713 = fcmp une float %47, %temp20.0 %.sink212 = select i1 %713, float %50, float %49 %temp60.0 = select i1 %713, float 1.953125e-03, float 3.906250e-03 %714 = fdiv float 1.000000e+00, %.sink212 %715 = fmul float %118, %714 %716 = fmul float %117, %714 %717 = call float @llvm.floor.f32(float %715) %718 = fsub float %715, %717 %719 = call float @llvm.floor.f32(float %716) %720 = fsub float %716, %719 %721 = fmul float %51, 2.000000e+00 %722 = fmul float %721, %temp60.0 %723 = fsub float 1.000000e+00, %722 %724 = fmul float %temp60.0, %51 %725 = fmul float %718, %723 %726 = fadd float %725, %724 %727 = fmul float %720, %723 %728 = fadd float %727, %724 %729 = fmul float %726, %temp20.0 %730 = fadd float %729, %temp36.0 %731 = fmul float %728, %temp20.0 %732 = fadd float %731, %temp37.0 %733 = bitcast float %730 to i32 %734 = bitcast float %732 to i32 %735 = bitcast float %244 to i32 %736 = insertelement <4 x i32> undef, i32 %733, i32 0 %737 = insertelement <4 x i32> %736, i32 %734, i32 1 %738 = insertelement <4 x i32> %737, i32 %735, i32 2 %739 = bitcast <8 x i32> %97 to <32 x i8> %740 = bitcast <4 x i32> %99 to <16 x i8> %741 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %738, <32 x i8> %739, <16 x i8> %740, i32 2) %742 = extractelement <4 x float> %741, i32 0 %743 = extractelement <4 x float> %741, i32 1 %744 = extractelement <4 x float> %741, i32 2 %745 = fcmp oeq float %temp38.0, 4.000000e+00 %746 = select i1 %745, float 1.000000e+00, float 0.000000e+00 %747 = bitcast float %730 to i32 %748 = bitcast float %732 to i32 %749 = bitcast float %244 to i32 %750 = insertelement <4 x i32> undef, i32 %747, i32 0 %751 = insertelement <4 x i32> %750, i32 %748, i32 1 %752 = insertelement <4 x i32> %751, i32 %749, i32 2 %753 = bitcast <8 x i32> %89 to <32 x i8> %754 = bitcast <4 x i32> %91 to <16 x i8> %755 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %752, <32 x i8> %753, <16 x i8> %754, i32 2) %756 = extractelement <4 x float> %755, i32 0 %757 = extractelement <4 x float> %755, i32 1 %758 = extractelement <4 x float> %755, i32 2 %759 = fcmp oeq float %temp38.0, 3.000000e+00 %760 = select i1 %759, float 1.000000e+00, float 0.000000e+00 %761 = bitcast float %730 to i32 %762 = bitcast float %732 to i32 %763 = bitcast float %244 to i32 %764 = insertelement <4 x i32> undef, i32 %761, i32 0 %765 = insertelement <4 x i32> %764, i32 %762, i32 1 %766 = insertelement <4 x i32> %765, i32 %763, i32 2 %767 = bitcast <8 x i32> %81 to <32 x i8> %768 = bitcast <4 x i32> %83 to <16 x i8> %769 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %766, <32 x i8> %767, <16 x i8> %768, i32 2) %770 = extractelement <4 x float> %769, i32 0 %771 = extractelement <4 x float> %769, i32 1 %772 = extractelement <4 x float> %769, i32 2 %773 = fcmp oeq float %temp38.0, 2.000000e+00 %774 = select i1 %773, float 1.000000e+00, float 0.000000e+00 %775 = bitcast float %730 to i32 %776 = bitcast float %732 to i32 %777 = bitcast float %244 to i32 %778 = insertelement <4 x i32> undef, i32 %775, i32 0 %779 = insertelement <4 x i32> %778, i32 %776, i32 1 %780 = insertelement <4 x i32> %779, i32 %777, i32 2 %781 = bitcast <8 x i32> %73 to <32 x i8> %782 = bitcast <4 x i32> %75 to <16 x i8> %783 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %780, <32 x i8> %781, <16 x i8> %782, i32 2) %784 = extractelement <4 x float> %783, i32 0 %785 = extractelement <4 x float> %783, i32 1 %786 = extractelement <4 x float> %783, i32 2 %787 = fcmp oeq float %temp38.0, 1.000000e+00 %788 = select i1 %787, float 1.000000e+00, float 0.000000e+00 %789 = bitcast float %730 to i32 %790 = bitcast float %732 to i32 %791 = bitcast float %244 to i32 %792 = insertelement <4 x i32> undef, i32 %789, i32 0 %793 = insertelement <4 x i32> %792, i32 %790, i32 1 %794 = insertelement <4 x i32> %793, i32 %791, i32 2 %795 = bitcast <8 x i32> %65 to <32 x i8> %796 = bitcast <4 x i32> %67 to <16 x i8> %797 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %794, <32 x i8> %795, <16 x i8> %796, i32 2) %798 = extractelement <4 x float> %797, i32 0 %799 = extractelement <4 x float> %797, i32 1 %800 = extractelement <4 x float> %797, i32 2 %801 = fcmp oeq float %temp38.0, 0.000000e+00 %802 = select i1 %801, float 1.000000e+00, float 0.000000e+00 %803 = fmul float %798, %802 %804 = fmul float %799, %802 %805 = fmul float %800, %802 %806 = fmul float %784, %788 %807 = fadd float %806, %803 %808 = fmul float %785, %788 %809 = fadd float %808, %804 %810 = fmul float %786, %788 %811 = fadd float %810, %805 %812 = fmul float %770, %774 %813 = fadd float %812, %807 %814 = fmul float %771, %774 %815 = fadd float %814, %809 %816 = fmul float %772, %774 %817 = fadd float %816, %811 %818 = fmul float %756, %760 %819 = fadd float %818, %813 %820 = fmul float %757, %760 %821 = fadd float %820, %815 %822 = fmul float %758, %760 %823 = fadd float %822, %817 %824 = fmul float %742, %746 %825 = fadd float %824, %819 %826 = fmul float %743, %746 %827 = fadd float %826, %821 %828 = fmul float %744, %746 %829 = fadd float %828, %823 %830 = fcmp une float %47, %temp20.0 %.sink213 = select i1 %830, float %50, float %49 %temp64.0 = select i1 %830, float 1.953125e-03, float 3.906250e-03 %831 = fdiv float 1.000000e+00, %.sink213 %832 = fmul float %118, %831 %833 = fmul float %116, %831 %834 = call float @llvm.floor.f32(float %832) %835 = fsub float %832, %834 %836 = call float @llvm.floor.f32(float %833) %837 = fsub float %833, %836 %838 = fmul float %51, 2.000000e+00 %839 = fmul float %838, %temp64.0 %840 = fsub float 1.000000e+00, %839 %841 = fmul float %temp64.0, %51 %842 = fmul float %835, %840 %843 = fadd float %842, %841 %844 = fmul float %837, %840 %845 = fadd float %844, %841 %846 = fmul float %843, %temp20.0 %847 = fadd float %846, %temp36.0 %848 = fmul float %845, %temp20.0 %849 = fadd float %848, %temp37.0 %850 = bitcast float %847 to i32 %851 = bitcast float %849 to i32 %852 = bitcast float %244 to i32 %853 = insertelement <4 x i32> undef, i32 %850, i32 0 %854 = insertelement <4 x i32> %853, i32 %851, i32 1 %855 = insertelement <4 x i32> %854, i32 %852, i32 2 %856 = bitcast <8 x i32> %97 to <32 x i8> %857 = bitcast <4 x i32> %99 to <16 x i8> %858 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %855, <32 x i8> %856, <16 x i8> %857, i32 2) %859 = extractelement <4 x float> %858, i32 0 %860 = extractelement <4 x float> %858, i32 1 %861 = extractelement <4 x float> %858, i32 2 %862 = fcmp oeq float %temp38.0, 4.000000e+00 %863 = select i1 %862, float 1.000000e+00, float 0.000000e+00 %864 = bitcast float %847 to i32 %865 = bitcast float %849 to i32 %866 = bitcast float %244 to i32 %867 = insertelement <4 x i32> undef, i32 %864, i32 0 %868 = insertelement <4 x i32> %867, i32 %865, i32 1 %869 = insertelement <4 x i32> %868, i32 %866, i32 2 %870 = bitcast <8 x i32> %89 to <32 x i8> %871 = bitcast <4 x i32> %91 to <16 x i8> %872 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %869, <32 x i8> %870, <16 x i8> %871, i32 2) %873 = extractelement <4 x float> %872, i32 0 %874 = extractelement <4 x float> %872, i32 1 %875 = extractelement <4 x float> %872, i32 2 %876 = fcmp oeq float %temp38.0, 3.000000e+00 %877 = select i1 %876, float 1.000000e+00, float 0.000000e+00 %878 = bitcast float %847 to i32 %879 = bitcast float %849 to i32 %880 = bitcast float %244 to i32 %881 = insertelement <4 x i32> undef, i32 %878, i32 0 %882 = insertelement <4 x i32> %881, i32 %879, i32 1 %883 = insertelement <4 x i32> %882, i32 %880, i32 2 %884 = bitcast <8 x i32> %81 to <32 x i8> %885 = bitcast <4 x i32> %83 to <16 x i8> %886 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %883, <32 x i8> %884, <16 x i8> %885, i32 2) %887 = extractelement <4 x float> %886, i32 0 %888 = extractelement <4 x float> %886, i32 1 %889 = extractelement <4 x float> %886, i32 2 %890 = fcmp oeq float %temp38.0, 2.000000e+00 %891 = select i1 %890, float 1.000000e+00, float 0.000000e+00 %892 = bitcast float %847 to i32 %893 = bitcast float %849 to i32 %894 = bitcast float %244 to i32 %895 = insertelement <4 x i32> undef, i32 %892, i32 0 %896 = insertelement <4 x i32> %895, i32 %893, i32 1 %897 = insertelement <4 x i32> %896, i32 %894, i32 2 %898 = bitcast <8 x i32> %73 to <32 x i8> %899 = bitcast <4 x i32> %75 to <16 x i8> %900 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %897, <32 x i8> %898, <16 x i8> %899, i32 2) %901 = extractelement <4 x float> %900, i32 0 %902 = extractelement <4 x float> %900, i32 1 %903 = extractelement <4 x float> %900, i32 2 %904 = fcmp oeq float %temp38.0, 1.000000e+00 %905 = select i1 %904, float 1.000000e+00, float 0.000000e+00 %906 = bitcast float %847 to i32 %907 = bitcast float %849 to i32 %908 = bitcast float %244 to i32 %909 = insertelement <4 x i32> undef, i32 %906, i32 0 %910 = insertelement <4 x i32> %909, i32 %907, i32 1 %911 = insertelement <4 x i32> %910, i32 %908, i32 2 %912 = bitcast <8 x i32> %65 to <32 x i8> %913 = bitcast <4 x i32> %67 to <16 x i8> %914 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %911, <32 x i8> %912, <16 x i8> %913, i32 2) %915 = extractelement <4 x float> %914, i32 0 %916 = extractelement <4 x float> %914, i32 1 %917 = extractelement <4 x float> %914, i32 2 %918 = fcmp oeq float %temp38.0, 0.000000e+00 %919 = select i1 %918, float 1.000000e+00, float 0.000000e+00 %920 = fmul float %915, %919 %921 = fmul float %916, %919 %922 = fmul float %917, %919 %923 = fmul float %901, %905 %924 = fadd float %923, %920 %925 = fmul float %902, %905 %926 = fadd float %925, %921 %927 = fmul float %903, %905 %928 = fadd float %927, %922 %929 = fmul float %887, %891 %930 = fadd float %929, %924 %931 = fmul float %888, %891 %932 = fadd float %931, %926 %933 = fmul float %889, %891 %934 = fadd float %933, %928 %935 = fmul float %873, %877 %936 = fadd float %935, %930 %937 = fmul float %874, %877 %938 = fadd float %937, %932 %939 = fmul float %875, %877 %940 = fadd float %939, %934 %941 = fmul float %859, %863 %942 = fadd float %941, %936 %943 = fmul float %860, %863 %944 = fadd float %943, %938 %945 = fmul float %861, %863 %946 = fadd float %945, %940 %947 = fcmp une float %47, %temp24.0 %.sink214 = select i1 %947, float %50, float %49 %temp68.0 = select i1 %947, float 1.953125e-03, float 3.906250e-03 %948 = fdiv float 1.000000e+00, %.sink214 %949 = fmul float %116, %948 %950 = fmul float %117, %948 %951 = call float @llvm.floor.f32(float %949) %952 = fsub float %949, %951 %953 = call float @llvm.floor.f32(float %950) %954 = fsub float %950, %953 %955 = fmul float %51, 2.000000e+00 %956 = fmul float %955, %temp68.0 %957 = fsub float 1.000000e+00, %956 %958 = fmul float %temp68.0, %51 %959 = fmul float %952, %957 %960 = fadd float %959, %958 %961 = fmul float %954, %957 %962 = fadd float %961, %958 %963 = fmul float %960, %temp24.0 %964 = fadd float %963, %temp12.0 %965 = fmul float %962, %temp24.0 %966 = fadd float %965, %temp13.0 %967 = bitcast float %964 to i32 %968 = bitcast float %966 to i32 %969 = bitcast float %244 to i32 %970 = insertelement <4 x i32> undef, i32 %967, i32 0 %971 = insertelement <4 x i32> %970, i32 %968, i32 1 %972 = insertelement <4 x i32> %971, i32 %969, i32 2 %973 = bitcast <8 x i32> %97 to <32 x i8> %974 = bitcast <4 x i32> %99 to <16 x i8> %975 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %972, <32 x i8> %973, <16 x i8> %974, i32 2) %976 = extractelement <4 x float> %975, i32 0 %977 = extractelement <4 x float> %975, i32 1 %978 = extractelement <4 x float> %975, i32 2 %979 = fcmp oeq float %temp14.0, 4.000000e+00 %980 = select i1 %979, float 1.000000e+00, float 0.000000e+00 %981 = bitcast float %964 to i32 %982 = bitcast float %966 to i32 %983 = bitcast float %244 to i32 %984 = insertelement <4 x i32> undef, i32 %981, i32 0 %985 = insertelement <4 x i32> %984, i32 %982, i32 1 %986 = insertelement <4 x i32> %985, i32 %983, i32 2 %987 = bitcast <8 x i32> %89 to <32 x i8> %988 = bitcast <4 x i32> %91 to <16 x i8> %989 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %986, <32 x i8> %987, <16 x i8> %988, i32 2) %990 = extractelement <4 x float> %989, i32 0 %991 = extractelement <4 x float> %989, i32 1 %992 = extractelement <4 x float> %989, i32 2 %993 = fcmp oeq float %temp14.0, 3.000000e+00 %994 = select i1 %993, float 1.000000e+00, float 0.000000e+00 %995 = bitcast float %964 to i32 %996 = bitcast float %966 to i32 %997 = bitcast float %244 to i32 %998 = insertelement <4 x i32> undef, i32 %995, i32 0 %999 = insertelement <4 x i32> %998, i32 %996, i32 1 %1000 = insertelement <4 x i32> %999, i32 %997, i32 2 %1001 = bitcast <8 x i32> %81 to <32 x i8> %1002 = bitcast <4 x i32> %83 to <16 x i8> %1003 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1000, <32 x i8> %1001, <16 x i8> %1002, i32 2) %1004 = extractelement <4 x float> %1003, i32 0 %1005 = extractelement <4 x float> %1003, i32 1 %1006 = extractelement <4 x float> %1003, i32 2 %1007 = fcmp oeq float %temp14.0, 2.000000e+00 %1008 = select i1 %1007, float 1.000000e+00, float 0.000000e+00 %1009 = bitcast float %964 to i32 %1010 = bitcast float %966 to i32 %1011 = bitcast float %244 to i32 %1012 = insertelement <4 x i32> undef, i32 %1009, i32 0 %1013 = insertelement <4 x i32> %1012, i32 %1010, i32 1 %1014 = insertelement <4 x i32> %1013, i32 %1011, i32 2 %1015 = bitcast <8 x i32> %73 to <32 x i8> %1016 = bitcast <4 x i32> %75 to <16 x i8> %1017 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1014, <32 x i8> %1015, <16 x i8> %1016, i32 2) %1018 = extractelement <4 x float> %1017, i32 0 %1019 = extractelement <4 x float> %1017, i32 1 %1020 = extractelement <4 x float> %1017, i32 2 %1021 = fcmp oeq float %temp14.0, 1.000000e+00 %1022 = select i1 %1021, float 1.000000e+00, float 0.000000e+00 %1023 = bitcast float %964 to i32 %1024 = bitcast float %966 to i32 %1025 = bitcast float %244 to i32 %1026 = insertelement <4 x i32> undef, i32 %1023, i32 0 %1027 = insertelement <4 x i32> %1026, i32 %1024, i32 1 %1028 = insertelement <4 x i32> %1027, i32 %1025, i32 2 %1029 = bitcast <8 x i32> %65 to <32 x i8> %1030 = bitcast <4 x i32> %67 to <16 x i8> %1031 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1028, <32 x i8> %1029, <16 x i8> %1030, i32 2) %1032 = extractelement <4 x float> %1031, i32 0 %1033 = extractelement <4 x float> %1031, i32 1 %1034 = extractelement <4 x float> %1031, i32 2 %1035 = fcmp oeq float %temp14.0, 0.000000e+00 %1036 = select i1 %1035, float 1.000000e+00, float 0.000000e+00 %1037 = fmul float %1032, %1036 %1038 = fmul float %1033, %1036 %1039 = fmul float %1034, %1036 %1040 = fmul float %1018, %1022 %1041 = fadd float %1040, %1037 %1042 = fmul float %1019, %1022 %1043 = fadd float %1042, %1038 %1044 = fmul float %1020, %1022 %1045 = fadd float %1044, %1039 %1046 = fmul float %1004, %1008 %1047 = fadd float %1046, %1041 %1048 = fmul float %1005, %1008 %1049 = fadd float %1048, %1043 %1050 = fmul float %1006, %1008 %1051 = fadd float %1050, %1045 %1052 = fmul float %990, %994 %1053 = fadd float %1052, %1047 %1054 = fmul float %991, %994 %1055 = fadd float %1054, %1049 %1056 = fmul float %992, %994 %1057 = fadd float %1056, %1051 %1058 = fmul float %976, %980 %1059 = fadd float %1058, %1053 %1060 = fmul float %977, %980 %1061 = fadd float %1060, %1055 %1062 = fmul float %978, %980 %1063 = fadd float %1062, %1057 %1064 = fcmp une float %47, %temp24.0 %.sink215 = select i1 %1064, float %50, float %49 %temp72.0 = select i1 %1064, float 1.953125e-03, float 3.906250e-03 %1065 = fdiv float 1.000000e+00, %.sink215 %1066 = fmul float %118, %1065 %1067 = fmul float %117, %1065 %1068 = call float @llvm.floor.f32(float %1066) %1069 = fsub float %1066, %1068 %1070 = call float @llvm.floor.f32(float %1067) %1071 = fsub float %1067, %1070 %1072 = fmul float %51, 2.000000e+00 %1073 = fmul float %1072, %temp72.0 %1074 = fsub float 1.000000e+00, %1073 %1075 = fmul float %temp72.0, %51 %1076 = fmul float %1069, %1074 %1077 = fadd float %1076, %1075 %1078 = fmul float %1071, %1074 %1079 = fadd float %1078, %1075 %1080 = fmul float %1077, %temp24.0 %1081 = fadd float %1080, %temp12.0 %1082 = fmul float %1079, %temp24.0 %1083 = fadd float %1082, %temp13.0 %1084 = bitcast float %1081 to i32 %1085 = bitcast float %1083 to i32 %1086 = bitcast float %244 to i32 %1087 = insertelement <4 x i32> undef, i32 %1084, i32 0 %1088 = insertelement <4 x i32> %1087, i32 %1085, i32 1 %1089 = insertelement <4 x i32> %1088, i32 %1086, i32 2 %1090 = bitcast <8 x i32> %97 to <32 x i8> %1091 = bitcast <4 x i32> %99 to <16 x i8> %1092 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1089, <32 x i8> %1090, <16 x i8> %1091, i32 2) %1093 = extractelement <4 x float> %1092, i32 0 %1094 = extractelement <4 x float> %1092, i32 1 %1095 = extractelement <4 x float> %1092, i32 2 %1096 = fcmp oeq float %temp14.0, 4.000000e+00 %1097 = select i1 %1096, float 1.000000e+00, float 0.000000e+00 %1098 = bitcast float %1081 to i32 %1099 = bitcast float %1083 to i32 %1100 = bitcast float %244 to i32 %1101 = insertelement <4 x i32> undef, i32 %1098, i32 0 %1102 = insertelement <4 x i32> %1101, i32 %1099, i32 1 %1103 = insertelement <4 x i32> %1102, i32 %1100, i32 2 %1104 = bitcast <8 x i32> %89 to <32 x i8> %1105 = bitcast <4 x i32> %91 to <16 x i8> %1106 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1103, <32 x i8> %1104, <16 x i8> %1105, i32 2) %1107 = extractelement <4 x float> %1106, i32 0 %1108 = extractelement <4 x float> %1106, i32 1 %1109 = extractelement <4 x float> %1106, i32 2 %1110 = fcmp oeq float %temp14.0, 3.000000e+00 %1111 = select i1 %1110, float 1.000000e+00, float 0.000000e+00 %1112 = bitcast float %1081 to i32 %1113 = bitcast float %1083 to i32 %1114 = bitcast float %244 to i32 %1115 = insertelement <4 x i32> undef, i32 %1112, i32 0 %1116 = insertelement <4 x i32> %1115, i32 %1113, i32 1 %1117 = insertelement <4 x i32> %1116, i32 %1114, i32 2 %1118 = bitcast <8 x i32> %81 to <32 x i8> %1119 = bitcast <4 x i32> %83 to <16 x i8> %1120 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1117, <32 x i8> %1118, <16 x i8> %1119, i32 2) %1121 = extractelement <4 x float> %1120, i32 0 %1122 = extractelement <4 x float> %1120, i32 1 %1123 = extractelement <4 x float> %1120, i32 2 %1124 = fcmp oeq float %temp14.0, 2.000000e+00 %1125 = select i1 %1124, float 1.000000e+00, float 0.000000e+00 %1126 = bitcast float %1081 to i32 %1127 = bitcast float %1083 to i32 %1128 = bitcast float %244 to i32 %1129 = insertelement <4 x i32> undef, i32 %1126, i32 0 %1130 = insertelement <4 x i32> %1129, i32 %1127, i32 1 %1131 = insertelement <4 x i32> %1130, i32 %1128, i32 2 %1132 = bitcast <8 x i32> %73 to <32 x i8> %1133 = bitcast <4 x i32> %75 to <16 x i8> %1134 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1131, <32 x i8> %1132, <16 x i8> %1133, i32 2) %1135 = extractelement <4 x float> %1134, i32 0 %1136 = extractelement <4 x float> %1134, i32 1 %1137 = extractelement <4 x float> %1134, i32 2 %1138 = fcmp oeq float %temp14.0, 1.000000e+00 %1139 = select i1 %1138, float 1.000000e+00, float 0.000000e+00 %1140 = bitcast float %1081 to i32 %1141 = bitcast float %1083 to i32 %1142 = bitcast float %244 to i32 %1143 = insertelement <4 x i32> undef, i32 %1140, i32 0 %1144 = insertelement <4 x i32> %1143, i32 %1141, i32 1 %1145 = insertelement <4 x i32> %1144, i32 %1142, i32 2 %1146 = bitcast <8 x i32> %65 to <32 x i8> %1147 = bitcast <4 x i32> %67 to <16 x i8> %1148 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1145, <32 x i8> %1146, <16 x i8> %1147, i32 2) %1149 = extractelement <4 x float> %1148, i32 0 %1150 = extractelement <4 x float> %1148, i32 1 %1151 = extractelement <4 x float> %1148, i32 2 %1152 = fcmp oeq float %temp14.0, 0.000000e+00 %1153 = select i1 %1152, float 1.000000e+00, float 0.000000e+00 %1154 = fmul float %1149, %1153 %1155 = fmul float %1150, %1153 %1156 = fmul float %1151, %1153 %1157 = fmul float %1135, %1139 %1158 = fadd float %1157, %1154 %1159 = fmul float %1136, %1139 %1160 = fadd float %1159, %1155 %1161 = fmul float %1137, %1139 %1162 = fadd float %1161, %1156 %1163 = fmul float %1121, %1125 %1164 = fadd float %1163, %1158 %1165 = fmul float %1122, %1125 %1166 = fadd float %1165, %1160 %1167 = fmul float %1123, %1125 %1168 = fadd float %1167, %1162 %1169 = fmul float %1107, %1111 %1170 = fadd float %1169, %1164 %1171 = fmul float %1108, %1111 %1172 = fadd float %1171, %1166 %1173 = fmul float %1109, %1111 %1174 = fadd float %1173, %1168 %1175 = fmul float %1093, %1097 %1176 = fadd float %1175, %1170 %1177 = fmul float %1094, %1097 %1178 = fadd float %1177, %1172 %1179 = fmul float %1095, %1097 %1180 = fadd float %1179, %1174 %1181 = fcmp une float %47, %temp24.0 %.sink216 = select i1 %1181, float %50, float %49 %temp76.0 = select i1 %1181, float 1.953125e-03, float 3.906250e-03 %1182 = fdiv float 1.000000e+00, %.sink216 %1183 = fmul float %118, %1182 %1184 = fmul float %116, %1182 %1185 = call float @llvm.floor.f32(float %1183) %1186 = fsub float %1183, %1185 %1187 = call float @llvm.floor.f32(float %1184) %1188 = fsub float %1184, %1187 %1189 = fmul float %51, 2.000000e+00 %1190 = fmul float %1189, %temp76.0 %1191 = fsub float 1.000000e+00, %1190 %1192 = fmul float %temp76.0, %51 %1193 = fmul float %1186, %1191 %1194 = fadd float %1193, %1192 %1195 = fmul float %1188, %1191 %1196 = fadd float %1195, %1192 %1197 = fmul float %1194, %temp24.0 %1198 = fadd float %1197, %temp12.0 %1199 = fmul float %1196, %temp24.0 %1200 = fadd float %1199, %temp13.0 %1201 = bitcast float %1198 to i32 %1202 = bitcast float %1200 to i32 %1203 = bitcast float %244 to i32 %1204 = insertelement <4 x i32> undef, i32 %1201, i32 0 %1205 = insertelement <4 x i32> %1204, i32 %1202, i32 1 %1206 = insertelement <4 x i32> %1205, i32 %1203, i32 2 %1207 = bitcast <8 x i32> %97 to <32 x i8> %1208 = bitcast <4 x i32> %99 to <16 x i8> %1209 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1206, <32 x i8> %1207, <16 x i8> %1208, i32 2) %1210 = extractelement <4 x float> %1209, i32 0 %1211 = extractelement <4 x float> %1209, i32 1 %1212 = extractelement <4 x float> %1209, i32 2 %1213 = fcmp oeq float %temp14.0, 4.000000e+00 %1214 = select i1 %1213, float 1.000000e+00, float 0.000000e+00 %1215 = bitcast float %1198 to i32 %1216 = bitcast float %1200 to i32 %1217 = bitcast float %244 to i32 %1218 = insertelement <4 x i32> undef, i32 %1215, i32 0 %1219 = insertelement <4 x i32> %1218, i32 %1216, i32 1 %1220 = insertelement <4 x i32> %1219, i32 %1217, i32 2 %1221 = bitcast <8 x i32> %89 to <32 x i8> %1222 = bitcast <4 x i32> %91 to <16 x i8> %1223 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1220, <32 x i8> %1221, <16 x i8> %1222, i32 2) %1224 = extractelement <4 x float> %1223, i32 0 %1225 = extractelement <4 x float> %1223, i32 1 %1226 = extractelement <4 x float> %1223, i32 2 %1227 = fcmp oeq float %temp14.0, 3.000000e+00 %1228 = select i1 %1227, float 1.000000e+00, float 0.000000e+00 %1229 = bitcast float %1198 to i32 %1230 = bitcast float %1200 to i32 %1231 = bitcast float %244 to i32 %1232 = insertelement <4 x i32> undef, i32 %1229, i32 0 %1233 = insertelement <4 x i32> %1232, i32 %1230, i32 1 %1234 = insertelement <4 x i32> %1233, i32 %1231, i32 2 %1235 = bitcast <8 x i32> %81 to <32 x i8> %1236 = bitcast <4 x i32> %83 to <16 x i8> %1237 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1234, <32 x i8> %1235, <16 x i8> %1236, i32 2) %1238 = extractelement <4 x float> %1237, i32 0 %1239 = extractelement <4 x float> %1237, i32 1 %1240 = extractelement <4 x float> %1237, i32 2 %1241 = fcmp oeq float %temp14.0, 2.000000e+00 %1242 = select i1 %1241, float 1.000000e+00, float 0.000000e+00 %1243 = bitcast float %1198 to i32 %1244 = bitcast float %1200 to i32 %1245 = bitcast float %244 to i32 %1246 = insertelement <4 x i32> undef, i32 %1243, i32 0 %1247 = insertelement <4 x i32> %1246, i32 %1244, i32 1 %1248 = insertelement <4 x i32> %1247, i32 %1245, i32 2 %1249 = bitcast <8 x i32> %73 to <32 x i8> %1250 = bitcast <4 x i32> %75 to <16 x i8> %1251 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1248, <32 x i8> %1249, <16 x i8> %1250, i32 2) %1252 = extractelement <4 x float> %1251, i32 0 %1253 = extractelement <4 x float> %1251, i32 1 %1254 = extractelement <4 x float> %1251, i32 2 %1255 = fcmp oeq float %temp14.0, 1.000000e+00 %1256 = select i1 %1255, float 1.000000e+00, float 0.000000e+00 %1257 = bitcast float %1198 to i32 %1258 = bitcast float %1200 to i32 %1259 = bitcast float %244 to i32 %1260 = insertelement <4 x i32> undef, i32 %1257, i32 0 %1261 = insertelement <4 x i32> %1260, i32 %1258, i32 1 %1262 = insertelement <4 x i32> %1261, i32 %1259, i32 2 %1263 = bitcast <8 x i32> %65 to <32 x i8> %1264 = bitcast <4 x i32> %67 to <16 x i8> %1265 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1262, <32 x i8> %1263, <16 x i8> %1264, i32 2) %1266 = extractelement <4 x float> %1265, i32 0 %1267 = extractelement <4 x float> %1265, i32 1 %1268 = extractelement <4 x float> %1265, i32 2 %1269 = fcmp oeq float %temp14.0, 0.000000e+00 %1270 = select i1 %1269, float 1.000000e+00, float 0.000000e+00 %1271 = fmul float %1266, %1270 %1272 = fmul float %1267, %1270 %1273 = fmul float %1268, %1270 %1274 = fmul float %1252, %1256 %1275 = fadd float %1274, %1271 %1276 = fmul float %1253, %1256 %1277 = fadd float %1276, %1272 %1278 = fmul float %1254, %1256 %1279 = fadd float %1278, %1273 %1280 = fmul float %1238, %1242 %1281 = fadd float %1280, %1275 %1282 = fmul float %1239, %1242 %1283 = fadd float %1282, %1277 %1284 = fmul float %1240, %1242 %1285 = fadd float %1284, %1279 %1286 = fmul float %1224, %1228 %1287 = fadd float %1286, %1281 %1288 = fmul float %1225, %1228 %1289 = fadd float %1288, %1283 %1290 = fmul float %1226, %1228 %1291 = fadd float %1290, %1285 %1292 = fmul float %1210, %1214 %1293 = fadd float %1292, %1287 %1294 = fmul float %1211, %1214 %1295 = fadd float %1294, %1289 %1296 = fmul float %1212, %1214 %1297 = fadd float %1296, %1291 %1298 = fmul float %1059, %169 %1299 = fmul float %1061, %169 %1300 = fmul float %1063, %169 %1301 = fmul float %1176, %167 %1302 = fadd float %1301, %1298 %1303 = fmul float %1178, %167 %1304 = fadd float %1303, %1299 %1305 = fmul float %1180, %167 %1306 = fadd float %1305, %1300 %1307 = fmul float %1293, %168 %1308 = fadd float %1307, %1302 %1309 = fmul float %1295, %168 %1310 = fadd float %1309, %1304 %1311 = fmul float %1297, %168 %1312 = fadd float %1311, %1306 %1313 = fmul float %708, %169 %1314 = fmul float %710, %169 %1315 = fmul float %712, %169 %1316 = fmul float %825, %167 %1317 = fadd float %1316, %1313 %1318 = fmul float %827, %167 %1319 = fadd float %1318, %1314 %1320 = fmul float %829, %167 %1321 = fadd float %1320, %1315 %1322 = fmul float %942, %168 %1323 = fadd float %1322, %1317 %1324 = fmul float %944, %168 %1325 = fadd float %1324, %1319 %1326 = fmul float %946, %168 %1327 = fadd float %1326, %1321 %1328 = fmul float %357, %169 %1329 = fmul float %359, %169 %1330 = fmul float %361, %169 %1331 = fmul float %474, %167 %1332 = fadd float %1331, %1328 %1333 = fmul float %476, %167 %1334 = fadd float %1333, %1329 %1335 = fmul float %478, %167 %1336 = fadd float %1335, %1330 %1337 = fmul float %591, %168 %1338 = fadd float %1337, %1332 %1339 = fmul float %593, %168 %1340 = fadd float %1339, %1334 %1341 = fmul float %595, %168 %1342 = fadd float %1341, %1336 %1343 = fmul float %108, %1338 %1344 = fmul float %108, %1340 %1345 = fmul float %108, %1342 %1346 = fmul float %109, %1323 %1347 = fadd float %1346, %1343 %1348 = fmul float %109, %1325 %1349 = fadd float %1348, %1344 %1350 = fmul float %109, %1327 %1351 = fadd float %1350, %1345 %1352 = fmul float %110, %1308 %1353 = fadd float %1352, %1347 %1354 = fmul float %110, %1310 %1355 = fadd float %1354, %1349 %1356 = fmul float %110, %1312 %1357 = fadd float %1356, %1351 %1358 = fcmp une float %47, %temp16.0 %.sink217 = select i1 %1358, float %50, float %49 %temp48.2 = select i1 %1358, float 1.953125e-03, float 3.906250e-03 %1359 = fdiv float 1.000000e+00, %.sink217 %1360 = fmul float %118, %1359 %1361 = fmul float %117, %1359 %1362 = call float @llvm.floor.f32(float %1360) %1363 = fsub float %1360, %1362 %1364 = call float @llvm.floor.f32(float %1361) %1365 = fsub float %1361, %1364 %1366 = fmul float %51, 2.000000e+00 %1367 = fmul float %1366, %temp48.2 %1368 = fsub float 1.000000e+00, %1367 %1369 = fmul float %temp48.2, %51 %1370 = fmul float %1363, %1368 %1371 = fadd float %1370, %1369 %1372 = fmul float %1365, %1368 %1373 = fadd float %1372, %1369 %1374 = fmul float %1371, %temp16.0 %1375 = fadd float %1374, %temp28.0 %1376 = fmul float %1373, %temp16.0 %1377 = fadd float %1376, %temp29.0 %1378 = bitcast float %1375 to i32 %1379 = bitcast float %1377 to i32 %1380 = bitcast float %244 to i32 %1381 = insertelement <4 x i32> undef, i32 %1378, i32 0 %1382 = insertelement <4 x i32> %1381, i32 %1379, i32 1 %1383 = insertelement <4 x i32> %1382, i32 %1380, i32 2 %1384 = bitcast <8 x i32> %101 to <32 x i8> %1385 = bitcast <4 x i32> %103 to <16 x i8> %1386 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1383, <32 x i8> %1384, <16 x i8> %1385, i32 2) %1387 = extractelement <4 x float> %1386, i32 1 %1388 = extractelement <4 x float> %1386, i32 3 %1389 = fcmp oeq float %temp30.0, 4.000000e+00 %1390 = select i1 %1389, float 1.000000e+00, float 0.000000e+00 %1391 = bitcast float %1375 to i32 %1392 = bitcast float %1377 to i32 %1393 = bitcast float %244 to i32 %1394 = insertelement <4 x i32> undef, i32 %1391, i32 0 %1395 = insertelement <4 x i32> %1394, i32 %1392, i32 1 %1396 = insertelement <4 x i32> %1395, i32 %1393, i32 2 %1397 = bitcast <8 x i32> %93 to <32 x i8> %1398 = bitcast <4 x i32> %95 to <16 x i8> %1399 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1396, <32 x i8> %1397, <16 x i8> %1398, i32 2) %1400 = extractelement <4 x float> %1399, i32 1 %1401 = extractelement <4 x float> %1399, i32 3 %1402 = fcmp oeq float %temp30.0, 3.000000e+00 %1403 = select i1 %1402, float 1.000000e+00, float 0.000000e+00 %1404 = bitcast float %1375 to i32 %1405 = bitcast float %1377 to i32 %1406 = bitcast float %244 to i32 %1407 = insertelement <4 x i32> undef, i32 %1404, i32 0 %1408 = insertelement <4 x i32> %1407, i32 %1405, i32 1 %1409 = insertelement <4 x i32> %1408, i32 %1406, i32 2 %1410 = bitcast <8 x i32> %85 to <32 x i8> %1411 = bitcast <4 x i32> %87 to <16 x i8> %1412 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1409, <32 x i8> %1410, <16 x i8> %1411, i32 2) %1413 = extractelement <4 x float> %1412, i32 1 %1414 = extractelement <4 x float> %1412, i32 3 %1415 = fcmp oeq float %temp30.0, 2.000000e+00 %1416 = select i1 %1415, float 1.000000e+00, float 0.000000e+00 %1417 = bitcast float %1375 to i32 %1418 = bitcast float %1377 to i32 %1419 = bitcast float %244 to i32 %1420 = insertelement <4 x i32> undef, i32 %1417, i32 0 %1421 = insertelement <4 x i32> %1420, i32 %1418, i32 1 %1422 = insertelement <4 x i32> %1421, i32 %1419, i32 2 %1423 = bitcast <8 x i32> %77 to <32 x i8> %1424 = bitcast <4 x i32> %79 to <16 x i8> %1425 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1422, <32 x i8> %1423, <16 x i8> %1424, i32 2) %1426 = extractelement <4 x float> %1425, i32 1 %1427 = extractelement <4 x float> %1425, i32 3 %1428 = fcmp oeq float %temp30.0, 1.000000e+00 %1429 = select i1 %1428, float 1.000000e+00, float 0.000000e+00 %1430 = bitcast float %1375 to i32 %1431 = bitcast float %1377 to i32 %1432 = bitcast float %244 to i32 %1433 = insertelement <4 x i32> undef, i32 %1430, i32 0 %1434 = insertelement <4 x i32> %1433, i32 %1431, i32 1 %1435 = insertelement <4 x i32> %1434, i32 %1432, i32 2 %1436 = bitcast <8 x i32> %69 to <32 x i8> %1437 = bitcast <4 x i32> %71 to <16 x i8> %1438 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1435, <32 x i8> %1436, <16 x i8> %1437, i32 2) %1439 = extractelement <4 x float> %1438, i32 1 %1440 = extractelement <4 x float> %1438, i32 3 %1441 = fcmp oeq float %temp30.0, 0.000000e+00 %1442 = select i1 %1441, float 1.000000e+00, float 0.000000e+00 %1443 = fmul float %1439, %1442 %1444 = fmul float %1440, %1442 %1445 = fmul float %1426, %1429 %1446 = fadd float %1445, %1443 %1447 = fmul float %1427, %1429 %1448 = fadd float %1447, %1444 %1449 = fmul float %1413, %1416 %1450 = fadd float %1449, %1446 %1451 = fmul float %1414, %1416 %1452 = fadd float %1451, %1448 %1453 = fmul float %1400, %1403 %1454 = fadd float %1453, %1450 %1455 = fmul float %1401, %1403 %1456 = fadd float %1455, %1452 %1457 = fmul float %1387, %1390 %1458 = fadd float %1457, %1454 %1459 = fmul float %1388, %1390 %1460 = fadd float %1459, %1456 %1461 = fmul float %1460, 2.000000e+00 %1462 = fadd float %1461, -1.000000e+00 %1463 = fmul float %1458, 2.000000e+00 %1464 = fadd float %1463, -1.000000e+00 %1465 = fmul float %1462, %1462 %1466 = fmul float %1464, %1464 %1467 = fadd float %1465, %1466 %1468 = call float @llvm.AMDIL.clamp.(float %1467, float 0.000000e+00, float 1.000000e+00) %1469 = fcmp une float %47, %temp16.0 %.sink218 = select i1 %1469, float %50, float %49 %temp52.2 = select i1 %1469, float 1.953125e-03, float 3.906250e-03 %1470 = fdiv float 1.000000e+00, %.sink218 %1471 = fmul float %118, %1470 %1472 = fmul float %116, %1470 %1473 = call float @llvm.floor.f32(float %1471) %1474 = fsub float %1471, %1473 %1475 = call float @llvm.floor.f32(float %1472) %1476 = fsub float %1472, %1475 %1477 = fmul float %51, 2.000000e+00 %1478 = fmul float %1477, %temp52.2 %1479 = fsub float 1.000000e+00, %1478 %1480 = fmul float %temp52.2, %51 %1481 = fmul float %1474, %1479 %1482 = fadd float %1481, %1480 %1483 = fmul float %1476, %1479 %1484 = fadd float %1483, %1480 %1485 = fmul float %1482, %temp16.0 %1486 = fadd float %1485, %temp28.0 %1487 = fmul float %1484, %temp16.0 %1488 = fadd float %1487, %temp29.0 %1489 = bitcast float %1486 to i32 %1490 = bitcast float %1488 to i32 %1491 = bitcast float %244 to i32 %1492 = insertelement <4 x i32> undef, i32 %1489, i32 0 %1493 = insertelement <4 x i32> %1492, i32 %1490, i32 1 %1494 = insertelement <4 x i32> %1493, i32 %1491, i32 2 %1495 = bitcast <8 x i32> %101 to <32 x i8> %1496 = bitcast <4 x i32> %103 to <16 x i8> %1497 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1494, <32 x i8> %1495, <16 x i8> %1496, i32 2) %1498 = extractelement <4 x float> %1497, i32 1 %1499 = extractelement <4 x float> %1497, i32 3 %1500 = fcmp oeq float %temp30.0, 4.000000e+00 %1501 = select i1 %1500, float 1.000000e+00, float 0.000000e+00 %1502 = bitcast float %1486 to i32 %1503 = bitcast float %1488 to i32 %1504 = bitcast float %244 to i32 %1505 = insertelement <4 x i32> undef, i32 %1502, i32 0 %1506 = insertelement <4 x i32> %1505, i32 %1503, i32 1 %1507 = insertelement <4 x i32> %1506, i32 %1504, i32 2 %1508 = bitcast <8 x i32> %93 to <32 x i8> %1509 = bitcast <4 x i32> %95 to <16 x i8> %1510 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1507, <32 x i8> %1508, <16 x i8> %1509, i32 2) %1511 = extractelement <4 x float> %1510, i32 1 %1512 = extractelement <4 x float> %1510, i32 3 %1513 = fcmp oeq float %temp30.0, 3.000000e+00 %1514 = select i1 %1513, float 1.000000e+00, float 0.000000e+00 %1515 = bitcast float %1486 to i32 %1516 = bitcast float %1488 to i32 %1517 = bitcast float %244 to i32 %1518 = insertelement <4 x i32> undef, i32 %1515, i32 0 %1519 = insertelement <4 x i32> %1518, i32 %1516, i32 1 %1520 = insertelement <4 x i32> %1519, i32 %1517, i32 2 %1521 = bitcast <8 x i32> %85 to <32 x i8> %1522 = bitcast <4 x i32> %87 to <16 x i8> %1523 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1520, <32 x i8> %1521, <16 x i8> %1522, i32 2) %1524 = extractelement <4 x float> %1523, i32 1 %1525 = extractelement <4 x float> %1523, i32 3 %1526 = fcmp oeq float %temp30.0, 2.000000e+00 %1527 = select i1 %1526, float 1.000000e+00, float 0.000000e+00 %1528 = bitcast float %1486 to i32 %1529 = bitcast float %1488 to i32 %1530 = bitcast float %244 to i32 %1531 = insertelement <4 x i32> undef, i32 %1528, i32 0 %1532 = insertelement <4 x i32> %1531, i32 %1529, i32 1 %1533 = insertelement <4 x i32> %1532, i32 %1530, i32 2 %1534 = bitcast <8 x i32> %77 to <32 x i8> %1535 = bitcast <4 x i32> %79 to <16 x i8> %1536 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1533, <32 x i8> %1534, <16 x i8> %1535, i32 2) %1537 = extractelement <4 x float> %1536, i32 1 %1538 = extractelement <4 x float> %1536, i32 3 %1539 = fcmp oeq float %temp30.0, 1.000000e+00 %1540 = select i1 %1539, float 1.000000e+00, float 0.000000e+00 %1541 = bitcast float %1486 to i32 %1542 = bitcast float %1488 to i32 %1543 = bitcast float %244 to i32 %1544 = insertelement <4 x i32> undef, i32 %1541, i32 0 %1545 = insertelement <4 x i32> %1544, i32 %1542, i32 1 %1546 = insertelement <4 x i32> %1545, i32 %1543, i32 2 %1547 = bitcast <8 x i32> %69 to <32 x i8> %1548 = bitcast <4 x i32> %71 to <16 x i8> %1549 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1546, <32 x i8> %1547, <16 x i8> %1548, i32 2) %1550 = extractelement <4 x float> %1549, i32 1 %1551 = extractelement <4 x float> %1549, i32 3 %1552 = fcmp oeq float %temp30.0, 0.000000e+00 %1553 = select i1 %1552, float 1.000000e+00, float 0.000000e+00 %1554 = fmul float %1550, %1553 %1555 = fmul float %1551, %1553 %1556 = fmul float %1537, %1540 %1557 = fadd float %1556, %1554 %1558 = fmul float %1538, %1540 %1559 = fadd float %1558, %1555 %1560 = fmul float %1524, %1527 %1561 = fadd float %1560, %1557 %1562 = fmul float %1525, %1527 %1563 = fadd float %1562, %1559 %1564 = fmul float %1511, %1514 %1565 = fadd float %1564, %1561 %1566 = fmul float %1512, %1514 %1567 = fadd float %1566, %1563 %1568 = fmul float %1498, %1501 %1569 = fadd float %1568, %1565 %1570 = fmul float %1499, %1501 %1571 = fadd float %1570, %1567 %1572 = fmul float %1571, 2.000000e+00 %1573 = fadd float %1572, -1.000000e+00 %1574 = fmul float %1569, 2.000000e+00 %1575 = fadd float %1574, -1.000000e+00 %1576 = fmul float %1573, %1573 %1577 = fmul float %1575, %1575 %1578 = fadd float %1576, %1577 %1579 = call float @llvm.AMDIL.clamp.(float %1578, float 0.000000e+00, float 1.000000e+00) %1580 = fcmp une float %47, %temp16.0 %.sink219 = select i1 %1580, float %50, float %49 %temp56.2 = select i1 %1580, float 1.953125e-03, float 3.906250e-03 %1581 = fdiv float 1.000000e+00, %.sink219 %1582 = fmul float %116, %1581 %1583 = fmul float %117, %1581 %1584 = call float @llvm.floor.f32(float %1582) %1585 = fsub float %1582, %1584 %1586 = call float @llvm.floor.f32(float %1583) %1587 = fsub float %1583, %1586 %1588 = fmul float %51, 2.000000e+00 %1589 = fmul float %1588, %temp56.2 %1590 = fsub float 1.000000e+00, %1589 %1591 = fmul float %temp56.2, %51 %1592 = fmul float %1585, %1590 %1593 = fadd float %1592, %1591 %1594 = fmul float %1587, %1590 %1595 = fadd float %1594, %1591 %1596 = fmul float %1593, %temp16.0 %1597 = fadd float %1596, %temp28.0 %1598 = fmul float %1595, %temp16.0 %1599 = fadd float %1598, %temp29.0 %1600 = bitcast float %1597 to i32 %1601 = bitcast float %1599 to i32 %1602 = bitcast float %244 to i32 %1603 = insertelement <4 x i32> undef, i32 %1600, i32 0 %1604 = insertelement <4 x i32> %1603, i32 %1601, i32 1 %1605 = insertelement <4 x i32> %1604, i32 %1602, i32 2 %1606 = bitcast <8 x i32> %101 to <32 x i8> %1607 = bitcast <4 x i32> %103 to <16 x i8> %1608 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1605, <32 x i8> %1606, <16 x i8> %1607, i32 2) %1609 = extractelement <4 x float> %1608, i32 1 %1610 = extractelement <4 x float> %1608, i32 3 %1611 = fcmp oeq float %temp30.0, 4.000000e+00 %1612 = select i1 %1611, float 1.000000e+00, float 0.000000e+00 %1613 = bitcast float %1597 to i32 %1614 = bitcast float %1599 to i32 %1615 = bitcast float %244 to i32 %1616 = insertelement <4 x i32> undef, i32 %1613, i32 0 %1617 = insertelement <4 x i32> %1616, i32 %1614, i32 1 %1618 = insertelement <4 x i32> %1617, i32 %1615, i32 2 %1619 = bitcast <8 x i32> %93 to <32 x i8> %1620 = bitcast <4 x i32> %95 to <16 x i8> %1621 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1618, <32 x i8> %1619, <16 x i8> %1620, i32 2) %1622 = extractelement <4 x float> %1621, i32 1 %1623 = extractelement <4 x float> %1621, i32 3 %1624 = fcmp oeq float %temp30.0, 3.000000e+00 %1625 = select i1 %1624, float 1.000000e+00, float 0.000000e+00 %1626 = bitcast float %1597 to i32 %1627 = bitcast float %1599 to i32 %1628 = bitcast float %244 to i32 %1629 = insertelement <4 x i32> undef, i32 %1626, i32 0 %1630 = insertelement <4 x i32> %1629, i32 %1627, i32 1 %1631 = insertelement <4 x i32> %1630, i32 %1628, i32 2 %1632 = bitcast <8 x i32> %85 to <32 x i8> %1633 = bitcast <4 x i32> %87 to <16 x i8> %1634 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1631, <32 x i8> %1632, <16 x i8> %1633, i32 2) %1635 = extractelement <4 x float> %1634, i32 1 %1636 = extractelement <4 x float> %1634, i32 3 %1637 = fcmp oeq float %temp30.0, 2.000000e+00 %1638 = select i1 %1637, float 1.000000e+00, float 0.000000e+00 %1639 = bitcast float %1597 to i32 %1640 = bitcast float %1599 to i32 %1641 = bitcast float %244 to i32 %1642 = insertelement <4 x i32> undef, i32 %1639, i32 0 %1643 = insertelement <4 x i32> %1642, i32 %1640, i32 1 %1644 = insertelement <4 x i32> %1643, i32 %1641, i32 2 %1645 = bitcast <8 x i32> %77 to <32 x i8> %1646 = bitcast <4 x i32> %79 to <16 x i8> %1647 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1644, <32 x i8> %1645, <16 x i8> %1646, i32 2) %1648 = extractelement <4 x float> %1647, i32 1 %1649 = extractelement <4 x float> %1647, i32 3 %1650 = fcmp oeq float %temp30.0, 1.000000e+00 %1651 = select i1 %1650, float 1.000000e+00, float 0.000000e+00 %1652 = bitcast float %1597 to i32 %1653 = bitcast float %1599 to i32 %1654 = bitcast float %244 to i32 %1655 = insertelement <4 x i32> undef, i32 %1652, i32 0 %1656 = insertelement <4 x i32> %1655, i32 %1653, i32 1 %1657 = insertelement <4 x i32> %1656, i32 %1654, i32 2 %1658 = bitcast <8 x i32> %69 to <32 x i8> %1659 = bitcast <4 x i32> %71 to <16 x i8> %1660 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1657, <32 x i8> %1658, <16 x i8> %1659, i32 2) %1661 = extractelement <4 x float> %1660, i32 1 %1662 = extractelement <4 x float> %1660, i32 3 %1663 = fcmp oeq float %temp30.0, 0.000000e+00 %1664 = select i1 %1663, float 1.000000e+00, float 0.000000e+00 %1665 = fmul float %1661, %1664 %1666 = fmul float %1662, %1664 %1667 = fmul float %1648, %1651 %1668 = fadd float %1667, %1665 %1669 = fmul float %1649, %1651 %1670 = fadd float %1669, %1666 %1671 = fmul float %1635, %1638 %1672 = fadd float %1671, %1668 %1673 = fmul float %1636, %1638 %1674 = fadd float %1673, %1670 %1675 = fmul float %1622, %1625 %1676 = fadd float %1675, %1672 %1677 = fmul float %1623, %1625 %1678 = fadd float %1677, %1674 %1679 = fmul float %1609, %1612 %1680 = fadd float %1679, %1676 %1681 = fmul float %1610, %1612 %1682 = fadd float %1681, %1678 %1683 = fmul float %1682, 2.000000e+00 %1684 = fadd float %1683, -1.000000e+00 %1685 = fmul float %1680, 2.000000e+00 %1686 = fadd float %1685, -1.000000e+00 %1687 = fmul float %1684, %1684 %1688 = fmul float %1686, %1686 %1689 = fadd float %1687, %1688 %1690 = call float @llvm.AMDIL.clamp.(float %1689, float 0.000000e+00, float 1.000000e+00) %1691 = fmul float %167, 0.000000e+00 %1692 = fmul float %1462, %167 %1693 = fmul float %1464, %167 %1694 = fmul float %1575, %168 %1695 = fadd float %1694, %1691 %1696 = fmul float %168, 0.000000e+00 %1697 = fadd float %1696, %1692 %1698 = fmul float %1573, %168 %1699 = fadd float %1698, %1693 %1700 = fmul float %1684, %169 %1701 = fadd float %1700, %1695 %1702 = fmul float %1686, %169 %1703 = fadd float %1702, %1697 %1704 = fmul float %169, 0.000000e+00 %1705 = fadd float %1704, %1699 %1706 = fcmp une float %47, %temp20.0 %.sink220 = select i1 %1706, float %50, float %49 %temp44.3 = select i1 %1706, float 1.953125e-03, float 3.906250e-03 %1707 = fdiv float 1.000000e+00, %.sink220 %1708 = fmul float %118, %1707 %1709 = fmul float %117, %1707 %1710 = call float @llvm.floor.f32(float %1708) %1711 = fsub float %1708, %1710 %1712 = call float @llvm.floor.f32(float %1709) %1713 = fsub float %1709, %1712 %1714 = fmul float %51, 2.000000e+00 %1715 = fmul float %1714, %temp44.3 %1716 = fsub float 1.000000e+00, %1715 %1717 = fmul float %temp44.3, %51 %1718 = fmul float %1711, %1716 %1719 = fadd float %1718, %1717 %1720 = fmul float %1713, %1716 %1721 = fadd float %1720, %1717 %1722 = fmul float %1719, %temp20.0 %1723 = fadd float %1722, %temp36.0 %1724 = fmul float %1721, %temp20.0 %1725 = fadd float %1724, %temp37.0 %1726 = bitcast float %1723 to i32 %1727 = bitcast float %1725 to i32 %1728 = bitcast float %244 to i32 %1729 = insertelement <4 x i32> undef, i32 %1726, i32 0 %1730 = insertelement <4 x i32> %1729, i32 %1727, i32 1 %1731 = insertelement <4 x i32> %1730, i32 %1728, i32 2 %1732 = bitcast <8 x i32> %101 to <32 x i8> %1733 = bitcast <4 x i32> %103 to <16 x i8> %1734 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1731, <32 x i8> %1732, <16 x i8> %1733, i32 2) %1735 = extractelement <4 x float> %1734, i32 1 %1736 = extractelement <4 x float> %1734, i32 3 %1737 = fcmp oeq float %temp38.0, 4.000000e+00 %1738 = select i1 %1737, float 1.000000e+00, float 0.000000e+00 %1739 = bitcast float %1723 to i32 %1740 = bitcast float %1725 to i32 %1741 = bitcast float %244 to i32 %1742 = insertelement <4 x i32> undef, i32 %1739, i32 0 %1743 = insertelement <4 x i32> %1742, i32 %1740, i32 1 %1744 = insertelement <4 x i32> %1743, i32 %1741, i32 2 %1745 = bitcast <8 x i32> %93 to <32 x i8> %1746 = bitcast <4 x i32> %95 to <16 x i8> %1747 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1744, <32 x i8> %1745, <16 x i8> %1746, i32 2) %1748 = extractelement <4 x float> %1747, i32 1 %1749 = extractelement <4 x float> %1747, i32 3 %1750 = fcmp oeq float %temp38.0, 3.000000e+00 %1751 = select i1 %1750, float 1.000000e+00, float 0.000000e+00 %1752 = bitcast float %1723 to i32 %1753 = bitcast float %1725 to i32 %1754 = bitcast float %244 to i32 %1755 = insertelement <4 x i32> undef, i32 %1752, i32 0 %1756 = insertelement <4 x i32> %1755, i32 %1753, i32 1 %1757 = insertelement <4 x i32> %1756, i32 %1754, i32 2 %1758 = bitcast <8 x i32> %85 to <32 x i8> %1759 = bitcast <4 x i32> %87 to <16 x i8> %1760 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1757, <32 x i8> %1758, <16 x i8> %1759, i32 2) %1761 = extractelement <4 x float> %1760, i32 1 %1762 = extractelement <4 x float> %1760, i32 3 %1763 = fcmp oeq float %temp38.0, 2.000000e+00 %1764 = select i1 %1763, float 1.000000e+00, float 0.000000e+00 %1765 = bitcast float %1723 to i32 %1766 = bitcast float %1725 to i32 %1767 = bitcast float %244 to i32 %1768 = insertelement <4 x i32> undef, i32 %1765, i32 0 %1769 = insertelement <4 x i32> %1768, i32 %1766, i32 1 %1770 = insertelement <4 x i32> %1769, i32 %1767, i32 2 %1771 = bitcast <8 x i32> %77 to <32 x i8> %1772 = bitcast <4 x i32> %79 to <16 x i8> %1773 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1770, <32 x i8> %1771, <16 x i8> %1772, i32 2) %1774 = extractelement <4 x float> %1773, i32 1 %1775 = extractelement <4 x float> %1773, i32 3 %1776 = fcmp oeq float %temp38.0, 1.000000e+00 %1777 = select i1 %1776, float 1.000000e+00, float 0.000000e+00 %1778 = bitcast float %1723 to i32 %1779 = bitcast float %1725 to i32 %1780 = bitcast float %244 to i32 %1781 = insertelement <4 x i32> undef, i32 %1778, i32 0 %1782 = insertelement <4 x i32> %1781, i32 %1779, i32 1 %1783 = insertelement <4 x i32> %1782, i32 %1780, i32 2 %1784 = bitcast <8 x i32> %69 to <32 x i8> %1785 = bitcast <4 x i32> %71 to <16 x i8> %1786 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1783, <32 x i8> %1784, <16 x i8> %1785, i32 2) %1787 = extractelement <4 x float> %1786, i32 1 %1788 = extractelement <4 x float> %1786, i32 3 %1789 = fcmp oeq float %temp38.0, 0.000000e+00 %1790 = select i1 %1789, float 1.000000e+00, float 0.000000e+00 %1791 = fmul float %1787, %1790 %1792 = fmul float %1788, %1790 %1793 = fmul float %1774, %1777 %1794 = fadd float %1793, %1791 %1795 = fmul float %1775, %1777 %1796 = fadd float %1795, %1792 %1797 = fmul float %1761, %1764 %1798 = fadd float %1797, %1794 %1799 = fmul float %1762, %1764 %1800 = fadd float %1799, %1796 %1801 = fmul float %1748, %1751 %1802 = fadd float %1801, %1798 %1803 = fmul float %1749, %1751 %1804 = fadd float %1803, %1800 %1805 = fmul float %1735, %1738 %1806 = fadd float %1805, %1802 %1807 = fmul float %1736, %1738 %1808 = fadd float %1807, %1804 %1809 = fmul float %1808, 2.000000e+00 %1810 = fadd float %1809, -1.000000e+00 %1811 = fmul float %1806, 2.000000e+00 %1812 = fadd float %1811, -1.000000e+00 %1813 = fmul float %1810, %1810 %1814 = fmul float %1812, %1812 %1815 = fadd float %1813, %1814 %1816 = call float @llvm.AMDIL.clamp.(float %1815, float 0.000000e+00, float 1.000000e+00) %1817 = fcmp une float %47, %temp20.0 %.sink221 = select i1 %1817, float %50, float %49 %temp48.4 = select i1 %1817, float 1.953125e-03, float 3.906250e-03 %1818 = fdiv float 1.000000e+00, %.sink221 %1819 = fmul float %118, %1818 %1820 = fmul float %116, %1818 %1821 = call float @llvm.floor.f32(float %1819) %1822 = fsub float %1819, %1821 %1823 = call float @llvm.floor.f32(float %1820) %1824 = fsub float %1820, %1823 %1825 = fmul float %51, 2.000000e+00 %1826 = fmul float %1825, %temp48.4 %1827 = fsub float 1.000000e+00, %1826 %1828 = fmul float %temp48.4, %51 %1829 = fmul float %1822, %1827 %1830 = fadd float %1829, %1828 %1831 = fmul float %1824, %1827 %1832 = fadd float %1831, %1828 %1833 = fmul float %1830, %temp20.0 %1834 = fadd float %1833, %temp36.0 %1835 = fmul float %1832, %temp20.0 %1836 = fadd float %1835, %temp37.0 %1837 = bitcast float %1834 to i32 %1838 = bitcast float %1836 to i32 %1839 = bitcast float %244 to i32 %1840 = insertelement <4 x i32> undef, i32 %1837, i32 0 %1841 = insertelement <4 x i32> %1840, i32 %1838, i32 1 %1842 = insertelement <4 x i32> %1841, i32 %1839, i32 2 %1843 = bitcast <8 x i32> %101 to <32 x i8> %1844 = bitcast <4 x i32> %103 to <16 x i8> %1845 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1842, <32 x i8> %1843, <16 x i8> %1844, i32 2) %1846 = extractelement <4 x float> %1845, i32 1 %1847 = extractelement <4 x float> %1845, i32 3 %1848 = fcmp oeq float %temp38.0, 4.000000e+00 %1849 = select i1 %1848, float 1.000000e+00, float 0.000000e+00 %1850 = bitcast float %1834 to i32 %1851 = bitcast float %1836 to i32 %1852 = bitcast float %244 to i32 %1853 = insertelement <4 x i32> undef, i32 %1850, i32 0 %1854 = insertelement <4 x i32> %1853, i32 %1851, i32 1 %1855 = insertelement <4 x i32> %1854, i32 %1852, i32 2 %1856 = bitcast <8 x i32> %93 to <32 x i8> %1857 = bitcast <4 x i32> %95 to <16 x i8> %1858 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1855, <32 x i8> %1856, <16 x i8> %1857, i32 2) %1859 = extractelement <4 x float> %1858, i32 1 %1860 = extractelement <4 x float> %1858, i32 3 %1861 = fcmp oeq float %temp38.0, 3.000000e+00 %1862 = select i1 %1861, float 1.000000e+00, float 0.000000e+00 %1863 = bitcast float %1834 to i32 %1864 = bitcast float %1836 to i32 %1865 = bitcast float %244 to i32 %1866 = insertelement <4 x i32> undef, i32 %1863, i32 0 %1867 = insertelement <4 x i32> %1866, i32 %1864, i32 1 %1868 = insertelement <4 x i32> %1867, i32 %1865, i32 2 %1869 = bitcast <8 x i32> %85 to <32 x i8> %1870 = bitcast <4 x i32> %87 to <16 x i8> %1871 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1868, <32 x i8> %1869, <16 x i8> %1870, i32 2) %1872 = extractelement <4 x float> %1871, i32 1 %1873 = extractelement <4 x float> %1871, i32 3 %1874 = fcmp oeq float %temp38.0, 2.000000e+00 %1875 = select i1 %1874, float 1.000000e+00, float 0.000000e+00 %1876 = bitcast float %1834 to i32 %1877 = bitcast float %1836 to i32 %1878 = bitcast float %244 to i32 %1879 = insertelement <4 x i32> undef, i32 %1876, i32 0 %1880 = insertelement <4 x i32> %1879, i32 %1877, i32 1 %1881 = insertelement <4 x i32> %1880, i32 %1878, i32 2 %1882 = bitcast <8 x i32> %77 to <32 x i8> %1883 = bitcast <4 x i32> %79 to <16 x i8> %1884 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1881, <32 x i8> %1882, <16 x i8> %1883, i32 2) %1885 = extractelement <4 x float> %1884, i32 1 %1886 = extractelement <4 x float> %1884, i32 3 %1887 = fcmp oeq float %temp38.0, 1.000000e+00 %1888 = select i1 %1887, float 1.000000e+00, float 0.000000e+00 %1889 = bitcast float %1834 to i32 %1890 = bitcast float %1836 to i32 %1891 = bitcast float %244 to i32 %1892 = insertelement <4 x i32> undef, i32 %1889, i32 0 %1893 = insertelement <4 x i32> %1892, i32 %1890, i32 1 %1894 = insertelement <4 x i32> %1893, i32 %1891, i32 2 %1895 = bitcast <8 x i32> %69 to <32 x i8> %1896 = bitcast <4 x i32> %71 to <16 x i8> %1897 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1894, <32 x i8> %1895, <16 x i8> %1896, i32 2) %1898 = extractelement <4 x float> %1897, i32 1 %1899 = extractelement <4 x float> %1897, i32 3 %1900 = fcmp oeq float %temp38.0, 0.000000e+00 %1901 = select i1 %1900, float 1.000000e+00, float 0.000000e+00 %1902 = fmul float %1898, %1901 %1903 = fmul float %1899, %1901 %1904 = fmul float %1885, %1888 %1905 = fadd float %1904, %1902 %1906 = fmul float %1886, %1888 %1907 = fadd float %1906, %1903 %1908 = fmul float %1872, %1875 %1909 = fadd float %1908, %1905 %1910 = fmul float %1873, %1875 %1911 = fadd float %1910, %1907 %1912 = fmul float %1859, %1862 %1913 = fadd float %1912, %1909 %1914 = fmul float %1860, %1862 %1915 = fadd float %1914, %1911 %1916 = fmul float %1846, %1849 %1917 = fadd float %1916, %1913 %1918 = fmul float %1847, %1849 %1919 = fadd float %1918, %1915 %1920 = fmul float %1919, 2.000000e+00 %1921 = fadd float %1920, -1.000000e+00 %1922 = fmul float %1917, 2.000000e+00 %1923 = fadd float %1922, -1.000000e+00 %1924 = fmul float %1921, %1921 %1925 = fmul float %1923, %1923 %1926 = fadd float %1924, %1925 %1927 = call float @llvm.AMDIL.clamp.(float %1926, float 0.000000e+00, float 1.000000e+00) %1928 = fcmp une float %47, %temp20.0 %.sink222 = select i1 %1928, float %50, float %49 %temp52.4 = select i1 %1928, float 1.953125e-03, float 3.906250e-03 %1929 = fdiv float 1.000000e+00, %.sink222 %1930 = fmul float %116, %1929 %1931 = fmul float %117, %1929 %1932 = call float @llvm.floor.f32(float %1930) %1933 = fsub float %1930, %1932 %1934 = call float @llvm.floor.f32(float %1931) %1935 = fsub float %1931, %1934 %1936 = fmul float %51, 2.000000e+00 %1937 = fmul float %1936, %temp52.4 %1938 = fsub float 1.000000e+00, %1937 %1939 = fmul float %temp52.4, %51 %1940 = fmul float %1933, %1938 %1941 = fadd float %1940, %1939 %1942 = fmul float %1935, %1938 %1943 = fadd float %1942, %1939 %1944 = fmul float %1941, %temp20.0 %1945 = fadd float %1944, %temp36.0 %1946 = fmul float %1943, %temp20.0 %1947 = fadd float %1946, %temp37.0 %1948 = bitcast float %1945 to i32 %1949 = bitcast float %1947 to i32 %1950 = bitcast float %244 to i32 %1951 = insertelement <4 x i32> undef, i32 %1948, i32 0 %1952 = insertelement <4 x i32> %1951, i32 %1949, i32 1 %1953 = insertelement <4 x i32> %1952, i32 %1950, i32 2 %1954 = bitcast <8 x i32> %101 to <32 x i8> %1955 = bitcast <4 x i32> %103 to <16 x i8> %1956 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1953, <32 x i8> %1954, <16 x i8> %1955, i32 2) %1957 = extractelement <4 x float> %1956, i32 1 %1958 = extractelement <4 x float> %1956, i32 3 %1959 = fcmp oeq float %temp38.0, 4.000000e+00 %1960 = select i1 %1959, float 1.000000e+00, float 0.000000e+00 %1961 = bitcast float %1945 to i32 %1962 = bitcast float %1947 to i32 %1963 = bitcast float %244 to i32 %1964 = insertelement <4 x i32> undef, i32 %1961, i32 0 %1965 = insertelement <4 x i32> %1964, i32 %1962, i32 1 %1966 = insertelement <4 x i32> %1965, i32 %1963, i32 2 %1967 = bitcast <8 x i32> %93 to <32 x i8> %1968 = bitcast <4 x i32> %95 to <16 x i8> %1969 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1966, <32 x i8> %1967, <16 x i8> %1968, i32 2) %1970 = extractelement <4 x float> %1969, i32 1 %1971 = extractelement <4 x float> %1969, i32 3 %1972 = fcmp oeq float %temp38.0, 3.000000e+00 %1973 = select i1 %1972, float 1.000000e+00, float 0.000000e+00 %1974 = bitcast float %1945 to i32 %1975 = bitcast float %1947 to i32 %1976 = bitcast float %244 to i32 %1977 = insertelement <4 x i32> undef, i32 %1974, i32 0 %1978 = insertelement <4 x i32> %1977, i32 %1975, i32 1 %1979 = insertelement <4 x i32> %1978, i32 %1976, i32 2 %1980 = bitcast <8 x i32> %85 to <32 x i8> %1981 = bitcast <4 x i32> %87 to <16 x i8> %1982 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1979, <32 x i8> %1980, <16 x i8> %1981, i32 2) %1983 = extractelement <4 x float> %1982, i32 1 %1984 = extractelement <4 x float> %1982, i32 3 %1985 = fcmp oeq float %temp38.0, 2.000000e+00 %1986 = select i1 %1985, float 1.000000e+00, float 0.000000e+00 %1987 = bitcast float %1945 to i32 %1988 = bitcast float %1947 to i32 %1989 = bitcast float %244 to i32 %1990 = insertelement <4 x i32> undef, i32 %1987, i32 0 %1991 = insertelement <4 x i32> %1990, i32 %1988, i32 1 %1992 = insertelement <4 x i32> %1991, i32 %1989, i32 2 %1993 = bitcast <8 x i32> %77 to <32 x i8> %1994 = bitcast <4 x i32> %79 to <16 x i8> %1995 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1992, <32 x i8> %1993, <16 x i8> %1994, i32 2) %1996 = extractelement <4 x float> %1995, i32 1 %1997 = extractelement <4 x float> %1995, i32 3 %1998 = fcmp oeq float %temp38.0, 1.000000e+00 %1999 = select i1 %1998, float 1.000000e+00, float 0.000000e+00 %2000 = bitcast float %1945 to i32 %2001 = bitcast float %1947 to i32 %2002 = bitcast float %244 to i32 %2003 = insertelement <4 x i32> undef, i32 %2000, i32 0 %2004 = insertelement <4 x i32> %2003, i32 %2001, i32 1 %2005 = insertelement <4 x i32> %2004, i32 %2002, i32 2 %2006 = bitcast <8 x i32> %69 to <32 x i8> %2007 = bitcast <4 x i32> %71 to <16 x i8> %2008 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2005, <32 x i8> %2006, <16 x i8> %2007, i32 2) %2009 = extractelement <4 x float> %2008, i32 1 %2010 = extractelement <4 x float> %2008, i32 3 %2011 = fcmp oeq float %temp38.0, 0.000000e+00 %2012 = select i1 %2011, float 1.000000e+00, float 0.000000e+00 %2013 = fmul float %2009, %2012 %2014 = fmul float %2010, %2012 %2015 = fmul float %1996, %1999 %2016 = fadd float %2015, %2013 %2017 = fmul float %1997, %1999 %2018 = fadd float %2017, %2014 %2019 = fmul float %1983, %1986 %2020 = fadd float %2019, %2016 %2021 = fmul float %1984, %1986 %2022 = fadd float %2021, %2018 %2023 = fmul float %1970, %1973 %2024 = fadd float %2023, %2020 %2025 = fmul float %1971, %1973 %2026 = fadd float %2025, %2022 %2027 = fmul float %1957, %1960 %2028 = fadd float %2027, %2024 %2029 = fmul float %1958, %1960 %2030 = fadd float %2029, %2026 %2031 = fmul float %2030, 2.000000e+00 %2032 = fadd float %2031, -1.000000e+00 %2033 = fmul float %2028, 2.000000e+00 %2034 = fadd float %2033, -1.000000e+00 %2035 = fmul float %2032, %2032 %2036 = fmul float %2034, %2034 %2037 = fadd float %2035, %2036 %2038 = call float @llvm.AMDIL.clamp.(float %2037, float 0.000000e+00, float 1.000000e+00) %2039 = fmul float %167, 0.000000e+00 %2040 = fmul float %1810, %167 %2041 = fmul float %1812, %167 %2042 = fmul float %1923, %168 %2043 = fadd float %2042, %2039 %2044 = fmul float %168, 0.000000e+00 %2045 = fadd float %2044, %2040 %2046 = fmul float %1921, %168 %2047 = fadd float %2046, %2041 %2048 = fmul float %2032, %169 %2049 = fadd float %2048, %2043 %2050 = fmul float %2034, %169 %2051 = fadd float %2050, %2045 %2052 = fmul float %169, 0.000000e+00 %2053 = fadd float %2052, %2047 %2054 = fcmp une float %47, %temp24.0 %.sink223 = select i1 %2054, float %50, float %49 %temp36.1 = select i1 %2054, float 1.953125e-03, float 3.906250e-03 %2055 = fdiv float 1.000000e+00, %.sink223 %2056 = fmul float %118, %2055 %2057 = fmul float %117, %2055 %2058 = call float @llvm.floor.f32(float %2056) %2059 = fsub float %2056, %2058 %2060 = call float @llvm.floor.f32(float %2057) %2061 = fsub float %2057, %2060 %2062 = fmul float %51, 2.000000e+00 %2063 = fmul float %2062, %temp36.1 %2064 = fsub float 1.000000e+00, %2063 %2065 = fmul float %temp36.1, %51 %2066 = fmul float %2059, %2064 %2067 = fadd float %2066, %2065 %2068 = fmul float %2061, %2064 %2069 = fadd float %2068, %2065 %2070 = fmul float %2067, %temp24.0 %2071 = fadd float %2070, %temp12.0 %2072 = fmul float %2069, %temp24.0 %2073 = fadd float %2072, %temp13.0 %2074 = bitcast float %2071 to i32 %2075 = bitcast float %2073 to i32 %2076 = bitcast float %244 to i32 %2077 = insertelement <4 x i32> undef, i32 %2074, i32 0 %2078 = insertelement <4 x i32> %2077, i32 %2075, i32 1 %2079 = insertelement <4 x i32> %2078, i32 %2076, i32 2 %2080 = bitcast <8 x i32> %101 to <32 x i8> %2081 = bitcast <4 x i32> %103 to <16 x i8> %2082 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2079, <32 x i8> %2080, <16 x i8> %2081, i32 2) %2083 = extractelement <4 x float> %2082, i32 1 %2084 = extractelement <4 x float> %2082, i32 3 %2085 = fcmp oeq float %temp14.0, 4.000000e+00 %2086 = select i1 %2085, float 1.000000e+00, float 0.000000e+00 %2087 = bitcast float %2071 to i32 %2088 = bitcast float %2073 to i32 %2089 = bitcast float %244 to i32 %2090 = insertelement <4 x i32> undef, i32 %2087, i32 0 %2091 = insertelement <4 x i32> %2090, i32 %2088, i32 1 %2092 = insertelement <4 x i32> %2091, i32 %2089, i32 2 %2093 = bitcast <8 x i32> %93 to <32 x i8> %2094 = bitcast <4 x i32> %95 to <16 x i8> %2095 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2092, <32 x i8> %2093, <16 x i8> %2094, i32 2) %2096 = extractelement <4 x float> %2095, i32 1 %2097 = extractelement <4 x float> %2095, i32 3 %2098 = fcmp oeq float %temp14.0, 3.000000e+00 %2099 = select i1 %2098, float 1.000000e+00, float 0.000000e+00 %2100 = bitcast float %2071 to i32 %2101 = bitcast float %2073 to i32 %2102 = bitcast float %244 to i32 %2103 = insertelement <4 x i32> undef, i32 %2100, i32 0 %2104 = insertelement <4 x i32> %2103, i32 %2101, i32 1 %2105 = insertelement <4 x i32> %2104, i32 %2102, i32 2 %2106 = bitcast <8 x i32> %85 to <32 x i8> %2107 = bitcast <4 x i32> %87 to <16 x i8> %2108 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2105, <32 x i8> %2106, <16 x i8> %2107, i32 2) %2109 = extractelement <4 x float> %2108, i32 1 %2110 = extractelement <4 x float> %2108, i32 3 %2111 = fcmp oeq float %temp14.0, 2.000000e+00 %2112 = select i1 %2111, float 1.000000e+00, float 0.000000e+00 %2113 = bitcast float %2071 to i32 %2114 = bitcast float %2073 to i32 %2115 = bitcast float %244 to i32 %2116 = insertelement <4 x i32> undef, i32 %2113, i32 0 %2117 = insertelement <4 x i32> %2116, i32 %2114, i32 1 %2118 = insertelement <4 x i32> %2117, i32 %2115, i32 2 %2119 = bitcast <8 x i32> %77 to <32 x i8> %2120 = bitcast <4 x i32> %79 to <16 x i8> %2121 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2118, <32 x i8> %2119, <16 x i8> %2120, i32 2) %2122 = extractelement <4 x float> %2121, i32 1 %2123 = extractelement <4 x float> %2121, i32 3 %2124 = fcmp oeq float %temp14.0, 1.000000e+00 %2125 = select i1 %2124, float 1.000000e+00, float 0.000000e+00 %2126 = bitcast float %2071 to i32 %2127 = bitcast float %2073 to i32 %2128 = bitcast float %244 to i32 %2129 = insertelement <4 x i32> undef, i32 %2126, i32 0 %2130 = insertelement <4 x i32> %2129, i32 %2127, i32 1 %2131 = insertelement <4 x i32> %2130, i32 %2128, i32 2 %2132 = bitcast <8 x i32> %69 to <32 x i8> %2133 = bitcast <4 x i32> %71 to <16 x i8> %2134 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2131, <32 x i8> %2132, <16 x i8> %2133, i32 2) %2135 = extractelement <4 x float> %2134, i32 1 %2136 = extractelement <4 x float> %2134, i32 3 %2137 = fcmp oeq float %temp14.0, 0.000000e+00 %2138 = select i1 %2137, float 1.000000e+00, float 0.000000e+00 %2139 = fmul float %2135, %2138 %2140 = fmul float %2136, %2138 %2141 = fmul float %2122, %2125 %2142 = fadd float %2141, %2139 %2143 = fmul float %2123, %2125 %2144 = fadd float %2143, %2140 %2145 = fmul float %2109, %2112 %2146 = fadd float %2145, %2142 %2147 = fmul float %2110, %2112 %2148 = fadd float %2147, %2144 %2149 = fmul float %2096, %2099 %2150 = fadd float %2149, %2146 %2151 = fmul float %2097, %2099 %2152 = fadd float %2151, %2148 %2153 = fmul float %2083, %2086 %2154 = fadd float %2153, %2150 %2155 = fmul float %2084, %2086 %2156 = fadd float %2155, %2152 %2157 = fmul float %2156, 2.000000e+00 %2158 = fadd float %2157, -1.000000e+00 %2159 = fmul float %2154, 2.000000e+00 %2160 = fadd float %2159, -1.000000e+00 %2161 = fmul float %2158, %2158 %2162 = fmul float %2160, %2160 %2163 = fadd float %2161, %2162 %2164 = call float @llvm.AMDIL.clamp.(float %2163, float 0.000000e+00, float 1.000000e+00) %2165 = fcmp une float %47, %temp24.0 %.sink224 = select i1 %2165, float %50, float %49 %temp44.5 = select i1 %2165, float 1.953125e-03, float 3.906250e-03 %2166 = fdiv float 1.000000e+00, %.sink224 %2167 = fmul float %118, %2166 %2168 = fmul float %116, %2166 %2169 = call float @llvm.floor.f32(float %2167) %2170 = fsub float %2167, %2169 %2171 = call float @llvm.floor.f32(float %2168) %2172 = fsub float %2168, %2171 %2173 = fmul float %51, 2.000000e+00 %2174 = fmul float %2173, %temp44.5 %2175 = fsub float 1.000000e+00, %2174 %2176 = fmul float %temp44.5, %51 %2177 = fmul float %2170, %2175 %2178 = fadd float %2177, %2176 %2179 = fmul float %2172, %2175 %2180 = fadd float %2179, %2176 %2181 = fmul float %2178, %temp24.0 %2182 = fadd float %2181, %temp12.0 %2183 = fmul float %2180, %temp24.0 %2184 = fadd float %2183, %temp13.0 %2185 = bitcast float %2182 to i32 %2186 = bitcast float %2184 to i32 %2187 = bitcast float %244 to i32 %2188 = insertelement <4 x i32> undef, i32 %2185, i32 0 %2189 = insertelement <4 x i32> %2188, i32 %2186, i32 1 %2190 = insertelement <4 x i32> %2189, i32 %2187, i32 2 %2191 = bitcast <8 x i32> %101 to <32 x i8> %2192 = bitcast <4 x i32> %103 to <16 x i8> %2193 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2190, <32 x i8> %2191, <16 x i8> %2192, i32 2) %2194 = extractelement <4 x float> %2193, i32 1 %2195 = extractelement <4 x float> %2193, i32 3 %2196 = fcmp oeq float %temp14.0, 4.000000e+00 %2197 = select i1 %2196, float 1.000000e+00, float 0.000000e+00 %2198 = bitcast float %2182 to i32 %2199 = bitcast float %2184 to i32 %2200 = bitcast float %244 to i32 %2201 = insertelement <4 x i32> undef, i32 %2198, i32 0 %2202 = insertelement <4 x i32> %2201, i32 %2199, i32 1 %2203 = insertelement <4 x i32> %2202, i32 %2200, i32 2 %2204 = bitcast <8 x i32> %93 to <32 x i8> %2205 = bitcast <4 x i32> %95 to <16 x i8> %2206 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2203, <32 x i8> %2204, <16 x i8> %2205, i32 2) %2207 = extractelement <4 x float> %2206, i32 1 %2208 = extractelement <4 x float> %2206, i32 3 %2209 = fcmp oeq float %temp14.0, 3.000000e+00 %2210 = select i1 %2209, float 1.000000e+00, float 0.000000e+00 %2211 = bitcast float %2182 to i32 %2212 = bitcast float %2184 to i32 %2213 = bitcast float %244 to i32 %2214 = insertelement <4 x i32> undef, i32 %2211, i32 0 %2215 = insertelement <4 x i32> %2214, i32 %2212, i32 1 %2216 = insertelement <4 x i32> %2215, i32 %2213, i32 2 %2217 = bitcast <8 x i32> %85 to <32 x i8> %2218 = bitcast <4 x i32> %87 to <16 x i8> %2219 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2216, <32 x i8> %2217, <16 x i8> %2218, i32 2) %2220 = extractelement <4 x float> %2219, i32 1 %2221 = extractelement <4 x float> %2219, i32 3 %2222 = fcmp oeq float %temp14.0, 2.000000e+00 %2223 = select i1 %2222, float 1.000000e+00, float 0.000000e+00 %2224 = bitcast float %2182 to i32 %2225 = bitcast float %2184 to i32 %2226 = bitcast float %244 to i32 %2227 = insertelement <4 x i32> undef, i32 %2224, i32 0 %2228 = insertelement <4 x i32> %2227, i32 %2225, i32 1 %2229 = insertelement <4 x i32> %2228, i32 %2226, i32 2 %2230 = bitcast <8 x i32> %77 to <32 x i8> %2231 = bitcast <4 x i32> %79 to <16 x i8> %2232 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2229, <32 x i8> %2230, <16 x i8> %2231, i32 2) %2233 = extractelement <4 x float> %2232, i32 1 %2234 = extractelement <4 x float> %2232, i32 3 %2235 = fcmp oeq float %temp14.0, 1.000000e+00 %2236 = select i1 %2235, float 1.000000e+00, float 0.000000e+00 %2237 = bitcast float %2182 to i32 %2238 = bitcast float %2184 to i32 %2239 = bitcast float %244 to i32 %2240 = insertelement <4 x i32> undef, i32 %2237, i32 0 %2241 = insertelement <4 x i32> %2240, i32 %2238, i32 1 %2242 = insertelement <4 x i32> %2241, i32 %2239, i32 2 %2243 = bitcast <8 x i32> %69 to <32 x i8> %2244 = bitcast <4 x i32> %71 to <16 x i8> %2245 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2242, <32 x i8> %2243, <16 x i8> %2244, i32 2) %2246 = extractelement <4 x float> %2245, i32 1 %2247 = extractelement <4 x float> %2245, i32 3 %2248 = fcmp oeq float %temp14.0, 0.000000e+00 %2249 = select i1 %2248, float 1.000000e+00, float 0.000000e+00 %2250 = fmul float %2246, %2249 %2251 = fmul float %2247, %2249 %2252 = fmul float %2233, %2236 %2253 = fadd float %2252, %2250 %2254 = fmul float %2234, %2236 %2255 = fadd float %2254, %2251 %2256 = fmul float %2220, %2223 %2257 = fadd float %2256, %2253 %2258 = fmul float %2221, %2223 %2259 = fadd float %2258, %2255 %2260 = fmul float %2207, %2210 %2261 = fadd float %2260, %2257 %2262 = fmul float %2208, %2210 %2263 = fadd float %2262, %2259 %2264 = fmul float %2194, %2197 %2265 = fadd float %2264, %2261 %2266 = fmul float %2195, %2197 %2267 = fadd float %2266, %2263 %2268 = fmul float %2267, 2.000000e+00 %2269 = fadd float %2268, -1.000000e+00 %2270 = fmul float %2265, 2.000000e+00 %2271 = fadd float %2270, -1.000000e+00 %2272 = fmul float %2269, %2269 %2273 = fmul float %2271, %2271 %2274 = fadd float %2272, %2273 %2275 = call float @llvm.AMDIL.clamp.(float %2274, float 0.000000e+00, float 1.000000e+00) %2276 = fcmp une float %47, %temp24.0 %.sink225 = select i1 %2276, float %50, float %49 %temp48.6 = select i1 %2276, float 1.953125e-03, float 3.906250e-03 %2277 = fdiv float 1.000000e+00, %.sink225 %2278 = fmul float %116, %2277 %2279 = fmul float %117, %2277 %2280 = call float @llvm.floor.f32(float %2278) %2281 = fsub float %2278, %2280 %2282 = call float @llvm.floor.f32(float %2279) %2283 = fsub float %2279, %2282 %2284 = fmul float %51, 2.000000e+00 %2285 = fmul float %2284, %temp48.6 %2286 = fsub float 1.000000e+00, %2285 %2287 = fmul float %temp48.6, %51 %2288 = fmul float %2281, %2286 %2289 = fadd float %2288, %2287 %2290 = fmul float %2283, %2286 %2291 = fadd float %2290, %2287 %2292 = fmul float %2289, %temp24.0 %2293 = fadd float %2292, %temp12.0 %2294 = fmul float %2291, %temp24.0 %2295 = fadd float %2294, %temp13.0 %2296 = bitcast float %2293 to i32 %2297 = bitcast float %2295 to i32 %2298 = bitcast float %244 to i32 %2299 = insertelement <4 x i32> undef, i32 %2296, i32 0 %2300 = insertelement <4 x i32> %2299, i32 %2297, i32 1 %2301 = insertelement <4 x i32> %2300, i32 %2298, i32 2 %2302 = bitcast <8 x i32> %101 to <32 x i8> %2303 = bitcast <4 x i32> %103 to <16 x i8> %2304 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2301, <32 x i8> %2302, <16 x i8> %2303, i32 2) %2305 = extractelement <4 x float> %2304, i32 1 %2306 = extractelement <4 x float> %2304, i32 3 %2307 = fcmp oeq float %temp14.0, 4.000000e+00 %2308 = select i1 %2307, float 1.000000e+00, float 0.000000e+00 %2309 = bitcast float %2293 to i32 %2310 = bitcast float %2295 to i32 %2311 = bitcast float %244 to i32 %2312 = insertelement <4 x i32> undef, i32 %2309, i32 0 %2313 = insertelement <4 x i32> %2312, i32 %2310, i32 1 %2314 = insertelement <4 x i32> %2313, i32 %2311, i32 2 %2315 = bitcast <8 x i32> %93 to <32 x i8> %2316 = bitcast <4 x i32> %95 to <16 x i8> %2317 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2314, <32 x i8> %2315, <16 x i8> %2316, i32 2) %2318 = extractelement <4 x float> %2317, i32 1 %2319 = extractelement <4 x float> %2317, i32 3 %2320 = fcmp oeq float %temp14.0, 3.000000e+00 %2321 = select i1 %2320, float 1.000000e+00, float 0.000000e+00 %2322 = bitcast float %2293 to i32 %2323 = bitcast float %2295 to i32 %2324 = bitcast float %244 to i32 %2325 = insertelement <4 x i32> undef, i32 %2322, i32 0 %2326 = insertelement <4 x i32> %2325, i32 %2323, i32 1 %2327 = insertelement <4 x i32> %2326, i32 %2324, i32 2 %2328 = bitcast <8 x i32> %85 to <32 x i8> %2329 = bitcast <4 x i32> %87 to <16 x i8> %2330 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2327, <32 x i8> %2328, <16 x i8> %2329, i32 2) %2331 = extractelement <4 x float> %2330, i32 1 %2332 = extractelement <4 x float> %2330, i32 3 %2333 = fcmp oeq float %temp14.0, 2.000000e+00 %2334 = select i1 %2333, float 1.000000e+00, float 0.000000e+00 %2335 = bitcast float %2293 to i32 %2336 = bitcast float %2295 to i32 %2337 = bitcast float %244 to i32 %2338 = insertelement <4 x i32> undef, i32 %2335, i32 0 %2339 = insertelement <4 x i32> %2338, i32 %2336, i32 1 %2340 = insertelement <4 x i32> %2339, i32 %2337, i32 2 %2341 = bitcast <8 x i32> %77 to <32 x i8> %2342 = bitcast <4 x i32> %79 to <16 x i8> %2343 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2340, <32 x i8> %2341, <16 x i8> %2342, i32 2) %2344 = extractelement <4 x float> %2343, i32 1 %2345 = extractelement <4 x float> %2343, i32 3 %2346 = fcmp oeq float %temp14.0, 1.000000e+00 %2347 = select i1 %2346, float 1.000000e+00, float 0.000000e+00 %2348 = bitcast float %2293 to i32 %2349 = bitcast float %2295 to i32 %2350 = bitcast float %244 to i32 %2351 = insertelement <4 x i32> undef, i32 %2348, i32 0 %2352 = insertelement <4 x i32> %2351, i32 %2349, i32 1 %2353 = insertelement <4 x i32> %2352, i32 %2350, i32 2 %2354 = bitcast <8 x i32> %69 to <32 x i8> %2355 = bitcast <4 x i32> %71 to <16 x i8> %2356 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2353, <32 x i8> %2354, <16 x i8> %2355, i32 2) %2357 = extractelement <4 x float> %2356, i32 1 %2358 = extractelement <4 x float> %2356, i32 3 %2359 = fcmp oeq float %temp14.0, 0.000000e+00 %2360 = select i1 %2359, float 1.000000e+00, float 0.000000e+00 %2361 = fmul float %2357, %2360 %2362 = fmul float %2358, %2360 %2363 = fmul float %2344, %2347 %2364 = fadd float %2363, %2361 %2365 = fmul float %2345, %2347 %2366 = fadd float %2365, %2362 %2367 = fmul float %2331, %2334 %2368 = fadd float %2367, %2364 %2369 = fmul float %2332, %2334 %2370 = fadd float %2369, %2366 %2371 = fmul float %2318, %2321 %2372 = fadd float %2371, %2368 %2373 = fmul float %2319, %2321 %2374 = fadd float %2373, %2370 %2375 = fmul float %2305, %2308 %2376 = fadd float %2375, %2372 %2377 = fmul float %2306, %2308 %2378 = fadd float %2377, %2374 %2379 = fmul float %2378, 2.000000e+00 %2380 = fadd float %2379, -1.000000e+00 %2381 = fmul float %2376, 2.000000e+00 %2382 = fadd float %2381, -1.000000e+00 %2383 = fmul float %2380, %2380 %2384 = fmul float %2382, %2382 %2385 = fadd float %2383, %2384 %2386 = call float @llvm.AMDIL.clamp.(float %2385, float 0.000000e+00, float 1.000000e+00) %2387 = fmul float %167, 0.000000e+00 %2388 = fmul float %2158, %167 %2389 = fmul float %2160, %167 %2390 = fmul float %2271, %168 %2391 = fadd float %2390, %2387 %2392 = fmul float %168, 0.000000e+00 %2393 = fadd float %2392, %2388 %2394 = fmul float %2269, %168 %2395 = fadd float %2394, %2389 %2396 = fmul float %2380, %169 %2397 = fadd float %2396, %2391 %2398 = fmul float %2382, %169 %2399 = fadd float %2398, %2393 %2400 = fmul float %169, 0.000000e+00 %2401 = fadd float %2400, %2395 %2402 = fmul float %108, %1701 %2403 = fmul float %108, %1703 %2404 = fmul float %108, %1705 %2405 = fmul float %109, %2049 %2406 = fadd float %2405, %2402 %2407 = fmul float %109, %2051 %2408 = fadd float %2407, %2403 %2409 = fmul float %109, %2053 %2410 = fadd float %2409, %2404 %2411 = fmul float %110, %2397 %2412 = fadd float %2411, %2406 %2413 = fmul float %110, %2399 %2414 = fadd float %2413, %2408 %2415 = fmul float %110, %2401 %2416 = fadd float %2415, %2410 %2417 = fmul float %2412, %2412 %2418 = fmul float %2414, %2414 %2419 = fadd float %2417, %2418 %2420 = fmul float %2416, %2416 %2421 = fadd float %2419, %2420 %2422 = fadd float %2421, 1.000000e+00 %2423 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2422) %2424 = fmul float %2412, %2423 %2425 = fmul float %2414, %2423 %2426 = fmul float %2416, %2423 %2427 = fmul float %2424, %107 %2428 = fmul float %2425, %107 %2429 = fmul float %2426, %107 %2430 = fsub float %113, %2427 %2431 = fsub float %114, %2428 %2432 = fsub float %115, %2429 %2433 = fmul float %2430, %2430 %2434 = fmul float %2431, %2431 %2435 = fadd float %2434, %2433 %2436 = fmul float %2432, %2432 %2437 = fadd float %2435, %2436 %2438 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2437) %2439 = fmul float %2430, %2438 %2440 = fmul float %2431, %2438 %2441 = fmul float %2432, %2438 %2442 = fmul float %35, %116 %2443 = fmul float %36, %116 %2444 = fmul float %37, %116 %2445 = fmul float %38, %117 %2446 = fadd float %2445, %2442 %2447 = fmul float %39, %117 %2448 = fadd float %2447, %2443 %2449 = fmul float %40, %117 %2450 = fadd float %2449, %2444 %2451 = fmul float %41, %118 %2452 = fadd float %2451, %2446 %2453 = fmul float %42, %118 %2454 = fadd float %2453, %2448 %2455 = fmul float %43, %118 %2456 = fadd float %2455, %2450 %2457 = fadd float %2452, %44 %2458 = fadd float %2454, %45 %2459 = fadd float %2456, %46 %2460 = fmul float %2457, %2457 %2461 = fmul float %2458, %2458 %2462 = fadd float %2461, %2460 %2463 = fmul float %2459, %2459 %2464 = fadd float %2462, %2463 %2465 = bitcast float %2464 to i32 %2466 = bitcast float %2464 to i32 %2467 = insertelement <2 x i32> undef, i32 %2465, i32 0 %2468 = insertelement <2 x i32> %2467, i32 %2466, i32 1 %2469 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %2468, <32 x i8> %61, <16 x i8> %63, i32 2) %2470 = extractelement <4 x float> %2469, i32 3 %2471 = fadd float %128, %140 %2472 = fadd float %129, %141 %2473 = fadd float %130, %142 %2474 = fmul float %2471, %2471 %2475 = fmul float %2472, %2472 %2476 = fadd float %2475, %2474 %2477 = fmul float %2473, %2473 %2478 = fadd float %2476, %2477 %2479 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2478) %2480 = fmul float %2471, %2479 %2481 = fmul float %2472, %2479 %2482 = fmul float %2473, %2479 %2483 = fmul float %2439, %2480 %2484 = fmul float %2440, %2481 %2485 = fadd float %2484, %2483 %2486 = fmul float %2441, %2482 %2487 = fadd float %2485, %2486 %2488 = call float @llvm.maxnum.f32(float %2487, float 0x3F1A36E2E0000000) %2489 = fmul float %111, 3.200000e+01 %2490 = call float @llvm.pow.f32(float %2488, float %2489) %2491 = call float @llvm.AMDIL.clamp.(float %2490, float 0.000000e+00, float 1.000000e+00) %2492 = fmul float %2491, 2.000000e+00 %2493 = fsub float 3.000000e+00, %2492 %2494 = fmul float %2491, %2493 %2495 = fmul float %2491, %2494 %2496 = fmul float %2495, %111 %2497 = fmul float %1353, %32 %2498 = fmul float %1355, %33 %2499 = fmul float %1357, %34 %2500 = fmul float %2439, %128 %2501 = fmul float %2440, %129 %2502 = fadd float %2501, %2500 %2503 = fmul float %2441, %130 %2504 = fadd float %2502, %2503 %2505 = call float @llvm.AMDIL.clamp.(float %2504, float 0.000000e+00, float 1.000000e+00) %2506 = fmul float %57, 2.000000e+00 %2507 = fmul float %58, 2.000000e+00 %2508 = fmul float %59, 2.000000e+00 %2509 = fmul float %2506, %2470 %2510 = fmul float %2507, %2470 %2511 = fmul float %2508, %2470 %2512 = call float @llvm.maxnum.f32(float %2509, float %54) %2513 = call float @llvm.maxnum.f32(float %2510, float %55) %2514 = call float @llvm.maxnum.f32(float %2511, float %56) %2515 = call float @llvm.minnum.f32(float %2512, float 1.000000e+00) %2516 = call float @llvm.minnum.f32(float %2513, float 1.000000e+00) %2517 = call float @llvm.minnum.f32(float %2514, float 1.000000e+00) %2518 = fmul float %2515, %1353 %2519 = fmul float %2516, %1355 %2520 = fmul float %2517, %1357 %2521 = fmul float %2497, %2505 %2522 = fadd float %2521, %2518 %2523 = fmul float %2498, %2505 %2524 = fadd float %2523, %2519 %2525 = fmul float %2499, %2505 %2526 = fadd float %2525, %2520 %2527 = fmul float %32, %2496 %2528 = fadd float %2527, %2522 %2529 = fmul float %33, %2496 %2530 = fadd float %2529, %2524 %2531 = fmul float %34, %2496 %2532 = fadd float %2531, %2526 %2533 = fmul float %2528, %2470 %2534 = fmul float %2530, %2470 %2535 = fmul float %2532, %2470 %2536 = fmul float %2533, 5.000000e-01 %2537 = fmul float %2534, 5.000000e-01 %2538 = fmul float %2535, 5.000000e-01 %2539 = fmul float %112, %30 %2540 = fadd float %2539, %31 %2541 = call float @llvm.AMDIL.clamp.(float %2540, float 0.000000e+00, float 1.000000e+00) %2542 = fmul float %2536, %2541 %2543 = fmul float %2537, %2541 %2544 = fmul float %2538, %2541 %2545 = call i32 @llvm.SI.packf16(float %2542, float %2543) %2546 = bitcast i32 %2545 to float %2547 = call i32 @llvm.SI.packf16(float %2544, float 1.000000e+00) %2548 = bitcast i32 %2547 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %2546, float %2548, float %2546, float %2548) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b64 s[100:101], s[6:7] ; BEE40406 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v14, v0, 0, 0, [m0] ; C8380000 v_interp_p2_f32 v14, [v14], v1, 0, 0, [m0] ; C8390001 v_interp_p1_f32 v17, v0, 1, 0, [m0] ; C8440100 v_interp_p2_f32 v17, [v17], v1, 1, 0, [m0] ; C8450101 v_interp_p1_f32 v18, v0, 2, 0, [m0] ; C8480200 v_interp_p2_f32 v18, [v18], v1, 2, 0, [m0] ; C8490201 v_interp_p1_f32 v3, v0, 3, 0, [m0] ; C80C0300 v_interp_p2_f32 v3, [v3], v1, 3, 0, [m0] ; C80D0301 v_interp_p1_f32 v8, v0, 0, 1, [m0] ; C8200400 v_interp_p2_f32 v8, [v8], v1, 0, 1, [m0] ; C8210401 v_interp_p1_f32 v9, v0, 1, 1, [m0] ; C8240500 v_interp_p2_f32 v9, [v9], v1, 1, 1, [m0] ; C8250501 v_interp_p1_f32 v4, v0, 2, 1, [m0] ; C8100600 v_interp_p2_f32 v4, [v4], v1, 2, 1, [m0] ; C8110601 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v2, v0, 3, 1, [m0] ; C8080700 v_interp_p2_f32 v2, [v2], v1, 3, 1, [m0] ; C8090701 v_interp_p1_f32 v12, v0, 0, 2, [m0] ; C8300800 v_interp_p2_f32 v12, [v12], v1, 0, 2, [m0] ; C8310801 v_interp_p1_f32 v5, v0, 1, 2, [m0] ; C8140900 v_interp_p2_f32 v5, [v5], v1, 1, 2, [m0] ; C8150901 v_interp_p1_f32 v6, v0, 2, 2, [m0] ; C8180A00 v_interp_p2_f32 v6, [v6], v1, 2, 2, [m0] ; C8190A01 v_interp_p1_f32 v7, v0, 3, 2, [m0] ; C81C0B00 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 v_interp_p2_f32 v7, [v7], v1, 3, 2, [m0] ; C81D0B01 v_interp_p1_f32 v10, v0, 0, 3, [m0] ; C8280C00 v_interp_p2_f32 v10, [v10], v1, 0, 3, [m0] ; C8290C01 v_interp_p1_f32 v11, v0, 1, 3, [m0] ; C82C0D00 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 v_interp_p2_f32 v11, [v11], v1, 1, 3, [m0] ; C82D0D01 v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00 v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v1, s8, v10 ; 08021408 v_sub_f32_e32 v13, s9, v11 ; 081A1609 s_buffer_load_dword s12, s[0:3], 0x4c ; C206014C s_buffer_load_dword s8, s[0:3], 0x50 ; C2040150 s_buffer_load_dword s13, s[0:3], 0x0 ; C2068100 s_buffer_load_dword s41, s[0:3], 0x1 ; C2148101 v_sub_f32_e32 v21, s10, v0 ; 082A000A v_mul_f32_e32 v15, v1, v1 ; 101E0301 v_mac_f32_e32 v15, v13, v13 ; 3E1E1B0D v_mac_f32_e32 v15, v21, v21 ; 3E1E2B15 s_buffer_load_dword s40, s[0:3], 0x2 ; C2140102 v_rsq_clamp_f32_e32 v22, v15 ; 7E2C590F v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_floor_f32_e32 v16, v14 ; 7E20490E s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v15, s13, v10 ; 081E140D v_sub_f32_e32 v14, s41, v11 ; 081C1629 v_mov_b32_e32 v19, 0x42800000 ; 7E2602FF 42800000 v_cmp_le_f32_e32 vcc, v19, v16 ; 7C062113 v_mul_f32_e32 v19, v15, v15 ; 10261F0F v_mac_f32_e32 v19, v14, v14 ; 3E261D0E v_sub_f32_e32 v36, s40, v0 ; 08480028 v_mac_f32_e32 v19, v36, v36 ; 3E264924 v_rsq_clamp_f32_e32 v39, v19 ; 7E4E5913 v_mov_b32_e32 v19, 0x7fffffff ; 7E2602FF 7FFFFFFF v_and_b32_e32 v23, v5, v19 ; 362E2705 v_and_b32_e32 v24, v6, v19 ; 36302706 v_and_b32_e32 v34, v7, v19 ; 36442707 v_mul_f32_e64 v19, |v5|, |v5| ; D2100313 00020B05 v_mad_f32 v19, |v6|, |v6|, v19 ; D2820313 044E0D06 v_mad_f32 v19, |v7|, |v7|, v19 ; D2820313 044E0F07 v_rsq_clamp_f32_e32 v35, v19 ; 7E465913 v_add_f32_e32 v17, 0.5, v17 ; 062222F0 v_add_f32_e32 v18, 0.5, v18 ; 062424F0 v_floor_f32_e32 v20, v17 ; 7E284911 v_floor_f32_e32 v17, v18 ; 7E224912 v_mov_b32_e32 v27, s8 ; 7E360208 v_mul_f32_e32 v25, s12, v16 ; 1032200C v_floor_f32_e32 v19, v25 ; 7E264919 s_and_saveexec_b64 s[10:11], vcc ; BE8A246A s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E v_mov_b32_e32 v18, 0xc2800000 ; 7E2402FF C2800000 v_add_f32_e32 v16, v16, v18 ; 06202510 v_mul_f32_e32 v18, s8, v16 ; 10242008 v_floor_f32_e32 v18, v18 ; 7E244912 v_mul_f32_e32 v26, s8, v18 ; 10342408 v_mad_f32 v31, v16, s8, -v18 ; D282001F 84481110 v_floor_f32_e32 v16, v26 ; 7E20491A v_mad_f32 v32, v18, s8, -v16 ; D2820020 84401112 v_add_f32_e32 v18, 4.0, v16 ; 062420F6 s_or_saveexec_b64 s[10:11], s[10:11] ; BE8A250A v_mov_b32_e32 v16, s12 ; 7E20020C v_mov_b32_e32 v38, v27 ; 7E4C031B s_xor_b64 exec, exec, s[10:11] ; 89FE0A7E v_mul_f32_e32 v18, s12, v19 ; 1024260C v_floor_f32_e32 v26, v25 ; 7E344919 v_subrev_f32_e32 v31, v26, v25 ; 0A3E331A v_floor_f32_e32 v18, v18 ; 7E244912 v_mad_f32 v32, v19, s12, -v18 ; D2820020 84481913 v_mov_b32_e32 v38, v16 ; 7E4C0310 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E v_mul_f32_e32 v26, s12, v20 ; 1034280C v_floor_f32_e32 v25, v26 ; 7E32491A v_mov_b32_e32 v19, 0x42800000 ; 7E2602FF 42800000 v_cmp_le_f32_e32 vcc, v19, v20 ; 7C062913 s_and_saveexec_b64 s[10:11], vcc ; BE8A246A s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E v_mov_b32_e32 v19, 0xc2800000 ; 7E2602FF C2800000 v_add_f32_e32 v19, v20, v19 ; 06262714 v_mul_f32_e32 v20, s8, v19 ; 10282608 v_floor_f32_e32 v20, v20 ; 7E284914 v_mul_f32_e32 v29, s8, v20 ; 103A2808 v_mad_f32 v28, v19, s8, -v20 ; D282001C 84501113 v_floor_f32_e32 v19, v29 ; 7E26491D v_mad_f32 v29, v20, s8, -v19 ; D282001D 844C1114 v_add_f32_e32 v19, 4.0, v19 ; 062626F6 s_or_saveexec_b64 s[10:11], s[10:11] ; BE8A250A v_mov_b32_e32 v30, v27 ; 7E3C031B s_xor_b64 exec, exec, s[10:11] ; 89FE0A7E v_mul_f32_e32 v19, s12, v25 ; 1026320C v_floor_f32_e32 v20, v26 ; 7E28491A v_subrev_f32_e32 v28, v20, v26 ; 0A383514 v_floor_f32_e32 v19, v19 ; 7E264913 v_mad_f32 v29, v25, s12, -v19 ; D282001D 844C1919 v_mov_b32_e32 v30, v16 ; 7E3C0310 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E s_buffer_load_dword s14, s[0:3], 0xb ; C207010B s_buffer_load_dword s15, s[0:3], 0x54 ; C2078154 s_buffer_load_dword s9, s[0:3], 0x58 ; C2048158 v_mul_f32_e32 v41, s12, v17 ; 1052220C v_floor_f32_e32 v40, v41 ; 7E504929 v_mov_b32_e32 v20, 0x42800000 ; 7E2802FF 42800000 v_cmp_le_f32_e32 vcc, v20, v17 ; 7C062314 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[10:11], vcc ; BE8A246A s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E v_mov_b32_e32 v20, 0xc2800000 ; 7E2802FF C2800000 v_add_f32_e32 v17, v17, v20 ; 06222911 v_mul_f32_e32 v20, s8, v17 ; 10282208 v_floor_f32_e32 v20, v20 ; 7E284914 v_mul_f32_e32 v26, s8, v20 ; 10342808 v_mad_f32 v25, v17, s8, -v20 ; D2820019 84501111 v_floor_f32_e32 v17, v26 ; 7E22491A v_mad_f32 v26, v20, s8, -v17 ; D282001A 84441114 v_add_f32_e32 v20, 4.0, v17 ; 062822F6 s_or_saveexec_b64 s[10:11], s[10:11] ; BE8A250A v_mov_b32_e32 v42, s14 ; 7E54020E v_mov_b32_e32 v33, s15 ; 7E42020F s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xc ; C207810C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v253, s15, 0 ; 05FB000F s_buffer_load_dword s15, s[0:3], 0xd ; C207810D s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v253, s15, 1 ; 05FB020F s_buffer_load_dword s15, s[0:3], 0xe ; C207810E s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v253, s15, 2 ; 05FB040F s_buffer_load_dword s15, s[0:3], 0x14 ; C2078114 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v253, s15, 10 ; 05FB140F s_buffer_load_dword s15, s[0:3], 0x15 ; C2078115 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v253, s15, 11 ; 05FB160F s_buffer_load_dword s15, s[0:3], 0x16 ; C2078116 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v253, s15, 12 ; 05FB180F s_buffer_load_dword s15, s[0:3], 0x18 ; C2078118 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v253, s15, 13 ; 05FB1A0F s_buffer_load_dword s15, s[0:3], 0x19 ; C2078119 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v253, s15, 14 ; 05FB1C0F s_buffer_load_dword s15, s[0:3], 0x1a ; C207811A s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v253, s15, 15 ; 05FB1E0F s_buffer_load_dword s15, s[0:3], 0x1c ; C207811C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v253, s15, 16 ; 05FB200F s_buffer_load_dword s15, s[0:3], 0x1d ; C207811D s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v253, s15, 17 ; 05FB220F s_buffer_load_dword s15, s[0:3], 0x1e ; C207811E s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v253, s15, 18 ; 05FB240F s_buffer_load_dword s15, s[0:3], 0x20 ; C2078120 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v253, s15, 19 ; 05FB260F s_buffer_load_dword s15, s[0:3], 0x21 ; C2078121 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v253, s15, 20 ; 05FB280F s_buffer_load_dword s15, s[0:3], 0x22 ; C2078122 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v253, s15, 9 ; 05FB120F s_buffer_load_dword s39, s[0:3], 0x5c ; C213815C s_buffer_load_dword s42, s[0:3], 0x60 ; C2150160 s_buffer_load_dword s24, s[0:3], 0x64 ; C20C0164 s_buffer_load_dword s15, s[0:3], 0x68 ; C2078168 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v253, s15, 5 ; 05FB0A0F s_buffer_load_dword s15, s[0:3], 0x69 ; C2078169 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v253, s15, 3 ; 05FB060F s_buffer_load_dword s15, s[0:3], 0x6a ; C207816A s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v253, s15, 4 ; 05FB080F s_buffer_load_dword s15, s[0:3], 0x6c ; C207816C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v253, s15, 8 ; 05FB100F s_buffer_load_dword s15, s[0:3], 0x6d ; C207816D s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v253, s15, 6 ; 05FB0C0F s_buffer_load_dword s0, s[0:3], 0x6e ; C200016E s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v253, s0, 7 ; 05FB0E00 v_mov_b32_e32 v37, s9 ; 7E4A0209 s_xor_b64 exec, exec, s[10:11] ; 89FE0A7E v_mul_f32_e32 v17, s12, v40 ; 1022500C v_floor_f32_e32 v20, v41 ; 7E284929 v_subrev_f32_e32 v25, v20, v41 ; 0A325314 v_floor_f32_e32 v20, v17 ; 7E284911 v_mad_f32 v26, v40, s12, -v20 ; D282001A 84501928 v_mov_b32_e32 v27, v16 ; 7E360310 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E v_mac_f32_e32 v42, s14, v12 ; 3E54180E v_mul_f32_e32 v16, v39, v15 ; 10201F27 v_mul_f32_e32 v17, v39, v14 ; 10221D27 v_mul_f32_e32 v15, v39, v36 ; 101E4927 v_mac_f32_e32 v16, v22, v1 ; 3E200316 v_mul_f32_e32 v14, v22, v1 ; 101C0316 v_mac_f32_e32 v17, v22, v13 ; 3E221B16 v_mul_f32_e32 v13, v22, v13 ; 101A1B16 v_mac_f32_e32 v15, v22, v21 ; 3E1E2B16 v_mul_f32_e32 v12, v22, v21 ; 10182B16 v_add_f32_e64 v1, 0, v42 clamp ; D2060801 00025480 v_cmp_neq_f32_e64 s[16:17], s12, v38 ; D01A0010 00024C0C v_cmp_eq_f32_e64 s[0:1], 4.0, v18 ; D0040000 000224F6 v_writelane_b32 v253, s0, 23 ; 05FB2E00 v_writelane_b32 v253, s1, 24 ; 05FB3001 v_cmp_eq_f32_e64 s[0:1], 2.0, v18 ; D0040000 000224F4 v_writelane_b32 v253, s0, 25 ; 05FB3200 v_writelane_b32 v253, s1, 26 ; 05FB3401 v_cmp_eq_f32_e64 s[0:1], 1.0, v18 ; D0040000 000224F2 v_writelane_b32 v253, s0, 27 ; 05FB3600 v_writelane_b32 v253, s1, 28 ; 05FB3801 v_cmp_eq_f32_e32 vcc, 0, v18 ; 7C042480 v_cmp_neq_f32_e64 s[14:15], s12, v30 ; D01A000E 00023C0C v_subrev_f32_e32 v21, s13, v10 ; 0A2A140D v_mul_f32_e32 v36, v21, v21 ; 10482B15 v_cmp_neq_f32_e64 s[12:13], s12, v27 ; D01A000C 0002360C v_subrev_f32_e32 v21, s41, v11 ; 0A2A1629 v_mac_f32_e32 v36, v21, v21 ; 3E482B15 v_mov_b32_e32 v21, 0xbe4ccccd ; 7E2A02FF BE4CCCCD v_mad_f32 v22, v35, v23, v21 ; D2820016 04562F23 v_mad_f32 v23, v35, v24, v21 ; D2820017 04563123 v_mac_f32_e32 v21, v35, v34 ; 3E2A4523 v_mov_b32_e32 v24, 0x40e00000 ; 7E3002FF 40E00000 v_mul_f32_e32 v22, v24, v22 ; 102C2D18 v_mul_f32_e32 v23, v24, v23 ; 102E2F18 v_mul_f32_e32 v21, v24, v21 ; 102A2B18 v_mov_b32_e32 v24, 0x3c23d70a ; 7E3002FF 3C23D70A v_max_f32_e32 v22, v24, v22 ; 202C2D18 v_max_f32_e32 v23, v24, v23 ; 202E2F18 v_max_f32_e32 v21, v24, v21 ; 202A2B18 v_add_f32_e32 v24, v23, v22 ; 06302D17 v_add_f32_e32 v24, v21, v24 ; 06303115 v_rcp_f32_e32 v24, v24 ; 7E305518 v_subrev_f32_e32 v34, s40, v0 ; 0A440028 v_mac_f32_e32 v36, v34, v34 ; 3E484522 v_mul_f32_e32 v34, s24, v36 ; 10444818 v_log_f32_e32 v34, v34 ; 7E444F22 v_cmp_eq_f32_e64 s[0:1], 4.0, v19 ; D0040000 000226F6 v_writelane_b32 v253, s0, 21 ; 05FB2A00 v_writelane_b32 v253, s1, 22 ; 05FB2C01 v_cndmask_b32_e64 v35, v33, v37, s[16:17] ; D2000023 00424B21 v_rcp_f32_e32 v35, v35 ; 7E465523 v_mul_f32_e32 v34, 0x3f317218, v34 ; 104444FF 3F317218 v_mov_b32_e32 v39, 0x3b000000 ; 7E4E02FF 3B000000 v_mov_b32_e32 v40, 0x3b800000 ; 7E5002FF 3B800000 v_cndmask_b32_e64 v36, v40, v39, s[16:17] ; D2000024 00424F28 v_mul_f32_e32 v41, v35, v10 ; 10521523 v_floor_f32_e32 v41, v41 ; 7E524929 v_mad_f32 v41, v10, v35, -v41 ; D2820029 84A6470A v_mul_f32_e32 v42, v35, v11 ; 10541723 v_floor_f32_e32 v42, v42 ; 7E54492A v_mad_f32 v42, v11, v35, -v42 ; D282002A 84AA470B v_mul_f32_e32 v43, v35, v0 ; 10560123 v_floor_f32_e32 v43, v43 ; 7E56492B v_mad_f32 v35, v0, v35, -v43 ; D2820023 84AE4700 v_add_f32_e64 v43, s39, s39 ; D206002B 00004E27 v_mad_f32 v44, -v43, v36, 1.0 ; D282002C 23CA492B v_mul_f32_e32 v45, s39, v36 ; 105A4827 v_mad_f32 v41, v44, v41, v45 ; D2820029 04B6532C v_mad_f32 v42, v44, v42, v45 ; D282002A 04B6552C v_mac_f32_e32 v45, v44, v35 ; 3E5A472C v_mul_f32_e32 v36, s42, v34 ; 1048442A v_mad_f32 v34, v38, v41, v31 ; D2820022 047E5326 v_mad_f32 v35, v38, v42, v32 ; D2820023 04825526 v_cndmask_b32_e64 v42, v33, v37, s[14:15] ; D200002A 003A4B21 v_rcp_f32_e32 v42, v42 ; 7E54552A v_mac_f32_e32 v31, v38, v45 ; 3E3E5B26 v_mov_b32_e32 v44, v31 ; 7E58031F v_mov_b32_e32 v45, v32 ; 7E5A0320 v_mov_b32_e32 v46, v33 ; 7E5C0321 v_mov_b32_e32 v47, v34 ; 7E5E0322 v_mac_f32_e32 v32, v38, v41 ; 3E405326 v_mul_f32_e32 v38, v42, v10 ; 104C152A v_floor_f32_e32 v38, v38 ; 7E4C4926 v_mad_f32 v38, v10, v42, -v38 ; D2820026 849A550A v_mul_f32_e32 v41, v42, v11 ; 1052172A v_floor_f32_e32 v41, v41 ; 7E524929 v_mad_f32 v41, v11, v42, -v41 ; D2820029 84A6550B v_mul_f32_e32 v45, v42, v0 ; 105A012A v_floor_f32_e32 v45, v45 ; 7E5A492D v_mad_f32 v42, v0, v42, -v45 ; D282002A 84B65500 v_cndmask_b32_e64 v33, v33, v37, s[12:13] ; D2000021 00324B21 v_cndmask_b32_e64 v37, v40, v39, s[14:15] ; D2000025 003A4F28 v_mad_f32 v45, -v43, v37, 1.0 ; D282002D 23CA4B2B v_mul_f32_e32 v37, s39, v37 ; 104A4A27 v_mad_f32 v46, v45, v38, v37 ; D282002E 04964D2D v_rcp_f32_e32 v33, v33 ; 7E425521 v_mad_f32 v38, v45, v41, v37 ; D2820026 0496532D v_mac_f32_e32 v37, v45, v42 ; 3E4A552D v_cndmask_b32_e64 v39, v40, v39, s[12:13] ; D2000027 00324F28 v_mul_f32_e32 v40, v33, v10 ; 10501521 v_floor_f32_e32 v40, v40 ; 7E504928 v_mad_f32 v40, v10, v33, -v40 ; D2820028 84A2430A v_mul_f32_e32 v41, v33, v11 ; 10521721 v_floor_f32_e32 v41, v41 ; 7E524929 v_mad_f32 v41, v11, v33, -v41 ; D2820029 84A6430B v_mul_f32_e32 v42, v33, v0 ; 10540121 v_floor_f32_e32 v42, v42 ; 7E54492A v_mad_f32 v33, v0, v33, -v42 ; D2820021 84AA4300 v_mad_f32 v42, -v43, v39, 1.0 ; D282002A 23CA4F2B v_mul_f32_e32 v39, s39, v39 ; 104E4E27 v_mad_f32 v40, v42, v40, v39 ; D2820028 049E512A v_mad_f32 v41, v42, v41, v39 ; D2820029 049E532A v_mac_f32_e32 v39, v42, v33 ; 3E4E432A v_mad_f32 v48, v30, v46, v28 ; D2820030 04725D1E v_mad_f32 v49, v30, v38, v29 ; D2820031 04764D1E s_load_dwordx8 s[48:55], s[100:101], 0x48 ; C0D86548 s_load_dwordx4 s[96:99], s[4:5], 0x24 ; C0B00524 v_mac_f32_e32 v28, v30, v37 ; 3E384B1E v_mad_f32 v37, v27, v40, v25 ; D2820025 0466511B v_mad_f32 v38, v27, v41, v26 ; D2820026 046A531B v_mac_f32_e32 v25, v27, v39 ; 3E324F1B v_mov_b32_e32 v51, v28 ; 7E66031C v_mov_b32_e32 v52, v29 ; 7E68031D v_mov_b32_e32 v53, v30 ; 7E6A031E v_mov_b32_e32 v54, v31 ; 7E6C031F v_mac_f32_e32 v29, v30, v46 ; 3E3A5D1E v_mov_b32_e32 v55, v25 ; 7E6E0319 v_mov_b32_e32 v56, v26 ; 7E70031A v_mov_b32_e32 v57, v27 ; 7E72031B v_mov_b32_e32 v58, v28 ; 7E74031C v_mac_f32_e32 v26, v27, v40 ; 3E34511B v_mov_b32_e32 v45, v35 ; 7E5A0323 v_mov_b32_e32 v50, v36 ; 7E640324 v_mov_b32_e32 v52, v49 ; 7E680331 v_mov_b32_e32 v39, v36 ; 7E4E0324 v_mov_b32_e32 v56, v38 ; 7E700326 v_mov_b32_e32 v46, v36 ; 7E5C0324 v_mov_b32_e32 v33, v36 ; 7E420324 v_mov_b32_e32 v53, v36 ; 7E6A0324 v_mov_b32_e32 v30, v36 ; 7E3C0324 v_mov_b32_e32 v57, v36 ; 7E720324 v_mov_b32_e32 v27, v36 ; 7E360324 s_load_dwordx4 s[32:35], s[4:5], 0x1c ; C090051C s_load_dwordx8 s[16:23], s[100:101], 0x38 ; C0C86538 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[40:42], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[48:55], s[96:99] ; F0900700 030C2822 s_load_dwordx4 s[8:11], s[4:5], 0x14 ; C0840514 s_mov_b64 s[84:85], s[4:5] ; BED40404 v_writelane_b32 v253, s84, 29 ; 05FB3A54 v_writelane_b32 v253, s85, 30 ; 05FB3C55 s_load_dwordx8 s[0:7], s[100:101], 0x28 ; C0C06528 s_load_dwordx4 s[60:63], s[84:85], 0x20 ; C09E5520 s_load_dwordx4 s[36:39], s[84:85], 0xc ; C092550C s_load_dwordx8 s[24:31], s[100:101], 0x18 ; C0CC6518 s_load_dwordx4 s[12:15], s[84:85], 0x4 ; C0865504 s_load_dwordx8 s[76:83], s[100:101], 0x40 ; C0E66540 s_load_dwordx8 s[40:47], s[100:101], 0x8 ; C0D46508 image_sample_l v[59:61], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[16:23], s[32:35] ; F0900700 01043B22 s_load_dwordx4 s[64:67], s[84:85], 0x18 ; C0A05518 s_load_dwordx8 s[68:75], s[100:101], 0x30 ; C0E26530 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 image_sample_l v[62:64], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[0:7], s[8:11] ; F0900700 00403E22 s_load_dwordx4 s[56:59], s[84:85], 0x10 ; C09C5510 s_load_dwordx8 s[88:95], s[100:101], 0x20 ; C0EC6520 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_writelane_b32 v253, s88, 43 ; 05FB5658 v_writelane_b32 v253, s89, 44 ; 05FB5859 v_writelane_b32 v253, s90, 45 ; 05FB5A5A v_writelane_b32 v253, s91, 46 ; 05FB5C5B v_writelane_b32 v253, s92, 47 ; 05FB5E5C v_writelane_b32 v253, s93, 48 ; 05FB605D v_writelane_b32 v253, s94, 49 ; 05FB625E v_writelane_b32 v253, s95, 50 ; 05FB645F image_sample_l v[65:67], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[24:31], s[36:39] ; F0900700 01264122 s_load_dwordx4 s[88:91], s[84:85], 0x8 ; C0AC5508 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_writelane_b32 v253, s88, 31 ; 05FB3E58 v_writelane_b32 v253, s89, 32 ; 05FB4059 v_writelane_b32 v253, s90, 33 ; 05FB425A v_writelane_b32 v253, s91, 34 ; 05FB445B s_load_dwordx8 s[88:95], s[100:101], 0x10 ; C0EC6510 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v253, s88, 35 ; 05FB4658 v_writelane_b32 v253, s89, 36 ; 05FB4859 v_writelane_b32 v253, s90, 37 ; 05FB4A5A v_writelane_b32 v253, s91, 38 ; 05FB4C5B v_writelane_b32 v253, s92, 39 ; 05FB4E5C v_writelane_b32 v253, s93, 40 ; 05FB505D v_writelane_b32 v253, s94, 41 ; 05FB525E v_writelane_b32 v253, s95, 42 ; 05FB545F image_sample_l v[68:70], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[40:47], s[12:15] ; F0900700 006A4422 image_sample_l v[71:73], 7, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[48:55], s[96:99] ; F0900700 030C472C image_sample_l v[74:76], 7, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[16:23], s[32:35] ; F0900700 01044A2C image_sample_l v[77:79], 7, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[0:7], s[8:11] ; F0900700 00404D2C image_sample_l v[80:82], 7, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[24:31], s[36:39] ; F0900700 0126502C image_sample_l v[83:85], 7, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[40:47], s[12:15] ; F0900700 006A532C image_sample_l v[86:88], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[48:55], s[96:99] ; F0900700 030C561F image_sample_l v[89:91], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[16:23], s[32:35] ; F0900700 0104591F image_sample_l v[92:94], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[0:7], s[8:11] ; F0900700 00405C1F image_sample_l v[95:97], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[24:31], s[36:39] ; F0900700 01265F1F image_sample_l v[98:100], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[40:47], s[12:15] ; F0900700 006A621F image_sample_l v[101:103], 7, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[48:55], s[96:99] ; F0900700 030C6530 image_sample_l v[104:106], 7, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[16:23], s[32:35] ; F0900700 01046830 image_sample_l v[107:109], 7, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[0:7], s[8:11] ; F0900700 00406B30 image_sample_l v[110:112], 7, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[24:31], s[36:39] ; F0900700 01266E30 image_sample_l v[113:115], 7, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[40:47], s[12:15] ; F0900700 006A7130 image_sample_l v[116:118], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[48:55], s[96:99] ; F0900700 030C7433 image_sample_l v[119:121], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[16:23], s[32:35] ; F0900700 01047733 image_sample_l v[122:124], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[0:7], s[8:11] ; F0900700 00407A33 image_sample_l v[125:127], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[24:31], s[36:39] ; F0900700 01267D33 image_sample_l v[128:130], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[40:47], s[12:15] ; F0900700 006A8033 image_sample_l v[131:133], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[48:55], s[96:99] ; F0900700 030C831C image_sample_l v[134:136], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[16:23], s[32:35] ; F0900700 0104861C image_sample_l v[137:139], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[0:7], s[8:11] ; F0900700 0040891C image_sample_l v[140:142], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[24:31], s[36:39] ; F0900700 01268C1C image_sample_l v[143:145], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[40:47], s[12:15] ; F0900700 006A8F1C image_sample_l v[146:148], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[48:55], s[96:99] ; F0900700 030C9225 image_sample_l v[149:151], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[16:23], s[32:35] ; F0900700 01049525 image_sample_l v[152:154], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[0:7], s[8:11] ; F0900700 00409825 image_sample_l v[155:157], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[24:31], s[36:39] ; F0900700 01269B25 image_sample_l v[158:160], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[40:47], s[12:15] ; F0900700 006A9E25 image_sample_l v[161:163], 7, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[48:55], s[96:99] ; F0900700 030CA137 s_load_dwordx4 s[84:87], s[84:85], 0x28 ; C0AA5528 s_load_dwordx8 s[88:95], s[100:101], 0x50 ; C0EC6550 image_sample_l v[164:166], 7, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[16:23], s[32:35] ; F0900700 0104A437 image_sample_l v[167:169], 7, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[0:7], s[8:11] ; F0900700 0040A737 image_sample_l v[170:172], 7, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[24:31], s[36:39] ; F0900700 0126AA37 image_sample_l v[173:175], 7, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[40:47], s[12:15] ; F0900700 006AAD37 image_sample_l v[176:178], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[48:55], s[96:99] ; F0900700 030CB019 image_sample_l v[179:181], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[16:23], s[32:35] ; F0900700 0104B319 image_sample_l v[182:184], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[0:7], s[8:11] ; F0900700 0040B619 image_sample_l v[185:187], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[24:31], s[36:39] ; F0900700 0126B919 image_sample_l v[188:190], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[40:47], s[12:15] ; F0900700 006ABC19 s_waitcnt vmcnt(9) lgkmcnt(0) ; BF8C0079 image_sample_l v[191:192], 10, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[88:95], s[84:87] ; F0900A00 02B6BF2C image_sample_l v[193:194], 10, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[76:83], s[60:63] ; F0900A00 01F3C12C image_sample_l v[195:196], 10, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[68:75], s[64:67] ; F0900A00 0211C32C v_readlane_b32 s12, v253, 43 ; 021957FD v_readlane_b32 s13, v253, 44 ; 021B59FD v_readlane_b32 s14, v253, 45 ; 021D5BFD v_readlane_b32 s15, v253, 46 ; 021F5DFD v_readlane_b32 s16, v253, 47 ; 02215FFD v_readlane_b32 s17, v253, 48 ; 022361FD v_readlane_b32 s18, v253, 49 ; 022563FD v_readlane_b32 s19, v253, 50 ; 022765FD s_nop 2 ; BF800002 image_sample_l v[197:198], 10, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[12:19], s[56:59] ; F0900A00 01C3C52C v_readlane_b32 s0, v253, 31 ; 02013FFD v_readlane_b32 s1, v253, 32 ; 020341FD v_readlane_b32 s2, v253, 33 ; 020543FD v_readlane_b32 s3, v253, 34 ; 020745FD s_nop 2 ; BF800002 v_readlane_b32 s4, v253, 35 ; 020947FD v_readlane_b32 s5, v253, 36 ; 020B49FD v_readlane_b32 s6, v253, 37 ; 020D4BFD v_readlane_b32 s7, v253, 38 ; 020F4DFD v_readlane_b32 s8, v253, 39 ; 02114FFD v_readlane_b32 s9, v253, 40 ; 021351FD v_readlane_b32 s10, v253, 41 ; 021553FD v_readlane_b32 s11, v253, 42 ; 021755FD s_nop 2 ; BF800002 image_sample_l v[43:44], 10, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[4:11], s[0:3] ; F0900A00 00012B2C image_sample_l v[45:46], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[88:95], s[84:87] ; F0900A00 02B62D1F image_sample_l v[199:200], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[76:83], s[60:63] ; F0900A00 01F3C71F image_sample_l v[201:202], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[68:75], s[64:67] ; F0900A00 0211C91F image_sample_l v[203:204], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[12:19], s[56:59] ; F0900A00 01C3CB1F s_mov_b32 s16, s12 ; BE90030C s_mov_b32 s17, s13 ; BE91030D s_mov_b32 s18, s14 ; BE92030E s_mov_b32 s19, s15 ; BE93030F s_mov_b32 s20, s16 ; BE940310 s_mov_b32 s21, s17 ; BE950311 s_mov_b32 s22, s18 ; BE960312 s_mov_b32 s23, s19 ; BE970313 image_sample_l v[31:32], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[4:11], s[0:3] ; F0900A00 00011F1F s_mov_b32 s8, s4 ; BE880304 s_mov_b32 s9, s5 ; BE890305 s_mov_b32 s10, s6 ; BE8A0306 s_mov_b32 s11, s7 ; BE8B0307 s_mov_b32 s12, s8 ; BE8C0308 s_mov_b32 s13, s9 ; BE8D0309 s_mov_b32 s14, s10 ; BE8E030A s_mov_b32 s15, s11 ; BE8F030B s_mov_b32 s4, s0 ; BE840300 s_mov_b32 s5, s1 ; BE850301 s_mov_b32 s6, s2 ; BE860302 s_mov_b32 s7, s3 ; BE870303 image_sample_l v[205:206], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[88:95], s[84:87] ; F0900A00 02B6CD22 image_sample_l v[207:208], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[76:83], s[60:63] ; F0900A00 01F3CF22 image_sample_l v[209:210], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[68:75], s[64:67] ; F0900A00 0211D122 image_sample_l v[211:212], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[16:23], s[56:59] ; F0900A00 01C4D322 image_sample_l v[33:34], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[8:15], s[4:7] ; F0900A00 00222122 image_sample_l v[35:36], 10, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[88:95], s[84:87] ; F0900A00 02B62333 image_sample_l v[213:214], 10, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[76:83], s[60:63] ; F0900A00 01F3D533 image_sample_l v[215:216], 10, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[68:75], s[64:67] ; F0900A00 0211D733 image_sample_l v[217:218], 10, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[16:23], s[56:59] ; F0900A00 01C4D933 image_sample_l v[51:52], 10, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[8:15], s[4:7] ; F0900A00 00223333 s_waitcnt vmcnt(10) ; BF8C077A image_sample_l v[53:54], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[88:95], s[84:87] ; F0900A00 02B6351C image_sample_l v[219:220], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[76:83], s[60:63] ; F0900A00 01F3DB1C image_sample_l v[221:222], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[68:75], s[64:67] ; F0900A00 0211DD1C image_sample_l v[223:224], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[16:23], s[56:59] ; F0900A00 01C4DF1C image_sample_l v[28:29], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[8:15], s[4:7] ; F0900A00 00221C1C s_waitcnt vmcnt(5) ; BF8C0775 image_sample_l v[225:226], 10, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[88:95], s[84:87] ; F0900A00 02B6E130 image_sample_l v[227:228], 10, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[76:83], s[60:63] ; F0900A00 01F3E330 image_sample_l v[229:230], 10, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[68:75], s[64:67] ; F0900A00 0211E530 image_sample_l v[231:232], 10, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[16:23], s[56:59] ; F0900A00 01C4E730 image_sample_l v[47:48], 10, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[8:15], s[4:7] ; F0900A00 00222F30 image_sample_l v[49:50], 10, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[88:95], s[84:87] ; F0900A00 02B63137 image_sample_l v[233:234], 10, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[76:83], s[60:63] ; F0900A00 01F3E937 image_sample_l v[235:236], 10, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[68:75], s[64:67] ; F0900A00 0211EB37 image_sample_l v[237:238], 10, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[16:23], s[56:59] ; F0900A00 01C4ED37 image_sample_l v[55:56], 10, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[8:15], s[4:7] ; F0900A00 00223737 s_waitcnt vmcnt(10) ; BF8C077A image_sample_l v[57:58], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[88:95], s[84:87] ; F0900A00 02B63919 image_sample_l v[239:240], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[76:83], s[60:63] ; F0900A00 01F3EF19 image_sample_l v[241:242], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[68:75], s[64:67] ; F0900A00 0211F119 image_sample_l v[243:244], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[16:23], s[56:59] ; F0900A00 01C4F319 image_sample_l v[25:26], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[8:15], s[4:7] ; F0900A00 00221919 image_sample_l v[245:246], 10, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[88:95], s[84:87] ; F0900A00 02B6F525 image_sample_l v[247:248], 10, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[76:83], s[60:63] ; F0900A00 01F3F725 image_sample_l v[249:250], 10, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[68:75], s[64:67] ; F0900A00 0211F925 image_sample_l v[251:252], 10, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[16:23], s[56:59] ; F0900A00 01C4FB25 v_readlane_b32 s0, v253, 27 ; 020137FD v_readlane_b32 s1, v253, 28 ; 020339FD s_nop 2 ; BF800002 v_cndmask_b32_e64 v27, 0, 1.0, s[0:1] ; D200001B 0001E480 v_cndmask_b32_e64 v30, 0, 1.0, vcc ; D200001E 01A9E480 v_mul_f32_e32 v68, v30, v68 ; 1088891E v_mul_f32_e32 v69, v30, v69 ; 108A8B1E v_mul_f32_e32 v70, v30, v70 ; 108C8D1E v_mac_f32_e32 v68, v27, v65 ; 3E88831B v_mac_f32_e32 v69, v27, v66 ; 3E8A851B v_mac_f32_e32 v70, v27, v67 ; 3E8C871B v_readlane_b32 s0, v253, 25 ; 020133FD v_readlane_b32 s1, v253, 26 ; 020335FD s_nop 2 ; BF800002 v_cndmask_b32_e64 v65, 0, 1.0, s[0:1] ; D2000041 0001E480 v_mac_f32_e32 v68, v65, v62 ; 3E887D41 v_mac_f32_e32 v69, v65, v63 ; 3E8A7F41 v_mac_f32_e32 v70, v65, v64 ; 3E8C8141 v_mov_b32_e32 v62, 0x40400000 ; 7E7C02FF 40400000 v_cmp_eq_f32_e64 s[2:3], v18, v62 ; D0040002 00027D12 v_cndmask_b32_e64 v18, 0, 1.0, s[2:3] ; D2000012 0009E480 v_mac_f32_e32 v68, v18, v59 ; 3E887712 v_mac_f32_e32 v69, v18, v60 ; 3E8A7912 v_mac_f32_e32 v70, v18, v61 ; 3E8C7B12 v_readlane_b32 s0, v253, 23 ; 02012FFD v_readlane_b32 s1, v253, 24 ; 020331FD s_nop 2 ; BF800002 v_cndmask_b32_e64 v59, 0, 1.0, s[0:1] ; D200003B 0001E480 v_mac_f32_e32 v68, v59, v40 ; 3E88513B v_mac_f32_e32 v69, v59, v41 ; 3E8A533B v_mac_f32_e32 v70, v59, v42 ; 3E8C553B v_mul_f32_e32 v40, v30, v83 ; 1050A71E v_mul_f32_e32 v41, v30, v84 ; 1052A91E v_mul_f32_e32 v42, v30, v85 ; 1054AB1E v_mac_f32_e32 v40, v27, v80 ; 3E50A11B v_mac_f32_e32 v41, v27, v81 ; 3E52A31B v_mac_f32_e32 v42, v27, v82 ; 3E54A51B v_mac_f32_e32 v40, v65, v77 ; 3E509B41 v_mac_f32_e32 v41, v65, v78 ; 3E529D41 v_mac_f32_e32 v42, v65, v79 ; 3E549F41 v_mac_f32_e32 v40, v18, v74 ; 3E509512 v_mac_f32_e32 v41, v18, v75 ; 3E529712 v_mac_f32_e32 v42, v18, v76 ; 3E549912 v_mac_f32_e32 v40, v59, v71 ; 3E508F3B v_mac_f32_e32 v41, v59, v72 ; 3E52913B v_mac_f32_e32 v42, v59, v73 ; 3E54933B v_mul_f32_e32 v60, v30, v98 ; 1078C51E v_mul_f32_e32 v61, v30, v99 ; 107AC71E v_mul_f32_e32 v63, v30, v100 ; 107EC91E v_mac_f32_e32 v60, v27, v95 ; 3E78BF1B v_mac_f32_e32 v61, v27, v96 ; 3E7AC11B v_mac_f32_e32 v63, v27, v97 ; 3E7EC31B v_mac_f32_e32 v60, v65, v92 ; 3E78B941 v_mac_f32_e32 v61, v65, v93 ; 3E7ABB41 v_mac_f32_e32 v63, v65, v94 ; 3E7EBD41 v_mac_f32_e32 v60, v18, v89 ; 3E78B312 v_mac_f32_e32 v61, v18, v90 ; 3E7AB512 v_mac_f32_e32 v63, v18, v91 ; 3E7EB712 v_mac_f32_e32 v60, v59, v86 ; 3E78AD3B v_mac_f32_e32 v61, v59, v87 ; 3E7AAF3B v_mac_f32_e32 v63, v59, v88 ; 3E7EB13B v_cmp_eq_f32_e64 s[0:1], 0, v19 ; D0040000 00022680 v_cndmask_b32_e64 v64, 0, 1.0, s[0:1] ; D2000040 0001E480 v_mul_f32_e32 v66, v64, v113 ; 1084E340 v_mul_f32_e32 v67, v64, v114 ; 1086E540 v_mul_f32_e32 v71, v64, v115 ; 108EE740 v_cmp_eq_f32_e64 s[0:1], 1.0, v19 ; D0040000 000226F2 v_cndmask_b32_e64 v72, 0, 1.0, s[0:1] ; D2000048 0001E480 v_mac_f32_e32 v66, v72, v110 ; 3E84DD48 v_mac_f32_e32 v67, v72, v111 ; 3E86DF48 v_mac_f32_e32 v71, v72, v112 ; 3E8EE148 v_cmp_eq_f32_e64 s[0:1], 2.0, v19 ; D0040000 000226F4 v_cndmask_b32_e64 v73, 0, 1.0, s[0:1] ; D2000049 0001E480 v_mac_f32_e32 v66, v73, v107 ; 3E84D749 v_mac_f32_e32 v67, v73, v108 ; 3E86D949 v_mac_f32_e32 v71, v73, v109 ; 3E8EDB49 v_cmp_eq_f32_e64 s[0:1], v19, v62 ; D0040000 00027D13 v_cndmask_b32_e64 v19, 0, 1.0, s[0:1] ; D2000013 0001E480 v_mac_f32_e32 v66, v19, v104 ; 3E84D113 v_mac_f32_e32 v67, v19, v105 ; 3E86D313 v_mac_f32_e32 v71, v19, v106 ; 3E8ED513 v_readlane_b32 s0, v253, 21 ; 02012BFD v_readlane_b32 s1, v253, 22 ; 02032DFD s_nop 2 ; BF800002 v_cndmask_b32_e64 v74, 0, 1.0, s[0:1] ; D200004A 0001E480 v_mac_f32_e32 v66, v74, v101 ; 3E84CB4A v_mac_f32_e32 v67, v74, v102 ; 3E86CD4A v_mac_f32_e32 v71, v74, v103 ; 3E8ECF4A v_mul_f32_e32 v75, v64, v128 ; 10970140 v_mul_f32_e32 v76, v64, v129 ; 10990340 v_mul_f32_e32 v77, v64, v130 ; 109B0540 v_mac_f32_e32 v75, v72, v125 ; 3E96FB48 v_mac_f32_e32 v76, v72, v126 ; 3E98FD48 v_mac_f32_e32 v77, v72, v127 ; 3E9AFF48 v_mac_f32_e32 v75, v73, v122 ; 3E96F549 v_mac_f32_e32 v76, v73, v123 ; 3E98F749 v_mac_f32_e32 v77, v73, v124 ; 3E9AF949 v_mac_f32_e32 v75, v19, v119 ; 3E96EF13 v_mac_f32_e32 v76, v19, v120 ; 3E98F113 v_mac_f32_e32 v77, v19, v121 ; 3E9AF313 v_mac_f32_e32 v75, v74, v116 ; 3E96E94A v_mac_f32_e32 v76, v74, v117 ; 3E98EB4A v_mac_f32_e32 v77, v74, v118 ; 3E9AED4A v_mul_f32_e32 v78, v64, v143 ; 109D1F40 v_mul_f32_e32 v79, v64, v144 ; 109F2140 v_mul_f32_e32 v80, v64, v145 ; 10A12340 v_mac_f32_e32 v78, v72, v140 ; 3E9D1948 v_mac_f32_e32 v79, v72, v141 ; 3E9F1B48 v_mac_f32_e32 v80, v72, v142 ; 3EA11D48 v_mac_f32_e32 v78, v73, v137 ; 3E9D1349 v_mac_f32_e32 v79, v73, v138 ; 3E9F1549 v_mac_f32_e32 v80, v73, v139 ; 3EA11749 v_mac_f32_e32 v78, v19, v134 ; 3E9D0D13 v_mac_f32_e32 v79, v19, v135 ; 3E9F0F13 v_mac_f32_e32 v80, v19, v136 ; 3EA11113 v_mac_f32_e32 v78, v74, v131 ; 3E9D074A v_mac_f32_e32 v79, v74, v132 ; 3E9F094A v_mac_f32_e32 v80, v74, v133 ; 3EA10B4A v_cmp_eq_f32_e32 vcc, 0, v20 ; 7C042880 v_cndmask_b32_e64 v81, 0, 1.0, vcc ; D2000051 01A9E480 v_mul_f32_e32 v82, v81, v158 ; 10A53D51 v_mul_f32_e32 v83, v81, v159 ; 10A73F51 v_mul_f32_e32 v84, v81, v160 ; 10A94151 v_cmp_eq_f32_e32 vcc, 1.0, v20 ; 7C0428F2 v_cndmask_b32_e64 v85, 0, 1.0, vcc ; D2000055 01A9E480 v_mac_f32_e32 v82, v85, v155 ; 3EA53755 v_mac_f32_e32 v83, v85, v156 ; 3EA73955 v_mac_f32_e32 v84, v85, v157 ; 3EA93B55 v_cmp_eq_f32_e32 vcc, 2.0, v20 ; 7C0428F4 v_cndmask_b32_e64 v86, 0, 1.0, vcc ; D2000056 01A9E480 v_mac_f32_e32 v82, v86, v152 ; 3EA53156 v_mac_f32_e32 v83, v86, v153 ; 3EA73356 v_mac_f32_e32 v84, v86, v154 ; 3EA93556 v_cmp_eq_f32_e32 vcc, v20, v62 ; 7C047D14 v_cndmask_b32_e64 v87, 0, 1.0, vcc ; D2000057 01A9E480 v_mac_f32_e32 v82, v87, v149 ; 3EA52B57 v_mac_f32_e32 v83, v87, v150 ; 3EA72D57 v_mac_f32_e32 v84, v87, v151 ; 3EA92F57 v_cmp_eq_f32_e32 vcc, 4.0, v20 ; 7C0428F6 v_cndmask_b32_e64 v20, 0, 1.0, vcc ; D2000014 01A9E480 v_mac_f32_e32 v82, v20, v146 ; 3EA52514 v_mac_f32_e32 v83, v20, v147 ; 3EA72714 v_mac_f32_e32 v84, v20, v148 ; 3EA92914 v_mul_f32_e32 v88, v81, v173 ; 10B15B51 v_mul_f32_e32 v89, v81, v174 ; 10B35D51 v_mul_f32_e32 v90, v81, v175 ; 10B55F51 v_mac_f32_e32 v88, v85, v170 ; 3EB15555 v_mac_f32_e32 v89, v85, v171 ; 3EB35755 v_mac_f32_e32 v90, v85, v172 ; 3EB55955 v_mac_f32_e32 v88, v86, v167 ; 3EB14F56 v_mac_f32_e32 v89, v86, v168 ; 3EB35156 v_mac_f32_e32 v90, v86, v169 ; 3EB55356 v_mac_f32_e32 v88, v87, v164 ; 3EB14957 v_mac_f32_e32 v89, v87, v165 ; 3EB34B57 v_mac_f32_e32 v90, v87, v166 ; 3EB54D57 v_mac_f32_e32 v88, v20, v161 ; 3EB14314 v_mac_f32_e32 v89, v20, v162 ; 3EB34514 v_mac_f32_e32 v90, v20, v163 ; 3EB54714 v_mul_f32_e32 v91, v81, v188 ; 10B77951 v_mul_f32_e32 v92, v81, v189 ; 10B97B51 v_mul_f32_e32 v93, v81, v190 ; 10BB7D51 v_mac_f32_e32 v91, v85, v185 ; 3EB77355 v_mac_f32_e32 v92, v85, v186 ; 3EB97555 v_mac_f32_e32 v93, v85, v187 ; 3EBB7755 v_mac_f32_e32 v91, v86, v182 ; 3EB76D56 v_mac_f32_e32 v92, v86, v183 ; 3EB96F56 v_mac_f32_e32 v93, v86, v184 ; 3EBB7156 v_mac_f32_e32 v91, v87, v179 ; 3EB76757 v_mac_f32_e32 v92, v87, v180 ; 3EB96957 v_mac_f32_e32 v93, v87, v181 ; 3EBB6B57 v_mac_f32_e32 v91, v20, v176 ; 3EB76114 v_mac_f32_e32 v92, v20, v177 ; 3EB96314 v_mac_f32_e32 v93, v20, v178 ; 3EBB6514 v_mul_f32_e32 v43, v30, v43 ; 1056571E v_mul_f32_e32 v44, v30, v44 ; 1058591E v_mac_f32_e32 v43, v27, v197 ; 3E578B1B v_mac_f32_e32 v44, v27, v198 ; 3E598D1B v_mul_f32_e32 v31, v30, v31 ; 103E3F1E v_mul_f32_e32 v32, v30, v32 ; 1040411E v_mac_f32_e32 v31, v27, v203 ; 3E3F971B v_mac_f32_e32 v32, v27, v204 ; 3E41991B v_mul_f32_e32 v33, v30, v33 ; 1042431E v_mul_f32_e32 v30, v30, v34 ; 103C451E v_mac_f32_e32 v33, v27, v211 ; 3E43A71B v_mac_f32_e32 v30, v27, v212 ; 3E3DA91B v_mac_f32_e32 v43, v65, v195 ; 3E578741 v_mac_f32_e32 v44, v65, v196 ; 3E598941 v_mac_f32_e32 v31, v65, v201 ; 3E3F9341 v_mac_f32_e32 v32, v65, v202 ; 3E419541 v_mac_f32_e32 v33, v65, v209 ; 3E43A341 v_mac_f32_e32 v30, v65, v210 ; 3E3DA541 v_mac_f32_e32 v43, v18, v193 ; 3E578312 v_mac_f32_e32 v44, v18, v194 ; 3E598512 v_mac_f32_e32 v31, v18, v199 ; 3E3F8F12 v_mac_f32_e32 v32, v18, v200 ; 3E419112 v_mac_f32_e32 v33, v18, v207 ; 3E439F12 v_mac_f32_e32 v30, v18, v208 ; 3E3DA112 v_mac_f32_e32 v43, v59, v191 ; 3E577F3B v_mac_f32_e32 v44, v59, v192 ; 3E59813B v_mac_f32_e32 v31, v59, v45 ; 3E3E5B3B v_mac_f32_e32 v32, v59, v46 ; 3E405D3B v_mac_f32_e32 v33, v59, v205 ; 3E439B3B v_mac_f32_e32 v30, v59, v206 ; 3E3D9D3B v_mul_f32_e32 v18, v64, v51 ; 10246740 v_mul_f32_e32 v27, v64, v52 ; 10366940 v_mac_f32_e32 v18, v72, v217 ; 3E25B348 v_mac_f32_e32 v27, v72, v218 ; 3E37B548 v_mul_f32_e32 v28, v64, v28 ; 10383940 v_mul_f32_e32 v29, v64, v29 ; 103A3B40 v_mac_f32_e32 v28, v72, v223 ; 3E39BF48 v_mac_f32_e32 v29, v72, v224 ; 3E3BC148 s_waitcnt vmcnt(14) ; BF8C077E v_mul_f32_e32 v34, v64, v47 ; 10445F40 v_mul_f32_e32 v45, v64, v48 ; 105A6140 v_mac_f32_e32 v34, v72, v231 ; 3E45CF48 v_mac_f32_e32 v45, v72, v232 ; 3E5BD148 v_mac_f32_e32 v18, v73, v215 ; 3E25AF49 v_mac_f32_e32 v27, v73, v216 ; 3E37B149 v_mac_f32_e32 v28, v73, v221 ; 3E39BB49 v_mac_f32_e32 v29, v73, v222 ; 3E3BBD49 v_mac_f32_e32 v34, v73, v229 ; 3E45CB49 v_mac_f32_e32 v45, v73, v230 ; 3E5BCD49 v_mac_f32_e32 v18, v19, v213 ; 3E25AB13 v_mac_f32_e32 v27, v19, v214 ; 3E37AD13 v_mac_f32_e32 v28, v19, v219 ; 3E39B713 v_mac_f32_e32 v29, v19, v220 ; 3E3BB913 v_mac_f32_e32 v34, v19, v227 ; 3E45C713 v_mac_f32_e32 v45, v19, v228 ; 3E5BC913 v_mac_f32_e32 v18, v74, v35 ; 3E24474A v_mac_f32_e32 v27, v74, v36 ; 3E36494A v_mac_f32_e32 v28, v74, v53 ; 3E386B4A v_mac_f32_e32 v29, v74, v54 ; 3E3A6D4A v_mac_f32_e32 v34, v74, v225 ; 3E45C34A v_mac_f32_e32 v45, v74, v226 ; 3E5BC54A image_sample_l v[35:36], 10, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[8:15], s[4:7] ; F0900A00 00222325 s_waitcnt vmcnt(10) ; BF8C077A v_mul_f32_e32 v19, v81, v55 ; 10266F51 v_mul_f32_e32 v37, v81, v56 ; 104A7151 v_mac_f32_e32 v19, v85, v237 ; 3E27DB55 v_mac_f32_e32 v37, v85, v238 ; 3E4BDD55 s_waitcnt vmcnt(5) ; BF8C0775 v_mul_f32_e32 v25, v81, v25 ; 10323351 v_mul_f32_e32 v26, v81, v26 ; 10343551 v_mac_f32_e32 v25, v85, v243 ; 3E33E755 v_mac_f32_e32 v26, v85, v244 ; 3E35E955 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v35, v81, v35 ; 10464751 v_mul_f32_e32 v36, v81, v36 ; 10484951 v_mac_f32_e32 v35, v85, v251 ; 3E47F755 v_mac_f32_e32 v36, v85, v252 ; 3E49F955 v_mac_f32_e32 v19, v86, v235 ; 3E27D756 v_mac_f32_e32 v37, v86, v236 ; 3E4BD956 v_mac_f32_e32 v25, v86, v241 ; 3E33E356 v_mac_f32_e32 v26, v86, v242 ; 3E35E556 v_mac_f32_e32 v35, v86, v249 ; 3E47F356 v_mac_f32_e32 v36, v86, v250 ; 3E49F556 v_mac_f32_e32 v19, v87, v233 ; 3E27D357 v_mac_f32_e32 v37, v87, v234 ; 3E4BD557 v_mac_f32_e32 v25, v87, v239 ; 3E33DF57 v_mac_f32_e32 v26, v87, v240 ; 3E35E157 v_mac_f32_e32 v35, v87, v247 ; 3E47EF57 v_mac_f32_e32 v36, v87, v248 ; 3E49F157 v_mac_f32_e32 v19, v20, v49 ; 3E266314 v_mac_f32_e32 v37, v20, v50 ; 3E4A6514 v_mac_f32_e32 v25, v20, v57 ; 3E327314 v_mac_f32_e32 v26, v20, v58 ; 3E347514 v_mac_f32_e32 v35, v20, v245 ; 3E47EB14 v_mac_f32_e32 v36, v20, v246 ; 3E49ED14 v_mul_f32_e32 v20, v24, v22 ; 10282D18 v_mul_f32_e32 v22, v24, v23 ; 102C2F18 v_mul_f32_e32 v21, v24, v21 ; 102A2B18 v_mul_f32_e32 v23, v21, v66 ; 102E8515 v_mac_f32_e32 v23, v20, v75 ; 3E2E9714 v_mul_f32_e32 v24, v21, v67 ; 10308715 v_mac_f32_e32 v24, v20, v76 ; 3E309914 v_mul_f32_e32 v38, v21, v71 ; 104C8F15 v_mac_f32_e32 v38, v20, v77 ; 3E4C9B14 v_mac_f32_e32 v23, v22, v78 ; 3E2E9D16 v_mac_f32_e32 v24, v22, v79 ; 3E309F16 v_mac_f32_e32 v38, v22, v80 ; 3E4CA116 v_mul_f32_e32 v39, v21, v68 ; 104E8915 v_mac_f32_e32 v39, v20, v40 ; 3E4E5114 v_mul_f32_e32 v40, v21, v69 ; 10508B15 v_mac_f32_e32 v40, v20, v41 ; 3E505314 v_mul_f32_e32 v41, v21, v70 ; 10528D15 v_mac_f32_e32 v41, v20, v42 ; 3E525514 v_mac_f32_e32 v39, v22, v60 ; 3E4E7916 v_mac_f32_e32 v40, v22, v61 ; 3E507B16 v_mac_f32_e32 v41, v22, v63 ; 3E527F16 v_mul_f32_e32 v39, v39, v8 ; 104E1127 v_mac_f32_e32 v39, v23, v9 ; 3E4E1317 v_mul_f32_e32 v23, v40, v8 ; 102E1128 v_mac_f32_e32 v23, v24, v9 ; 3E2E1318 v_mul_f32_e32 v24, v41, v8 ; 10301129 v_mac_f32_e32 v24, v38, v9 ; 3E301326 v_mad_f32 v38, 2.0, v43, -1.0 ; D2820026 03CE56F4 v_mad_f32 v32, 2.0, v32, -1.0 ; D2820020 03CE40F4 v_mul_f32_e32 v38, v20, v38 ; 104C4D14 v_mac_f32_e32 v38, v22, v32 ; 3E4C4116 v_mad_f32 v31, 2.0, v31, -1.0 ; D282001F 03CE3EF4 v_mad_f32 v30, 2.0, v30, -1.0 ; D282001E 03CE3CF4 v_mul_f32_e32 v32, 0, v20 ; 10402880 v_mad_f32 v31, v22, v31, v32 ; D282001F 04823F16 v_mac_f32_e32 v31, v21, v30 ; 3E3E3D15 v_mad_f32 v30, 2.0, v44, -1.0 ; D282001E 03CE58F4 v_mad_f32 v33, 2.0, v33, -1.0 ; D2820021 03CE42F4 v_mul_f32_e32 v30, v20, v30 ; 103C3D14 v_mac_f32_e32 v30, 0, v22 ; 3E3C2C80 v_mac_f32_e32 v30, v21, v33 ; 3E3C4315 v_mad_f32 v18, 2.0, v18, -1.0 ; D2820012 03CE24F4 v_mad_f32 v29, 2.0, v29, -1.0 ; D282001D 03CE3AF4 v_mul_f32_e32 v18, v20, v18 ; 10242514 v_mac_f32_e32 v18, v22, v29 ; 3E243B16 v_mad_f32 v28, 2.0, v28, -1.0 ; D282001C 03CE38F4 v_mad_f32 v29, 2.0, v45, -1.0 ; D282001D 03CE5AF4 v_mad_f32 v28, v22, v28, v32 ; D282001C 04823916 v_mac_f32_e32 v28, v21, v29 ; 3E383B15 v_mad_f32 v27, 2.0, v27, -1.0 ; D282001B 03CE36F4 v_mad_f32 v29, 2.0, v34, -1.0 ; D282001D 03CE44F4 v_mul_f32_e32 v27, v20, v27 ; 10363714 v_mac_f32_e32 v27, 0, v22 ; 3E362C80 v_mac_f32_e32 v27, v21, v29 ; 3E363B15 v_mac_f32_e32 v38, 0, v21 ; 3E4C2A80 v_mul_f32_e32 v29, v31, v8 ; 103A111F v_mul_f32_e32 v30, v30, v8 ; 103C111E v_mul_f32_e32 v8, v38, v8 ; 10101126 v_mac_f32_e32 v29, v28, v9 ; 3E3A131C v_mac_f32_e32 v30, v27, v9 ; 3E3C131B v_mac_f32_e32 v18, 0, v21 ; 3E242A80 v_mac_f32_e32 v8, v18, v9 ; 3E101312 v_mul_f32_e32 v9, v21, v82 ; 1012A515 v_mac_f32_e32 v9, v20, v88 ; 3E12B114 v_mul_f32_e32 v18, v21, v83 ; 1024A715 v_mac_f32_e32 v18, v20, v89 ; 3E24B314 v_mul_f32_e32 v27, v21, v84 ; 1036A915 v_mac_f32_e32 v27, v20, v90 ; 3E36B514 v_mac_f32_e32 v9, v22, v91 ; 3E12B716 v_mac_f32_e32 v18, v22, v92 ; 3E24B916 v_mac_f32_e32 v27, v22, v93 ; 3E36BB16 v_mac_f32_e32 v39, v9, v4 ; 3E4E0909 v_mac_f32_e32 v23, v18, v4 ; 3E2E0912 v_mad_f32 v9, 2.0, v37, -1.0 ; D2820009 03CE4AF4 v_mad_f32 v18, 2.0, v19, -1.0 ; D2820012 03CE26F4 v_mul_f32_e32 v9, v20, v9 ; 10121314 v_mul_f32_e32 v18, v20, v18 ; 10242514 v_mad_f32 v19, 2.0, v25, -1.0 ; D2820013 03CE32F4 v_mac_f32_e32 v32, v22, v19 ; 3E402716 v_mad_f32 v19, 2.0, v26, -1.0 ; D2820013 03CE34F4 v_mac_f32_e32 v18, v22, v19 ; 3E242716 v_mac_f32_e32 v9, 0, v22 ; 3E122C80 v_mad_f32 v19, 2.0, v36, -1.0 ; D2820013 03CE48F4 v_mac_f32_e32 v32, v21, v19 ; 3E402715 v_mad_f32 v19, 2.0, v35, -1.0 ; D2820013 03CE46F4 v_mac_f32_e32 v9, v21, v19 ; 3E122715 v_mac_f32_e32 v18, 0, v21 ; 3E242A80 v_mac_f32_e32 v29, v32, v4 ; 3E3A0920 v_mac_f32_e32 v30, v9, v4 ; 3E3C0909 v_mac_f32_e32 v8, v18, v4 ; 3E100912 v_readlane_b32 s0, v253, 10 ; 020115FD s_nop 2 ; BF800002 v_mul_f32_e32 v9, s0, v10 ; 10121400 v_readlane_b32 s0, v253, 11 ; 020117FD s_nop 2 ; BF800002 v_mul_f32_e32 v18, s0, v10 ; 10241400 v_readlane_b32 s0, v253, 12 ; 020119FD s_nop 2 ; BF800002 v_mul_f32_e32 v10, s0, v10 ; 10141400 v_readlane_b32 s0, v253, 13 ; 02011BFD s_nop 2 ; BF800002 v_mac_f32_e32 v9, s0, v11 ; 3E121600 v_readlane_b32 s0, v253, 14 ; 02011DFD s_nop 2 ; BF800002 v_mac_f32_e32 v18, s0, v11 ; 3E241600 v_readlane_b32 s0, v253, 15 ; 02011FFD s_nop 2 ; BF800002 v_mac_f32_e32 v10, s0, v11 ; 3E141600 v_readlane_b32 s0, v253, 16 ; 020121FD s_nop 2 ; BF800002 v_mac_f32_e32 v9, s0, v0 ; 3E120000 v_readlane_b32 s0, v253, 17 ; 020123FD s_nop 2 ; BF800002 v_mac_f32_e32 v18, s0, v0 ; 3E240000 v_readlane_b32 s0, v253, 18 ; 020125FD s_nop 2 ; BF800002 v_mac_f32_e32 v10, s0, v0 ; 3E140000 v_readlane_b32 s0, v253, 19 ; 020127FD s_nop 2 ; BF800002 v_add_f32_e32 v0, s0, v9 ; 06001200 v_readlane_b32 s0, v253, 20 ; 020129FD s_nop 2 ; BF800002 v_add_f32_e32 v9, s0, v18 ; 06122400 v_readlane_b32 s0, v253, 29 ; 02013BFD v_readlane_b32 s1, v253, 30 ; 02033DFD s_nop 2 ; BF800002 s_load_dwordx4 s[0:3], s[0:1], 0x0 ; C0800100 s_load_dwordx8 s[4:11], s[100:101], 0x0 ; C0C26500 v_mul_f32_e32 v18, v0, v0 ; 10240100 v_mac_f32_e32 v18, v9, v9 ; 3E241309 v_readlane_b32 s12, v253, 9 ; 021913FD s_nop 2 ; BF800002 v_add_f32_e32 v0, s12, v10 ; 0600140C v_mul_f32_e32 v9, v30, v30 ; 10123D1E v_mac_f32_e32 v9, v29, v29 ; 3E123B1D v_mac_f32_e32 v9, v8, v8 ; 3E121108 v_add_f32_e32 v9, 1.0, v9 ; 061212F2 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mac_f32_e32 v18, v0, v0 ; 3E240100 v_mac_f32_e32 v24, v27, v4 ; 3E30091B v_mov_b32_e32 v19, v18 ; 7E260312 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[4:11], s[0:3] ; F0800800 00010012 v_mul_f32_e32 v4, v9, v29 ; 10083B09 v_mul_f32_e32 v10, v9, v30 ; 10143D09 v_mul_f32_e32 v8, v9, v8 ; 10101109 v_mad_f32 v4, -v4, v3, v5 ; D2820004 24160704 v_mad_f32 v5, -v10, v3, v6 ; D2820005 241A070A v_mad_f32 v3, -v8, v3, v7 ; D2820003 241E0708 v_mul_f32_e32 v6, v4, v4 ; 100C0904 v_mac_f32_e32 v6, v5, v5 ; 3E0C0B05 v_mac_f32_e32 v6, v3, v3 ; 3E0C0703 v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_mul_f32_e32 v7, v16, v16 ; 100E2110 v_mac_f32_e32 v7, v17, v17 ; 3E0E2311 v_mac_f32_e32 v7, v15, v15 ; 3E0E1F0F v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v5, v6, v5 ; 100A0B06 v_mul_f32_e32 v3, v6, v3 ; 10060706 v_mul_f32_e32 v6, v7, v16 ; 100C2107 v_mul_f32_e32 v8, v7, v17 ; 10102307 v_mul_f32_e32 v7, v7, v15 ; 100E1F07 v_mul_f32_e32 v6, v6, v4 ; 100C0906 v_mac_f32_e32 v6, v8, v5 ; 3E0C0B08 v_mac_f32_e32 v6, v7, v3 ; 3E0C0707 v_max_f32_e32 v6, 0x38d1b717, v6 ; 200C0CFF 38D1B717 v_log_f32_e32 v6, v6 ; 7E0C4F06 v_mul_f32_e32 v4, v14, v4 ; 1008090E v_mac_f32_e32 v4, v13, v5 ; 3E080B0D v_mul_f32_e32 v5, 0x42000000, v2 ; 100A04FF 42000000 v_mul_legacy_f32_e32 v5, v5, v6 ; 0E0A0D05 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_mac_f32_e32 v62, -2.0, v5 ; 3E7C0AF5 v_mul_f32_e32 v6, v62, v5 ; 100C0B3E v_mul_f32_e32 v5, v6, v5 ; 100A0B06 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_mac_f32_e32 v4, v12, v3 ; 3E08070C v_readlane_b32 s0, v253, 0 ; 020101FD s_nop 2 ; BF800002 v_mul_f32_e32 v3, s0, v39 ; 10064E00 v_readlane_b32 s1, v253, 1 ; 020303FD s_nop 2 ; BF800002 v_mul_f32_e32 v5, s1, v23 ; 100A2E01 v_readlane_b32 s2, v253, 8 ; 020511FD s_nop 2 ; BF800002 v_add_f32_e64 v6, s2, s2 ; D2060006 00000402 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v6, v0, v6 ; 100C0D00 v_readlane_b32 s2, v253, 5 ; 02050BFD s_nop 2 ; BF800002 v_max_f32_e32 v6, s2, v6 ; 200C0C02 v_readlane_b32 s2, v253, 2 ; 020505FD s_nop 2 ; BF800002 v_mul_f32_e32 v7, s2, v24 ; 100E3002 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_readlane_b32 s3, v253, 6 ; 02070DFD s_nop 2 ; BF800002 v_add_f32_e64 v8, s3, s3 ; D2060008 00000603 v_readlane_b32 s3, v253, 7 ; 02070FFD s_nop 2 ; BF800002 v_add_f32_e64 v9, s3, s3 ; D2060009 00000603 v_mul_f32_e32 v8, v0, v8 ; 10101100 v_mul_f32_e32 v9, v0, v9 ; 10121300 v_readlane_b32 s3, v253, 3 ; 020707FD s_nop 2 ; BF800002 v_max_f32_e32 v8, s3, v8 ; 20101003 v_readlane_b32 s3, v253, 4 ; 020709FD s_nop 2 ; BF800002 v_max_f32_e32 v9, s3, v9 ; 20121203 v_min_f32_e32 v6, 1.0, v6 ; 1E0C0CF2 v_min_f32_e32 v8, 1.0, v8 ; 1E1010F2 v_min_f32_e32 v9, 1.0, v9 ; 1E1212F2 v_mul_f32_e32 v6, v39, v6 ; 100C0D27 v_mul_f32_e32 v8, v23, v8 ; 10101117 v_mul_f32_e32 v9, v24, v9 ; 10121318 v_mac_f32_e32 v6, v4, v3 ; 3E0C0704 v_mac_f32_e32 v8, v4, v5 ; 3E100B04 v_mac_f32_e32 v9, v4, v7 ; 3E120F04 v_mac_f32_e32 v6, s0, v2 ; 3E0C0400 v_mac_f32_e32 v8, s1, v2 ; 3E100401 v_mac_f32_e32 v9, s2, v2 ; 3E120402 v_mul_f32_e32 v2, v0, v6 ; 10040D00 v_mul_f32_e32 v3, v0, v8 ; 10061100 v_mul_f32_e32 v0, v0, v9 ; 10001300 v_mul_f32_e32 v2, 0.5, v2 ; 100404F0 v_mul_f32_e32 v3, 0.5, v3 ; 100606F0 v_mul_f32_e32 v0, 0.5, v0 ; 100000F0 v_mul_f32_e32 v2, v1, v2 ; 10040501 v_mul_f32_e32 v3, v1, v3 ; 10060701 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 104 VGPRS: 256 Code Size: 5048 bytes LDS: 0 blocks Scratch: 14336 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL CONST[0..29] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, -1.0000, 0.0000} 0: MOV TEMP[0].w, IN[4].wwww 1: MUL TEMP[1].xyz, IN[0].xyzz, CONST[16].xyzz 2: MUL TEMP[2].x, IN[2].wwww, IN[2].wwww 3: MOV TEMP[3].y, IMM[0].xxxx 4: MOV TEMP[3].x, TEMP[2].xxxx 5: MOV TEMP[3].z, TEMP[2].xxxx 6: MUL TEMP[2], CONST[17], TEMP[1].xxxx 7: MAD TEMP[2], CONST[18], TEMP[1].yyyy, TEMP[2] 8: MAD TEMP[2].xyz, CONST[19], TEMP[1].zzzz, TEMP[2] 9: LRP TEMP[1].xyz, TEMP[3].xyzz, TEMP[2].xyzz, TEMP[1].xyzz 10: DP3 TEMP[2].x, CONST[21].xyzz, TEMP[1].xyzz 11: ADD TEMP[2].x, TEMP[2].xxxx, CONST[21].wwww 12: MUL TEMP[2].xyz, TEMP[2].xxxx, CONST[21].xyzz 13: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[2].xyzz 14: LRP TEMP[1].xyz, CONST[22].xxxx, TEMP[1].xyzz, TEMP[2].xyzz 15: MOV TEMP[2].xz, IMM[0].xxxx 16: MOV TEMP[2].y, CONST[23].xxxx 17: MUL TEMP[3].xyz, TEMP[2].zxyy, IN[1].yzxx 18: MAD TEMP[2].xyz, TEMP[2].yzxx, IN[1].zxyy, -TEMP[3].xyzz 19: MUL TEMP[3].xyz, IN[1].zxyy, TEMP[2].yzxx 20: MAD TEMP[3].xyz, IN[1].yzxx, TEMP[2].zxyy, -TEMP[3].xyzz 21: MUL TEMP[4].xyz, IN[1].zxyy, TEMP[3].yzxx 22: MAD TEMP[4].xyz, IN[1].yzxx, TEMP[3].zxyy, -TEMP[4].xyzz 23: DP3 TEMP[2].x, TEMP[4].xyzz, TEMP[2].xyzz 24: FSLT TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx 25: UIF TEMP[2].xxxx :0 26: MOV TEMP[0].w, IMM[0].zzzz 27: ELSE :0 28: MOV TEMP[0].w, IMM[0].yyyy 29: ENDIF 30: MUL TEMP[2], CONST[26], TEMP[1].xxxx 31: MAD TEMP[2], CONST[27], TEMP[1].yyyy, TEMP[2] 32: MAD TEMP[2], CONST[28], TEMP[1].zzzz, TEMP[2] 33: ADD TEMP[2], TEMP[2], CONST[29] 34: MAD TEMP[4].xy, IN[3].xyyy, CONST[24].xyyy, CONST[24].zwww 35: MAD TEMP[5].xy, IN[3].xyyy, CONST[25].xyyy, CONST[25].zwww 36: MOV TEMP[4].zw, TEMP[5].yyxy 37: MUL TEMP[5], CONST[7], TEMP[1].xxxx 38: MAD TEMP[5], CONST[8], TEMP[1].yyyy, TEMP[5] 39: MAD TEMP[1], CONST[9], TEMP[1].zzzz, TEMP[5] 40: ADD TEMP[1].xyz, TEMP[1], CONST[10] 41: MOV TEMP[5].x, CONST[11].xxxx 42: MOV TEMP[5].y, CONST[12].xxxx 43: MOV TEMP[5].z, CONST[13].xxxx 44: MOV TEMP[6].x, CONST[11].yyyy 45: MOV TEMP[6].y, CONST[12].yyyy 46: MOV TEMP[6].z, CONST[13].yyyy 47: MOV TEMP[7].x, CONST[11].zzzz 48: MOV TEMP[7].y, CONST[12].zzzz 49: MOV TEMP[7].z, CONST[13].zzzz 50: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[1].xxxx 51: MAD TEMP[5].xyz, TEMP[6].xyzz, IN[1].yyyy, TEMP[5].xyzz 52: MAD TEMP[5].xyz, TEMP[7].xyzz, IN[1].zzzz, TEMP[5].xyzz 53: DP3 TEMP[6].x, TEMP[5].xyzz, TEMP[5].xyzz 54: RSQ TEMP[6].x, TEMP[6].xxxx 55: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[6].xxxx 56: MUL TEMP[6].xyz, CONST[7].xyzz, TEMP[3].xxxx 57: MAD TEMP[6].xyz, CONST[8].xyzz, TEMP[3].yyyy, TEMP[6].xyzz 58: MAD TEMP[3].xyz, CONST[9].xyzz, TEMP[3].zzzz, TEMP[6].xyzz 59: DP3 TEMP[6].x, TEMP[3].xyzz, TEMP[3].xyzz 60: RSQ TEMP[6].x, TEMP[6].xxxx 61: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[6].xxxx 62: MUL TEMP[6].xyz, TEMP[5].zxyy, TEMP[3].yzxx 63: MAD TEMP[6].xyz, TEMP[5].yzxx, TEMP[3].zxyy, -TEMP[6].xyzz 64: MUL TEMP[0].xyz, TEMP[6].xyzz, TEMP[0].wwww 65: MOV TEMP[6].x, TEMP[3].xxxx 66: MOV TEMP[6].y, TEMP[0].xxxx 67: MOV TEMP[6].z, TEMP[5].xxxx 68: MOV TEMP[6].w, TEMP[1].xxxx 69: MOV TEMP[7].x, TEMP[3].yyyy 70: MOV TEMP[7].y, TEMP[0].yyyy 71: MOV TEMP[7].z, TEMP[5].yyyy 72: MOV TEMP[7].w, TEMP[1].yyyy 73: MOV TEMP[3].x, TEMP[3].zzzz 74: MOV TEMP[3].y, TEMP[0].zzzz 75: MOV TEMP[3].z, TEMP[5].zzzz 76: MOV TEMP[3].w, TEMP[1].zzzz 77: MOV TEMP[0].w, IMM[0].yyyy 78: MOV TEMP[0].xyz, TEMP[5].xyzx 79: DP4 TEMP[1].x, CONST[0], TEMP[0] 80: DP4 TEMP[8].x, CONST[1], TEMP[0] 81: MOV TEMP[1].y, TEMP[8].xxxx 82: DP4 TEMP[0].x, CONST[2], TEMP[0] 83: MOV TEMP[1].z, TEMP[0].xxxx 84: MUL TEMP[0], TEMP[5].xyzz, TEMP[5].yzzx 85: DP4 TEMP[8].x, CONST[3], TEMP[0] 86: DP4 TEMP[9].x, CONST[4], TEMP[0] 87: MOV TEMP[8].y, TEMP[9].xxxx 88: DP4 TEMP[0].x, CONST[5], TEMP[0] 89: MOV TEMP[8].z, TEMP[0].xxxx 90: MUL TEMP[0].x, TEMP[5].yyyy, TEMP[5].yyyy 91: MAD TEMP[0].x, TEMP[5].xxxx, TEMP[5].xxxx, -TEMP[0].xxxx 92: MAD TEMP[0].xyz, CONST[6].xyzz, TEMP[0].xxxx, TEMP[8].xyzz 93: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz 94: MOV TEMP[0].yzw, TEMP[0].yxyz 95: MAD TEMP[0].x, TEMP[2].zzzz, CONST[15].zzzz, CONST[15].wwww 96: MOV OUT[1], TEMP[4] 97: MOV OUT[2], TEMP[6] 98: MOV OUT[3], TEMP[7] 99: MOV OUT[4], TEMP[3] 100: MOV OUT[0], TEMP[2] 101: MOV OUT[5], TEMP[0] 102: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344) %78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 348) %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368) %81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384) %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388) %83 = call float @llvm.SI.load.const(<16 x i8> %12, i32 392) %84 = call float @llvm.SI.load.const(<16 x i8> %12, i32 396) %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 400) %86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 404) %87 = call float @llvm.SI.load.const(<16 x i8> %12, i32 408) %88 = call float @llvm.SI.load.const(<16 x i8> %12, i32 412) %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 416) %90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 420) %91 = call float @llvm.SI.load.const(<16 x i8> %12, i32 424) %92 = call float @llvm.SI.load.const(<16 x i8> %12, i32 428) %93 = call float @llvm.SI.load.const(<16 x i8> %12, i32 432) %94 = call float @llvm.SI.load.const(<16 x i8> %12, i32 436) %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 440) %96 = call float @llvm.SI.load.const(<16 x i8> %12, i32 444) %97 = call float @llvm.SI.load.const(<16 x i8> %12, i32 448) %98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 452) %99 = call float @llvm.SI.load.const(<16 x i8> %12, i32 456) %100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 460) %101 = call float @llvm.SI.load.const(<16 x i8> %12, i32 464) %102 = call float @llvm.SI.load.const(<16 x i8> %12, i32 468) %103 = call float @llvm.SI.load.const(<16 x i8> %12, i32 472) %104 = call float @llvm.SI.load.const(<16 x i8> %12, i32 476) %105 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %106 = load <16 x i8>, <16 x i8> addrspace(2)* %105, align 16, !tbaa !0 %107 = add i32 %5, %7 %108 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %106, i32 0, i32 %107) %109 = extractelement <4 x float> %108, i32 0 %110 = extractelement <4 x float> %108, i32 1 %111 = extractelement <4 x float> %108, i32 2 %112 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %113 = load <16 x i8>, <16 x i8> addrspace(2)* %112, align 16, !tbaa !0 %114 = add i32 %5, %7 %115 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %113, i32 0, i32 %114) %116 = extractelement <4 x float> %115, i32 0 %117 = extractelement <4 x float> %115, i32 1 %118 = extractelement <4 x float> %115, i32 2 %119 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %120 = load <16 x i8>, <16 x i8> addrspace(2)* %119, align 16, !tbaa !0 %121 = add i32 %5, %7 %122 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %120, i32 0, i32 %121) %123 = extractelement <4 x float> %122, i32 3 %124 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %125 = load <16 x i8>, <16 x i8> addrspace(2)* %124, align 16, !tbaa !0 %126 = add i32 %5, %7 %127 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %125, i32 0, i32 %126) %128 = extractelement <4 x float> %127, i32 0 %129 = extractelement <4 x float> %127, i32 1 %130 = fmul float %109, %63 %131 = fmul float %110, %64 %132 = fmul float %111, %65 %133 = fmul float %123, %123 %134 = fmul float %66, %130 %135 = fmul float %67, %130 %136 = fmul float %68, %130 %137 = fmul float %69, %131 %138 = fadd float %137, %134 %139 = fmul float %70, %131 %140 = fadd float %139, %135 %141 = fmul float %71, %131 %142 = fadd float %141, %136 %143 = fmul float %72, %132 %144 = fadd float %143, %138 %145 = fmul float %73, %132 %146 = fadd float %145, %140 %147 = fmul float %74, %132 %148 = fadd float %147, %142 %149 = call float @llvm.AMDGPU.lrp(float %133, float %144, float %130) %150 = call float @llvm.AMDGPU.lrp(float 0.000000e+00, float %146, float %131) %151 = call float @llvm.AMDGPU.lrp(float %133, float %148, float %132) %152 = fmul float %75, %149 %153 = fmul float %76, %150 %154 = fadd float %153, %152 %155 = fmul float %77, %151 %156 = fadd float %154, %155 %157 = fadd float %156, %78 %158 = fmul float %157, %75 %159 = fmul float %157, %76 %160 = fmul float %157, %77 %161 = fsub float %149, %158 %162 = fsub float %150, %159 %163 = fsub float %151, %160 %164 = call float @llvm.AMDGPU.lrp(float %79, float %149, float %161) %165 = call float @llvm.AMDGPU.lrp(float %79, float %150, float %162) %166 = call float @llvm.AMDGPU.lrp(float %79, float %151, float %163) %167 = fmul float %117, 0.000000e+00 %168 = fmul float %118, 0.000000e+00 %169 = fmul float %80, %116 %170 = fmul float %80, %118 %171 = fsub float %170, %167 %172 = fmul float %116, 0.000000e+00 %173 = fsub float %172, %168 %174 = fmul float %117, 0.000000e+00 %175 = fsub float %174, %169 %176 = fmul float %118, %173 %177 = fmul float %116, %175 %178 = fmul float %117, %171 %179 = fmul float %117, %175 %180 = fsub float %179, %176 %181 = fmul float %118, %171 %182 = fsub float %181, %177 %183 = fmul float %116, %173 %184 = fsub float %183, %178 %185 = fmul float %118, %182 %186 = fmul float %116, %184 %187 = fmul float %117, %180 %188 = fmul float %117, %184 %189 = fsub float %188, %185 %190 = fmul float %118, %180 %191 = fsub float %190, %186 %192 = fmul float %116, %182 %193 = fsub float %192, %187 %194 = fmul float %189, %171 %195 = fmul float %191, %173 %196 = fadd float %195, %194 %197 = fmul float %193, %175 %198 = fadd float %196, %197 %199 = fcmp olt float %198, 0.000000e+00 %. = select i1 %199, float -1.000000e+00, float 1.000000e+00 %200 = fmul float %89, %164 %201 = fmul float %90, %164 %202 = fmul float %91, %164 %203 = fmul float %92, %164 %204 = fmul float %93, %165 %205 = fadd float %204, %200 %206 = fmul float %94, %165 %207 = fadd float %206, %201 %208 = fmul float %95, %165 %209 = fadd float %208, %202 %210 = fmul float %96, %165 %211 = fadd float %210, %203 %212 = fmul float %97, %166 %213 = fadd float %212, %205 %214 = fmul float %98, %166 %215 = fadd float %214, %207 %216 = fmul float %99, %166 %217 = fadd float %216, %209 %218 = fmul float %100, %166 %219 = fadd float %218, %211 %220 = fadd float %213, %101 %221 = fadd float %215, %102 %222 = fadd float %217, %103 %223 = fadd float %219, %104 %224 = fmul float %128, %81 %225 = fadd float %224, %83 %226 = fmul float %129, %82 %227 = fadd float %226, %84 %228 = fmul float %128, %85 %229 = fadd float %228, %87 %230 = fmul float %129, %86 %231 = fadd float %230, %88 %232 = fmul float %40, %164 %233 = fmul float %41, %164 %234 = fmul float %42, %164 %235 = fmul float %43, %165 %236 = fadd float %235, %232 %237 = fmul float %44, %165 %238 = fadd float %237, %233 %239 = fmul float %45, %165 %240 = fadd float %239, %234 %241 = fmul float %46, %166 %242 = fadd float %241, %236 %243 = fmul float %47, %166 %244 = fadd float %243, %238 %245 = fmul float %48, %166 %246 = fadd float %245, %240 %247 = fadd float %242, %49 %248 = fadd float %244, %50 %249 = fadd float %246, %51 %250 = fmul float %52, %116 %251 = fmul float %55, %116 %252 = fmul float %58, %116 %253 = fmul float %53, %117 %254 = fadd float %253, %250 %255 = fmul float %56, %117 %256 = fadd float %255, %251 %257 = fmul float %59, %117 %258 = fadd float %257, %252 %259 = fmul float %54, %118 %260 = fadd float %259, %254 %261 = fmul float %57, %118 %262 = fadd float %261, %256 %263 = fmul float %60, %118 %264 = fadd float %263, %258 %265 = fmul float %260, %260 %266 = fmul float %262, %262 %267 = fadd float %266, %265 %268 = fmul float %264, %264 %269 = fadd float %267, %268 %270 = call float @llvm.AMDGPU.rsq.clamped.f32(float %269) %271 = fmul float %260, %270 %272 = fmul float %262, %270 %273 = fmul float %264, %270 %274 = fmul float %40, %180 %275 = fmul float %41, %180 %276 = fmul float %42, %180 %277 = fmul float %43, %182 %278 = fadd float %277, %274 %279 = fmul float %44, %182 %280 = fadd float %279, %275 %281 = fmul float %45, %182 %282 = fadd float %281, %276 %283 = fmul float %46, %184 %284 = fadd float %283, %278 %285 = fmul float %47, %184 %286 = fadd float %285, %280 %287 = fmul float %48, %184 %288 = fadd float %287, %282 %289 = fmul float %284, %284 %290 = fmul float %286, %286 %291 = fadd float %290, %289 %292 = fmul float %288, %288 %293 = fadd float %291, %292 %294 = call float @llvm.AMDGPU.rsq.clamped.f32(float %293) %295 = fmul float %284, %294 %296 = fmul float %286, %294 %297 = fmul float %288, %294 %298 = fmul float %273, %296 %299 = fmul float %271, %297 %300 = fmul float %272, %295 %301 = fmul float %272, %297 %302 = fsub float %301, %298 %303 = fmul float %273, %295 %304 = fsub float %303, %299 %305 = fmul float %271, %296 %306 = fsub float %305, %300 %307 = fmul float %302, %. %308 = fmul float %304, %. %309 = fmul float %306, %. %310 = fmul float %13, %271 %311 = fmul float %14, %272 %312 = fadd float %310, %311 %313 = fmul float %15, %273 %314 = fadd float %312, %313 %315 = fadd float %314, %16 %316 = fmul float %17, %271 %317 = fmul float %18, %272 %318 = fadd float %316, %317 %319 = fmul float %19, %273 %320 = fadd float %318, %319 %321 = fadd float %320, %20 %322 = fmul float %21, %271 %323 = fmul float %22, %272 %324 = fadd float %322, %323 %325 = fmul float %23, %273 %326 = fadd float %324, %325 %327 = fadd float %326, %24 %328 = fmul float %271, %272 %329 = fmul float %272, %273 %330 = fmul float %273, %273 %331 = fmul float %273, %271 %332 = fmul float %25, %328 %333 = fmul float %26, %329 %334 = fadd float %332, %333 %335 = fmul float %27, %330 %336 = fadd float %334, %335 %337 = fmul float %28, %331 %338 = fadd float %336, %337 %339 = fmul float %29, %328 %340 = fmul float %30, %329 %341 = fadd float %339, %340 %342 = fmul float %31, %330 %343 = fadd float %341, %342 %344 = fmul float %32, %331 %345 = fadd float %343, %344 %346 = fmul float %33, %328 %347 = fmul float %34, %329 %348 = fadd float %346, %347 %349 = fmul float %35, %330 %350 = fadd float %348, %349 %351 = fmul float %36, %331 %352 = fadd float %350, %351 %353 = fmul float %272, %272 %354 = fmul float %271, %271 %355 = fsub float %354, %353 %356 = fmul float %37, %355 %357 = fadd float %356, %338 %358 = fmul float %38, %355 %359 = fadd float %358, %345 %360 = fmul float %39, %355 %361 = fadd float %360, %352 %362 = fadd float %357, %315 %363 = fadd float %359, %321 %364 = fadd float %361, %327 %365 = fmul float %222, %61 %366 = fadd float %365, %62 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %225, float %227, float %229, float %231) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %295, float %307, float %271, float %247) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %296, float %308, float %272, float %248) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %297, float %309, float %273, float %249) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %366, float %362, float %363, float %364) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %220, float %221, float %222, float %223) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[4:7], v0, s[12:15], 0 idxen ; E00C2000 80030400 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[7:10], v0, s[16:19], 0 idxen ; E00C2000 80040700 buffer_load_format_xyzw v[11:14], v0, s[20:23], 0 idxen ; E00C2000 80050B00 s_load_dwordx4 s[44:47], s[2:3], 0x0 ; C0960300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[44:47], 0x0 ; C2022D00 s_buffer_load_dword s6, s[44:47], 0x1 ; C2032D01 s_buffer_load_dword s3, s[44:47], 0x2 ; C201AD02 s_buffer_load_dword s0, s[44:47], 0x3 ; C2002D03 s_buffer_load_dword s7, s[44:47], 0x4 ; C203AD04 s_buffer_load_dword s9, s[44:47], 0x5 ; C204AD05 s_buffer_load_dword s5, s[44:47], 0x6 ; C202AD06 s_buffer_load_dword s1, s[44:47], 0x7 ; C200AD07 s_buffer_load_dword s10, s[44:47], 0x8 ; C2052D08 s_buffer_load_dword s11, s[44:47], 0x9 ; C205AD09 s_buffer_load_dword s8, s[44:47], 0xa ; C2042D0A s_buffer_load_dword s2, s[44:47], 0xb ; C2012D0B s_buffer_load_dword s19, s[44:47], 0xc ; C209AD0C s_buffer_load_dword s24, s[44:47], 0xd ; C20C2D0D s_buffer_load_dword s18, s[44:47], 0xe ; C2092D0E s_buffer_load_dword s12, s[44:47], 0xf ; C2062D0F s_buffer_load_dword s21, s[44:47], 0x10 ; C20AAD10 s_buffer_load_dword s27, s[44:47], 0x11 ; C20DAD11 s_buffer_load_dword s20, s[44:47], 0x12 ; C20A2D12 s_buffer_load_dword s13, s[44:47], 0x13 ; C206AD13 s_buffer_load_dword s25, s[44:47], 0x14 ; C20CAD14 s_buffer_load_dword s28, s[44:47], 0x15 ; C20E2D15 s_buffer_load_dword s22, s[44:47], 0x16 ; C20B2D16 s_buffer_load_dword s17, s[44:47], 0x17 ; C208AD17 s_buffer_load_dword s14, s[44:47], 0x18 ; C2072D18 s_buffer_load_dword s15, s[44:47], 0x19 ; C207AD19 s_buffer_load_dword s16, s[44:47], 0x1a ; C2082D1A s_buffer_load_dword s48, s[44:47], 0x1c ; C2182D1C s_buffer_load_dword s49, s[44:47], 0x1d ; C218AD1D s_buffer_load_dword s50, s[44:47], 0x1e ; C2192D1E s_buffer_load_dword s51, s[44:47], 0x20 ; C219AD20 s_buffer_load_dword s52, s[44:47], 0x21 ; C21A2D21 s_buffer_load_dword s53, s[44:47], 0x22 ; C21AAD22 s_buffer_load_dword s54, s[44:47], 0x24 ; C21B2D24 s_buffer_load_dword s55, s[44:47], 0x25 ; C21BAD25 s_buffer_load_dword s56, s[44:47], 0x26 ; C21C2D26 s_buffer_load_dword s57, s[44:47], 0x28 ; C21CAD28 s_buffer_load_dword s58, s[44:47], 0x29 ; C21D2D29 s_buffer_load_dword s59, s[44:47], 0x2a ; C21DAD2A s_buffer_load_dword s60, s[44:47], 0x2c ; C21E2D2C s_buffer_load_dword s61, s[44:47], 0x2d ; C21EAD2D s_buffer_load_dword s62, s[44:47], 0x2e ; C21F2D2E s_buffer_load_dword s63, s[44:47], 0x30 ; C21FAD30 s_buffer_load_dword s64, s[44:47], 0x31 ; C2202D31 s_buffer_load_dword s65, s[44:47], 0x32 ; C220AD32 s_buffer_load_dword s66, s[44:47], 0x34 ; C2212D34 s_buffer_load_dword s67, s[44:47], 0x35 ; C221AD35 s_buffer_load_dword s68, s[44:47], 0x36 ; C2222D36 s_buffer_load_dword s23, s[44:47], 0x3e ; C20BAD3E s_buffer_load_dword s26, s[44:47], 0x3f ; C20D2D3F s_buffer_load_dword s69, s[44:47], 0x40 ; C222AD40 s_buffer_load_dword s70, s[44:47], 0x41 ; C2232D41 s_buffer_load_dword s71, s[44:47], 0x42 ; C223AD42 s_buffer_load_dword s72, s[44:47], 0x44 ; C2242D44 s_buffer_load_dword s73, s[44:47], 0x45 ; C224AD45 s_buffer_load_dword s74, s[44:47], 0x46 ; C2252D46 s_buffer_load_dword s75, s[44:47], 0x48 ; C225AD48 s_buffer_load_dword s76, s[44:47], 0x49 ; C2262D49 s_buffer_load_dword s77, s[44:47], 0x4a ; C226AD4A s_buffer_load_dword s78, s[44:47], 0x4c ; C2272D4C s_buffer_load_dword s79, s[44:47], 0x4d ; C227AD4D s_buffer_load_dword s80, s[44:47], 0x4e ; C2282D4E s_buffer_load_dword s81, s[44:47], 0x54 ; C228AD54 s_buffer_load_dword s82, s[44:47], 0x55 ; C2292D55 s_buffer_load_dword s83, s[44:47], 0x56 ; C229AD56 s_buffer_load_dword s84, s[44:47], 0x57 ; C22A2D57 s_buffer_load_dword s85, s[44:47], 0x58 ; C22AAD58 s_buffer_load_dword s86, s[44:47], 0x5c ; C22B2D5C s_buffer_load_dword s87, s[44:47], 0x60 ; C22BAD60 s_buffer_load_dword s88, s[44:47], 0x61 ; C22C2D61 s_buffer_load_dword s89, s[44:47], 0x62 ; C22CAD62 s_buffer_load_dword s90, s[44:47], 0x63 ; C22D2D63 s_buffer_load_dword s91, s[44:47], 0x64 ; C22DAD64 s_buffer_load_dword s92, s[44:47], 0x65 ; C22E2D65 s_buffer_load_dword s93, s[44:47], 0x66 ; C22EAD66 s_buffer_load_dword s94, s[44:47], 0x67 ; C22F2D67 s_buffer_load_dword s95, s[44:47], 0x68 ; C22FAD68 s_buffer_load_dword s37, s[44:47], 0x69 ; C212AD69 s_buffer_load_dword s38, s[44:47], 0x6a ; C2132D6A s_buffer_load_dword s39, s[44:47], 0x6b ; C213AD6B s_buffer_load_dword s40, s[44:47], 0x6c ; C2142D6C s_buffer_load_dword s41, s[44:47], 0x6d ; C214AD6D s_buffer_load_dword s42, s[44:47], 0x6e ; C2152D6E s_buffer_load_dword s43, s[44:47], 0x6f ; C215AD6F s_buffer_load_dword s34, s[44:47], 0x70 ; C2112D70 s_buffer_load_dword s29, s[44:47], 0x71 ; C20EAD71 s_buffer_load_dword s35, s[44:47], 0x72 ; C211AD72 s_buffer_load_dword s30, s[44:47], 0x73 ; C20F2D73 s_buffer_load_dword s31, s[44:47], 0x74 ; C20FAD74 s_buffer_load_dword s32, s[44:47], 0x75 ; C2102D75 s_buffer_load_dword s36, s[44:47], 0x76 ; C2122D76 s_buffer_load_dword s33, s[44:47], 0x77 ; C210AD77 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s89 ; 7E000259 v_mov_b32_e32 v7, s90 ; 7E0E025A v_mov_b32_e32 v8, s93 ; 7E10025D v_mov_b32_e32 v9, s94 ; 7E12025E v_mul_f32_e32 v1, s69, v1 ; 10020245 v_mul_f32_e32 v2, s70, v2 ; 10040446 v_mul_f32_e32 v3, s71, v3 ; 10060647 v_mac_f32_e32 v0, s87, v11 ; 3E001657 v_mac_f32_e32 v7, s88, v12 ; 3E0E1858 v_mac_f32_e32 v8, s91, v11 ; 3E10165B v_mac_f32_e32 v9, s92, v12 ; 3E12185C v_mul_f32_e32 v11, s60, v4 ; 1016083C v_mul_f32_e32 v12, s63, v4 ; 1018083F v_mul_f32_e32 v13, s66, v4 ; 101A0842 v_mul_f32_e32 v14, s72, v1 ; 101C0248 v_mul_f32_e32 v15, s73, v1 ; 101E0249 v_mul_f32_e32 v16, s74, v1 ; 1020024A v_mov_b32_e32 v17, 0x80000000 ; 7E2202FF 80000000 v_mul_f32_e32 v18, s86, v4 ; 10240856 v_mul_f32_e32 v19, v17, v5 ; 10260B11 v_mac_f32_e32 v19, s86, v6 ; 3E260C56 v_mac_f32_e32 v11, s61, v5 ; 3E160A3D v_mac_f32_e32 v12, s64, v5 ; 3E180A40 v_mac_f32_e32 v13, s67, v5 ; 3E1A0A43 v_mac_f32_e32 v14, s75, v2 ; 3E1C044B v_mac_f32_e32 v15, s76, v2 ; 3E1E044C v_mac_f32_e32 v16, s77, v2 ; 3E20044D v_mac_f32_e32 v11, s62, v6 ; 3E160C3E v_mac_f32_e32 v12, s65, v6 ; 3E180C41 v_mac_f32_e32 v13, s68, v6 ; 3E1A0C44 v_mac_f32_e32 v14, s78, v3 ; 3E1C064E v_mac_f32_e32 v15, s79, v3 ; 3E1E064F v_mac_f32_e32 v16, s80, v3 ; 3E200650 v_mul_f32_e32 v20, v10, v10 ; 1028150A v_mad_f32 v10, -v10, v10, 1.0 ; D282000A 23CA150A v_mul_f32_e32 v1, v1, v10 ; 10021501 v_mul_f32_e32 v3, v3, v10 ; 10061503 v_mac_f32_e32 v1, v14, v20 ; 3E02290E v_mac_f32_e32 v3, v16, v20 ; 3E062910 v_mac_f32_e32 v2, 0, v15 ; 3E041E80 v_mul_f32_e32 v10, s81, v1 ; 10140251 v_mac_f32_e32 v10, s82, v2 ; 3E140452 v_mac_f32_e32 v10, s83, v3 ; 3E140653 v_add_f32_e32 v10, s84, v10 ; 06141454 v_mad_f32 v14, -v10, s81, v1 ; D282000E 2404A30A v_mad_f32 v15, -v10, s82, v2 ; D282000F 2408A50A v_mad_f32 v10, -v10, s83, v3 ; D282000A 240CA70A v_sub_f32_e64 v16, 1.0, s85 ; D2080010 0000AAF2 v_mul_f32_e32 v14, v14, v16 ; 101C210E v_mul_f32_e32 v15, v15, v16 ; 101E210F v_mul_f32_e32 v10, v10, v16 ; 1014210A v_mac_f32_e32 v14, s85, v1 ; 3E1C0255 v_mac_f32_e32 v15, s85, v2 ; 3E1E0455 v_mac_f32_e32 v10, s85, v3 ; 3E140655 v_mul_f32_e32 v1, s95, v14 ; 10021C5F v_mul_f32_e32 v2, v17, v6 ; 10040D11 v_mac_f32_e32 v2, 0, v4 ; 3E040880 v_mad_f32 v3, 0, v5, -v18 ; D2820003 844A0A80 v_mul_f32_e32 v16, v2, v6 ; 10200D02 v_mad_f32 v16, v5, v3, -v16 ; D2820010 84420705 v_mul_f32_e32 v17, s48, v16 ; 10222030 v_mul_f32_e32 v18, s48, v14 ; 10241C30 v_mul_f32_e32 v20, s49, v16 ; 10282031 v_mul_f32_e32 v21, s49, v14 ; 102A1C31 v_mul_f32_e32 v22, s50, v16 ; 102C2032 v_mul_f32_e32 v23, s50, v14 ; 102E1C32 v_mul_f32_e32 v24, v3, v4 ; 10300903 v_mad_f32 v24, v6, v19, -v24 ; D2820018 84622706 v_mac_f32_e32 v17, s51, v24 ; 3E223033 v_mac_f32_e32 v18, s51, v15 ; 3E241E33 v_mac_f32_e32 v20, s52, v24 ; 3E283034 v_mac_f32_e32 v21, s52, v15 ; 3E2A1E34 v_mac_f32_e32 v22, s53, v24 ; 3E2C3035 v_mac_f32_e32 v23, s53, v15 ; 3E2E1E35 v_mul_f32_e32 v25, v19, v5 ; 10320B13 v_mad_f32 v25, v4, v2, -v25 ; D2820019 84660504 v_mac_f32_e32 v17, s54, v25 ; 3E223236 v_mac_f32_e32 v18, s54, v10 ; 3E241436 v_mac_f32_e32 v20, s55, v25 ; 3E283237 v_mac_f32_e32 v21, s55, v10 ; 3E2A1437 v_mul_f32_e32 v26, v25, v4 ; 10340919 v_mad_f32 v26, v6, v16, -v26 ; D282001A 846A2106 v_mul_f32_e32 v16, v16, v5 ; 10200B10 v_mad_f32 v4, v4, v24, -v16 ; D2820004 84423104 v_mul_f32_e32 v6, v24, v6 ; 100C0D18 v_mad_f32 v5, v5, v25, -v6 ; D2820005 841A3305 v_mul_f32_e32 v5, v19, v5 ; 100A0B13 v_mac_f32_e32 v5, v2, v26 ; 3E0A3502 v_mac_f32_e32 v5, v3, v4 ; 3E0A0903 v_cmp_gt_f32_e32 vcc, 0, v5 ; 7C080A80 v_cndmask_b32_e64 v2, 1.0, -1.0, vcc ; D2000002 01A9E6F2 v_mac_f32_e32 v22, s56, v25 ; 3E2C3238 v_mac_f32_e32 v23, s56, v10 ; 3E2E1438 v_add_f32_e32 v3, s57, v18 ; 06062439 v_add_f32_e32 v4, s58, v21 ; 06082A3A v_add_f32_e32 v5, s59, v23 ; 060A2E3B exp 15, 32, 0, 0, 0, v0, v7, v8, v9 ; F800020F 09080700 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v11, v11 ; 1000170B v_mac_f32_e32 v0, v12, v12 ; 3E00190C v_mac_f32_e32 v0, v13, v13 ; 3E001B0D v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_mul_f32_e32 v6, v17, v17 ; 100C2311 v_mac_f32_e32 v6, v20, v20 ; 3E0C2914 v_mac_f32_e32 v6, v22, v22 ; 3E0C2D16 v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_mul_f32_e32 v7, v0, v11 ; 100E1700 v_mul_f32_e32 v8, v0, v12 ; 10101900 v_mul_f32_e32 v0, v0, v13 ; 10001B00 v_mul_f32_e32 v9, v6, v17 ; 10122306 v_mul_f32_e32 v11, v6, v20 ; 10162906 v_mul_f32_e32 v6, v6, v22 ; 100C2D06 v_mul_f32_e32 v12, v11, v0 ; 1018010B v_mad_f32 v12, v8, v6, -v12 ; D282000C 84320D08 v_mul_f32_e32 v12, v2, v12 ; 10181902 exp 15, 33, 0, 0, 0, v9, v12, v7, v3 ; F800021F 03070C09 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, v6, v7 ; 10060F06 v_mad_f32 v3, v0, v9, -v3 ; D2820003 840E1300 v_mul_f32_e32 v3, v2, v3 ; 10060702 exp 15, 34, 0, 0, 0, v11, v3, v8, v4 ; F800022F 0408030B s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, v9, v8 ; 10061109 v_mad_f32 v3, v7, v11, -v3 ; D2820003 840E1707 v_mul_f32_e32 v2, v2, v3 ; 10040702 exp 15, 35, 0, 0, 0, v6, v2, v0, v5 ; F800023F 05000206 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, s37, v14 ; 10041C25 v_mul_f32_e32 v3, s38, v14 ; 10061C26 v_mul_f32_e32 v4, s39, v14 ; 10081C27 v_mac_f32_e32 v1, s40, v15 ; 3E021E28 v_mac_f32_e32 v2, s41, v15 ; 3E041E29 v_mac_f32_e32 v3, s42, v15 ; 3E061E2A v_mac_f32_e32 v4, s43, v15 ; 3E081E2B v_mul_f32_e32 v5, v0, v8 ; 100A1100 v_mul_f32_e32 v6, s24, v5 ; 100C0A18 v_mul_f32_e32 v9, s27, v5 ; 10120A1B v_mul_f32_e32 v5, s28, v5 ; 100A0A1C v_mul_f32_e32 v11, v8, v7 ; 10160F08 v_mac_f32_e32 v6, s19, v11 ; 3E0C1613 v_mac_f32_e32 v9, s21, v11 ; 3E121615 v_mac_f32_e32 v5, s25, v11 ; 3E0A1619 v_mul_f32_e32 v11, v0, v0 ; 10160100 v_mac_f32_e32 v6, s18, v11 ; 3E0C1612 v_mac_f32_e32 v9, s20, v11 ; 3E121614 v_mac_f32_e32 v5, s22, v11 ; 3E0A1616 v_mul_f32_e32 v11, s6, v8 ; 10161006 v_mac_f32_e32 v11, s4, v7 ; 3E160E04 v_mul_f32_e32 v12, s9, v8 ; 10181009 v_mac_f32_e32 v12, s7, v7 ; 3E180E07 v_mul_f32_e32 v13, s11, v8 ; 101A100B v_mac_f32_e32 v13, s10, v7 ; 3E1A0E0A v_mac_f32_e32 v11, s3, v0 ; 3E160003 v_mac_f32_e32 v12, s5, v0 ; 3E180005 v_mac_f32_e32 v13, s8, v0 ; 3E1A0008 v_mul_f32_e32 v0, v7, v0 ; 10000107 v_mac_f32_e32 v6, s12, v0 ; 3E0C000C v_mac_f32_e32 v9, s13, v0 ; 3E12000D v_mac_f32_e32 v5, s17, v0 ; 3E0A0011 v_mul_f32_e32 v0, v8, v8 ; 10001108 v_mad_f32 v0, v7, v7, -v0 ; D2820000 84020F07 v_mac_f32_e32 v6, s14, v0 ; 3E0C000E v_mac_f32_e32 v9, s15, v0 ; 3E12000F v_mac_f32_e32 v5, s16, v0 ; 3E0A0010 v_mac_f32_e32 v1, s34, v10 ; 3E021422 v_mac_f32_e32 v3, s35, v10 ; 3E061423 v_add_f32_e32 v0, s0, v11 ; 06001600 v_add_f32_e32 v7, s1, v12 ; 060E1801 v_add_f32_e32 v8, s2, v13 ; 06101A02 v_add_f32_e32 v3, s36, v3 ; 06060624 v_mov_b32_e32 v11, s26 ; 7E16021A v_mac_f32_e32 v11, s23, v3 ; 3E160617 v_add_f32_e32 v0, v0, v6 ; 06000D00 v_add_f32_e32 v6, v7, v9 ; 060C1307 v_add_f32_e32 v5, v8, v5 ; 060A0B08 exp 15, 36, 0, 0, 0, v11, v0, v6, v5 ; F800024F 0506000B v_mac_f32_e32 v2, s29, v10 ; 3E04141D v_mac_f32_e32 v4, s30, v10 ; 3E08141E s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v0, s31, v1 ; 0600021F v_add_f32_e32 v1, s32, v2 ; 06020420 v_add_f32_e32 v2, s33, v4 ; 06040821 exp 15, 12, 0, 1, 0, v0, v1, v3, v2 ; F80008CF 02030100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 104 VGPRS: 28 Code Size: 1276 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0..5] DCL CONST[8..9] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} 0: MOV TEMP[0].x, IN[1].wwww 1: MOV TEMP[0].y, IN[2].wwww 2: MOV TEMP[0].z, IN[3].wwww 3: MOV TEMP[1].xy, IN[0].xyyy 4: TEX TEMP[1], TEMP[1], SAMP[0], 2D 5: MUL TEMP[2].xyz, TEMP[1].xyzz, CONST[8].xyzz 6: MOV TEMP[3].xy, IN[0].zwww 7: TEX TEMP[3].yw, TEMP[3], SAMP[1], 2D 8: MAD TEMP[3].xy, TEMP[3].wyyy, IMM[0].xxxx, IMM[0].yyyy 9: DP2 TEMP[4].x, TEMP[3].xyyy, TEMP[3].xyyy 10: MOV_SAT TEMP[4].x, TEMP[4].xxxx 11: ADD TEMP[4].x, IMM[0].zzzz, -TEMP[4].xxxx 12: SQRT TEMP[4].x, TEMP[4].xxxx 13: MOV TEMP[3].z, TEMP[4].xxxx 14: FSLT TEMP[4].x, TEMP[1].wwww, CONST[9].xxxx 15: AND TEMP[4].x, TEMP[4].xxxx, IMM[0].zzzz 16: KILL_IF -TEMP[4].xxxx 17: MOV TEMP[4].w, IMM[0].wwww 18: DP3 TEMP[5].x, IN[1].xyzz, TEMP[3].xyzz 19: DP3 TEMP[6].x, IN[2].xyzz, TEMP[3].xyzz 20: MOV TEMP[5].y, TEMP[6].xxxx 21: DP3 TEMP[3].x, IN[3].xyzz, TEMP[3].xyzz 22: MOV TEMP[5].z, TEMP[3].xxxx 23: MUL TEMP[4].xyz, TEMP[2].xyzz, IN[4].yzww 24: ADD TEMP[0].xyz, CONST[0].xyzz, -TEMP[0].xyzz 25: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[0].xyzz 26: RSQ TEMP[3].x, TEMP[3].xxxx 27: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xxxx, CONST[1].xyzz 28: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[0].xyzz 29: RSQ TEMP[3].x, TEMP[3].xxxx 30: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xxxx 31: DP3 TEMP[0].x, TEMP[5].xyzz, TEMP[0].xyzz 32: MAX TEMP[0].x, IMM[0].wwww, TEMP[0].xxxx 33: POW TEMP[0].x, TEMP[0].xxxx, CONST[4].xxxx 34: MUL TEMP[0].xyz, TEMP[0].xxxx, CONST[5] 35: MUL TEMP[2].xyz, TEMP[2].xyzz, CONST[3].xyzz 36: DP3 TEMP[3].x, TEMP[5].xyzz, CONST[1].xyzz 37: MAX TEMP[3].x, IMM[0].wwww, TEMP[3].xxxx 38: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 39: MAD TEMP[0].xyz, CONST[3].xyzz, TEMP[0].xyzz, TEMP[2].xyzz 40: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[0].xxxx 41: MOV TEMP[0].w, TEMP[1].wwww 42: ADD TEMP[0], TEMP[4], TEMP[0] 43: MOV TEMP[4].w, TEMP[0].wwww 44: MOV_SAT TEMP[1].x, IN[4].xxxx 45: LRP TEMP[4].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[2].xyzz 46: MOV OUT[0], TEMP[4] 47: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %44 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %45 = load <32 x i8>, <32 x i8> addrspace(2)* %44, align 32, !tbaa !0 %46 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %49 = bitcast <8 x i32> addrspace(2)* %48 to <32 x i8> addrspace(2)* %50 = load <32 x i8>, <32 x i8> addrspace(2)* %49, align 32, !tbaa !0 %51 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %52 = bitcast <4 x i32> addrspace(2)* %51 to <16 x i8> addrspace(2)* %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 %54 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %55 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %56 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %57 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %58 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %59 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %60 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %61 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %62 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %63 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %64 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %65 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %66 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %67 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %68 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %69 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %70 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %71 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %74 = bitcast float %54 to i32 %75 = bitcast float %55 to i32 %76 = insertelement <2 x i32> undef, i32 %74, i32 0 %77 = insertelement <2 x i32> %76, i32 %75, i32 1 %78 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %77, <32 x i8> %45, <16 x i8> %47, i32 2) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 1 %81 = extractelement <4 x float> %78, i32 2 %82 = extractelement <4 x float> %78, i32 3 %83 = fmul float %79, %40 %84 = fmul float %80, %41 %85 = fmul float %81, %42 %86 = bitcast float %56 to i32 %87 = bitcast float %57 to i32 %88 = insertelement <2 x i32> undef, i32 %86, i32 0 %89 = insertelement <2 x i32> %88, i32 %87, i32 1 %90 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %89, <32 x i8> %50, <16 x i8> %53, i32 2) %91 = extractelement <4 x float> %90, i32 1 %92 = extractelement <4 x float> %90, i32 3 %93 = fmul float %92, 2.000000e+00 %94 = fadd float %93, -1.000000e+00 %95 = fmul float %91, 2.000000e+00 %96 = fadd float %95, -1.000000e+00 %97 = fmul float %94, %94 %98 = fmul float %96, %96 %99 = fadd float %97, %98 %100 = call float @llvm.AMDIL.clamp.(float %99, float 0.000000e+00, float 1.000000e+00) %101 = fsub float 1.000000e+00, %100 %102 = call float @llvm.sqrt.f32(float %101) %103 = fcmp olt float %82, %43 %104 = select i1 %103, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %104) %105 = fmul float %58, %94 %106 = fmul float %59, %96 %107 = fadd float %106, %105 %108 = fmul float %60, %102 %109 = fadd float %107, %108 %110 = fmul float %62, %94 %111 = fmul float %63, %96 %112 = fadd float %111, %110 %113 = fmul float %64, %102 %114 = fadd float %112, %113 %115 = fmul float %66, %94 %116 = fmul float %67, %96 %117 = fadd float %116, %115 %118 = fmul float %68, %102 %119 = fadd float %117, %118 %120 = fmul float %83, %71 %121 = fmul float %84, %72 %122 = fmul float %85, %73 %123 = fsub float %24, %61 %124 = fsub float %25, %65 %125 = fsub float %26, %69 %126 = fmul float %123, %123 %127 = fmul float %124, %124 %128 = fadd float %127, %126 %129 = fmul float %125, %125 %130 = fadd float %128, %129 %131 = call float @llvm.AMDGPU.rsq.clamped.f32(float %130) %132 = fmul float %123, %131 %133 = fadd float %132, %27 %134 = fmul float %124, %131 %135 = fadd float %134, %28 %136 = fmul float %125, %131 %137 = fadd float %136, %29 %138 = fmul float %133, %133 %139 = fmul float %135, %135 %140 = fadd float %139, %138 %141 = fmul float %137, %137 %142 = fadd float %140, %141 %143 = call float @llvm.AMDGPU.rsq.clamped.f32(float %142) %144 = fmul float %133, %143 %145 = fmul float %135, %143 %146 = fmul float %137, %143 %147 = fmul float %109, %144 %148 = fmul float %114, %145 %149 = fadd float %148, %147 %150 = fmul float %119, %146 %151 = fadd float %149, %150 %152 = call float @llvm.maxnum.f32(float %151, float 0.000000e+00) %153 = call float @llvm.pow.f32(float %152, float %36) %154 = fmul float %153, %37 %155 = fmul float %153, %38 %156 = fmul float %153, %39 %157 = fmul float %83, %33 %158 = fmul float %84, %34 %159 = fmul float %85, %35 %160 = fmul float %109, %27 %161 = fmul float %114, %28 %162 = fadd float %161, %160 %163 = fmul float %119, %29 %164 = fadd float %162, %163 %165 = call float @llvm.maxnum.f32(float %164, float 0.000000e+00) %166 = fmul float %157, %165 %167 = fmul float %158, %165 %168 = fmul float %159, %165 %169 = fmul float %33, %154 %170 = fadd float %169, %166 %171 = fmul float %34, %155 %172 = fadd float %171, %167 %173 = fmul float %35, %156 %174 = fadd float %173, %168 %175 = fmul float %170, 2.000000e+00 %176 = fmul float %172, 2.000000e+00 %177 = fmul float %174, 2.000000e+00 %178 = fadd float %120, %175 %179 = fadd float %121, %176 %180 = fadd float %122, %177 %181 = fadd float %82, 0.000000e+00 %182 = call float @llvm.AMDIL.clamp.(float %70, float 0.000000e+00, float 1.000000e+00) %183 = call float @llvm.AMDGPU.lrp(float %182, float %178, float %30) %184 = call float @llvm.AMDGPU.lrp(float %182, float %179, float %31) %185 = call float @llvm.AMDGPU.lrp(float %182, float %180, float %32) %186 = call i32 @llvm.SI.packf16(float %183, float %184) %187 = bitcast i32 %186 to float %188 = call i32 @llvm.SI.packf16(float %185, float %181) %189 = bitcast i32 %188 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %187, float %189, float %187, float %189) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 v_interp_p1_f32 v12, v0, 2, 2, [m0] ; C8300A00 v_interp_p2_f32 v12, [v12], v1, 2, 2, [m0] ; C8310A01 v_interp_p1_f32 v13, v0, 3, 2, [m0] ; C8340B00 v_interp_p2_f32 v13, [v13], v1, 3, 2, [m0] ; C8350B01 v_interp_p1_f32 v14, v0, 0, 3, [m0] ; C8380C00 v_interp_p2_f32 v14, [v14], v1, 0, 3, [m0] ; C8390C01 v_interp_p1_f32 v15, v0, 1, 3, [m0] ; C83C0D00 v_interp_p2_f32 v15, [v15], v1, 1, 3, [m0] ; C83D0D01 v_interp_p1_f32 v16, v0, 2, 3, [m0] ; C8400E00 v_interp_p2_f32 v16, [v16], v1, 2, 3, [m0] ; C8410E01 v_interp_p1_f32 v17, v0, 3, 3, [m0] ; C8440F00 v_interp_p2_f32 v17, [v17], v1, 3, 3, [m0] ; C8450F01 v_interp_p1_f32 v18, v0, 0, 4, [m0] ; C8481000 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p2_f32 v18, [v18], v1, 0, 4, [m0] ; C8491001 v_interp_p1_f32 v19, v0, 1, 4, [m0] ; C84C1100 v_interp_p2_f32 v19, [v19], v1, 1, 4, [m0] ; C84D1101 v_interp_p1_f32 v20, v0, 2, 4, [m0] ; C8501200 v_interp_p2_f32 v20, [v20], v1, 2, 4, [m0] ; C8511201 v_interp_p1_f32 v0, v0, 3, 4, [m0] ; C8001300 v_interp_p2_f32 v0, [v0], v1, 3, 4, [m0] ; C8011301 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800F00 00441502 image_sample v[1:2], 10, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[24:31], s[12:15] ; F0800A00 00660104 s_buffer_load_dword s4, s[0:3], 0x24 ; C2020124 s_buffer_load_dword s5, s[0:3], 0x20 ; C2028120 s_buffer_load_dword s6, s[0:3], 0x21 ; C2030121 s_buffer_load_dword s7, s[0:3], 0x22 ; C2038122 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, 2.0, v2, -1.0 ; D2820002 03CE04F4 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mul_f32_e32 v3, v1, v1 ; 10060301 v_mac_f32_e32 v3, v2, v2 ; 3E060502 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_sub_f32_e32 v3, 1.0, v3 ; 080606F2 v_sqrt_f32_e32 v3, v3 ; 7E066703 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_gt_f32_e32 vcc, s4, v24 ; 7C083004 v_mul_f32_e32 v4, s5, v21 ; 10082A05 v_mul_f32_e32 v5, s6, v22 ; 100A2C06 v_mul_f32_e32 v21, s7, v23 ; 102A2E07 v_cndmask_b32_e64 v22, 0, -1.0, vcc ; D2000016 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v22 ; 7C262C80 v_mul_f32_e32 v6, v2, v6 ; 100C0D02 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 v_mac_f32_e32 v6, v1, v7 ; 3E0C0F01 v_mul_f32_e32 v7, v2, v10 ; 100E1502 v_mac_f32_e32 v7, v1, v11 ; 3E0E1701 v_mul_f32_e32 v2, v2, v14 ; 10041D02 v_mac_f32_e32 v2, v1, v15 ; 3E041F01 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v1, s4, v9 ; 08021204 v_sub_f32_e32 v9, s5, v13 ; 08121A05 v_sub_f32_e32 v10, s6, v17 ; 08142206 v_mul_f32_e32 v11, v1, v1 ; 10160301 v_mac_f32_e32 v11, v9, v9 ; 3E161309 v_mac_f32_e32 v11, v10, v10 ; 3E16150A v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108 s_buffer_load_dword s9, s[0:3], 0x9 ; C2048109 s_buffer_load_dword s10, s[0:3], 0xa ; C205010A s_buffer_load_dword s11, s[0:3], 0xc ; C205810C s_buffer_load_dword s12, s[0:3], 0xd ; C206010D s_buffer_load_dword s13, s[0:3], 0xe ; C206810E s_buffer_load_dword s14, s[0:3], 0x10 ; C2070110 s_buffer_load_dword s15, s[0:3], 0x14 ; C2078114 s_buffer_load_dword s0, s[0:3], 0x15 ; C2000115 v_mad_f32 v1, v1, v11, s7 ; D2820001 001E1701 v_mad_f32 v9, v9, v11, s8 ; D2820009 00221709 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v10, v10, v11, s5 ; D282000A 0016170A v_mul_f32_e32 v11, v1, v1 ; 10160301 v_mac_f32_e32 v11, v9, v9 ; 3E161309 v_mac_f32_e32 v11, v10, v10 ; 3E16150A v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B v_mac_f32_e32 v6, v3, v8 ; 3E0C1103 v_mac_f32_e32 v7, v3, v12 ; 3E0E1903 v_mac_f32_e32 v2, v3, v16 ; 3E042103 v_mul_f32_e32 v1, v11, v1 ; 1002030B v_mul_f32_e32 v3, v11, v9 ; 1006130B v_mul_f32_e32 v8, v11, v10 ; 1010150B v_mul_f32_e32 v1, v1, v6 ; 10020D01 v_mac_f32_e32 v1, v3, v7 ; 3E020F03 v_mac_f32_e32 v1, v8, v2 ; 3E020508 v_max_f32_e32 v1, 0, v1 ; 20020280 v_log_f32_e32 v1, v1 ; 7E024F01 v_mul_f32_e32 v3, s7, v6 ; 10060C07 v_mac_f32_e32 v3, s8, v7 ; 3E060E08 v_mac_f32_e32 v3, s5, v2 ; 3E060405 v_mul_legacy_f32_e32 v1, s14, v1 ; 0E02020E v_exp_f32_e32 v1, v1 ; 7E024B01 v_mul_f32_e32 v2, s15, v1 ; 1004020F v_max_f32_e32 v3, 0, v3 ; 20060680 v_mul_f32_e32 v6, s11, v4 ; 100C080B v_mul_f32_e32 v7, v3, v6 ; 100E0D03 v_mac_f32_e32 v7, s11, v2 ; 3E0E040B v_mac_f32_e32 v7, v3, v6 ; 3E0E0D03 v_mac_f32_e32 v7, s11, v2 ; 3E0E040B v_mul_f32_e32 v2, s0, v1 ; 10040200 v_mul_f32_e32 v6, s12, v5 ; 100C0A0C v_mul_f32_e32 v8, v3, v6 ; 10100D03 v_mac_f32_e32 v8, s12, v2 ; 3E10040C v_mac_f32_e32 v8, v3, v6 ; 3E100D03 v_mac_f32_e32 v8, s12, v2 ; 3E10040C v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mul_f32_e32 v2, s13, v21 ; 10042A0D v_mul_f32_e32 v6, v3, v2 ; 100C0503 v_mac_f32_e32 v6, s13, v1 ; 3E0C020D v_mac_f32_e32 v6, v3, v2 ; 3E0C0503 v_mac_f32_e32 v6, s13, v1 ; 3E0C020D v_add_f32_e64 v1, 0, v18 clamp ; D2060801 00022480 v_sub_f32_e32 v2, 1.0, v1 ; 080402F2 v_mul_f32_e32 v3, s6, v2 ; 10060406 v_mul_f32_e32 v9, s9, v2 ; 10120409 v_mul_f32_e32 v2, s10, v2 ; 1004040A v_add_f32_e32 v10, 0, v24 ; 06143080 v_mac_f32_e32 v7, v19, v4 ; 3E0E0913 v_mac_f32_e32 v8, v20, v5 ; 3E100B14 v_mac_f32_e32 v6, v0, v21 ; 3E0C2B00 v_mac_f32_e32 v3, v7, v1 ; 3E060307 v_mac_f32_e32 v9, v8, v1 ; 3E120308 v_mac_f32_e32 v2, v6, v1 ; 3E040306 v_cvt_pkrtz_f16_f32_e32 v0, v3, v9 ; 5E001303 v_cvt_pkrtz_f16_f32_e32 v1, v2, v10 ; 5E021502 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 28 Code Size: 676 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0] 4: MOV TEMP[1].xy, IN[1].xyxx 5: MOV OUT[1], TEMP[1] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = fmul float %13, %33 %44 = fmul float %14, %33 %45 = fmul float %15, %33 %46 = fmul float %16, %33 %47 = fmul float %17, %34 %48 = fadd float %47, %43 %49 = fmul float %18, %34 %50 = fadd float %49, %44 %51 = fmul float %19, %34 %52 = fadd float %51, %45 %53 = fmul float %20, %34 %54 = fadd float %53, %46 %55 = fmul float %21, %35 %56 = fadd float %55, %48 %57 = fmul float %22, %35 %58 = fadd float %57, %50 %59 = fmul float %23, %35 %60 = fadd float %59, %52 %61 = fmul float %24, %35 %62 = fadd float %61, %54 %63 = fmul float %25, %36 %64 = fadd float %63, %56 %65 = fmul float %26, %36 %66 = fadd float %65, %58 %67 = fmul float %27, %36 %68 = fadd float %67, %60 %69 = fmul float %28, %36 %70 = fadd float %69, %62 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100 s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v0, s12, v2 ; 1000040C s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, s6, v3 ; 3E000606 v_mul_f32_e32 v8, s13, v2 ; 1010040D v_mac_f32_e32 v8, s7, v3 ; 3E100607 v_mul_f32_e32 v9, s4, v2 ; 10120404 v_mac_f32_e32 v9, s8, v3 ; 3E120608 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mac_f32_e32 v2, s9, v3 ; 3E040609 v_mac_f32_e32 v0, s10, v4 ; 3E00080A v_mac_f32_e32 v8, s11, v4 ; 3E10080B v_mac_f32_e32 v9, s14, v4 ; 3E12080E v_mac_f32_e32 v2, s15, v4 ; 3E04080F v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mac_f32_e32 v8, s17, v5 ; 3E100A11 v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706 exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 196 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0] DCL CONST[3..6] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { 0.1000, 0.6000, 0.3000, 0.5000} IMM[1] FLT32 { 3.0000, 2.0000, 1.0000, -0.5000} IMM[2] FLT32 { 0.0000, 5.0000, 0.0500, 0.7500} IMM[3] FLT32 { 1.1800, -0.2500, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[1].x, TEMP[0].xxxx, IMM[0].zzzz 3: MAD TEMP[1].x, TEMP[0].yyyy, IMM[0].yyyy, TEMP[1].xxxx 4: MAD TEMP[1].x, TEMP[0].zzzz, IMM[0].xxxx, TEMP[1].xxxx 5: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[0].wwww 6: LRP TEMP[0], IMM[0].wwww, TEMP[1].xxxx, TEMP[0] 7: MUL TEMP[2].xyz, CONST[0].xyzz, IMM[1].xxxx 8: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[2].xyzz 9: ADD TEMP[2].xy, CONST[3].xyyy, CONST[3].zwww 10: MUL TEMP[2].xy, TEMP[2].xyyy, IMM[0].wwww 11: ADD TEMP[2].xy, IN[0].xyyy, -TEMP[2].xyyy 12: ADD TEMP[3].xy, CONST[3].zwww, -CONST[3].xyyy 13: RCP TEMP[4].x, TEMP[3].xxxx 14: RCP TEMP[4].y, TEMP[3].yyyy 15: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[4].xyyy 16: MUL TEMP[2].xy, TEMP[2].xyyy, IMM[1].yyyy 17: MOV TEMP[3].xy, TEMP[2].xyxx 18: MUL TEMP[4].x, TEMP[2].xxxx, TEMP[2].xxxx 19: ADD TEMP[4].x, IMM[1].zzzz, -TEMP[4].xxxx 20: MUL TEMP[2].x, TEMP[2].yyyy, TEMP[2].yyyy 21: ADD TEMP[2].x, TEMP[4].xxxx, -TEMP[2].xxxx 22: MOV TEMP[4].xy, IN[0].xyyy 23: TEX TEMP[4].xyz, TEMP[4], SAMP[1], 2D 24: ADD TEMP[4].xy, TEMP[4].xyzz, IMM[1].wwww 25: MOV TEMP[4].xy, TEMP[4].xyxx 26: MOV TEMP[4].z, IMM[2].xxxx 27: SQRT TEMP[2].x, TEMP[2].xxxx 28: MOV TEMP[3].z, TEMP[2].xxxx 29: ADD TEMP[2].xyz, TEMP[3].xyzz, -TEMP[4].xyzz 30: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 31: RSQ TEMP[3].x, TEMP[3].xxxx 32: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 33: MOV TEMP[3].xy, CONST[4].xyxx 34: MOV TEMP[3].z, -CONST[4].zzzz 35: DP3 TEMP[2].x, TEMP[3].xyzz, TEMP[2].xyzz 36: FSLT TEMP[3].x, IMM[2].xxxx, TEMP[2].xxxx 37: UIF TEMP[3].xxxx :0 38: MAD TEMP[3].x, TEMP[2].xxxx, IMM[2].yyyy, IMM[2].zzzz 39: ELSE :0 40: MAD TEMP[3].x, TEMP[2].xxxx, IMM[2].zzzz, IMM[2].zzzz 41: ENDIF 42: MOV_SAT TEMP[2].x, TEMP[2].xxxx 43: MAD TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy, IMM[2].wwww 44: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx 45: MOV_SAT TEMP[2].x, TEMP[3].xxxx 46: MUL TEMP[3].x, CONST[5].xxxx, IMM[0].zzzz 47: MAD TEMP[3].x, CONST[5].yyyy, IMM[0].yyyy, TEMP[3].xxxx 48: MAD TEMP[3].x, CONST[5].zzzz, IMM[0].xxxx, TEMP[3].xxxx 49: ADD TEMP[3].x, IMM[3].xxxx, -TEMP[3].xxxx 50: POW TEMP[3].x, TEMP[3].xxxx, IMM[1].xxxx 51: ADD TEMP[3].x, TEMP[3].xxxx, IMM[3].yyyy 52: MOV_SAT TEMP[3].x, TEMP[3].xxxx 53: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx 54: MUL TEMP[0].x, TEMP[0].wwww, TEMP[2].xxxx 55: MUL TEMP[0].x, TEMP[0].xxxx, CONST[6].xxxx 56: MOV TEMP[1].w, TEMP[0].xxxx 57: MOV OUT[0], TEMP[1] 58: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %38 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %39 = load <32 x i8>, <32 x i8> addrspace(2)* %38, align 32, !tbaa !0 %40 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 %42 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %43 = bitcast <8 x i32> addrspace(2)* %42 to <32 x i8> addrspace(2)* %44 = load <32 x i8>, <32 x i8> addrspace(2)* %43, align 32, !tbaa !0 %45 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %46 = bitcast <4 x i32> addrspace(2)* %45 to <16 x i8> addrspace(2)* %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %50 = bitcast float %48 to i32 %51 = bitcast float %49 to i32 %52 = insertelement <2 x i32> undef, i32 %50, i32 0 %53 = insertelement <2 x i32> %52, i32 %51, i32 1 %54 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %53, <32 x i8> %39, <16 x i8> %41, i32 2) %55 = extractelement <4 x float> %54, i32 0 %56 = extractelement <4 x float> %54, i32 1 %57 = extractelement <4 x float> %54, i32 2 %58 = extractelement <4 x float> %54, i32 3 %59 = fmul float %55, 0x3FD3333340000000 %60 = fmul float %56, 0x3FE3333340000000 %61 = fadd float %60, %59 %62 = fmul float %57, 0x3FB99999A0000000 %63 = fadd float %62, %61 %64 = fmul float %63, %58 %65 = call float @llvm.AMDGPU.lrp(float 5.000000e-01, float %64, float %55) %66 = call float @llvm.AMDGPU.lrp(float 5.000000e-01, float %64, float %56) %67 = call float @llvm.AMDGPU.lrp(float 5.000000e-01, float %64, float %57) %68 = call float @llvm.AMDGPU.lrp(float 5.000000e-01, float %64, float %58) %69 = fmul float %24, 3.000000e+00 %70 = fmul float %25, 3.000000e+00 %71 = fmul float %26, 3.000000e+00 %72 = fmul float %65, %69 %73 = fmul float %66, %70 %74 = fmul float %67, %71 %75 = fadd float %27, %29 %76 = fadd float %28, %30 %77 = fmul float %75, 5.000000e-01 %78 = fmul float %76, 5.000000e-01 %79 = fsub float %48, %77 %80 = fsub float %49, %78 %81 = fsub float %29, %27 %82 = fsub float %30, %28 %83 = fdiv float 1.000000e+00, %81 %84 = fdiv float 1.000000e+00, %82 %85 = fmul float %79, %83 %86 = fmul float %80, %84 %87 = fmul float %85, 2.000000e+00 %88 = fmul float %86, 2.000000e+00 %89 = fmul float %87, %87 %90 = fsub float 1.000000e+00, %89 %91 = fmul float %88, %88 %92 = fsub float %90, %91 %93 = bitcast float %48 to i32 %94 = bitcast float %49 to i32 %95 = insertelement <2 x i32> undef, i32 %93, i32 0 %96 = insertelement <2 x i32> %95, i32 %94, i32 1 %97 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %44, <16 x i8> %47, i32 2) %98 = extractelement <4 x float> %97, i32 0 %99 = extractelement <4 x float> %97, i32 1 %100 = fadd float %98, -5.000000e-01 %101 = fadd float %99, -5.000000e-01 %102 = call float @llvm.sqrt.f32(float %92) %103 = fsub float %87, %100 %104 = fsub float %88, %101 %105 = fmul float %103, %103 %106 = fmul float %104, %104 %107 = fadd float %106, %105 %108 = fmul float %102, %102 %109 = fadd float %107, %108 %110 = call float @llvm.AMDGPU.rsq.clamped.f32(float %109) %111 = fmul float %103, %110 %112 = fmul float %104, %110 %113 = fmul float %102, %110 %114 = fmul float %31, %111 %115 = fmul float %32, %112 %116 = fadd float %115, %114 %117 = fmul float %33, %113 %118 = fsub float %116, %117 %119 = fcmp ogt float %118, 0.000000e+00 %.sink.v = select i1 %119, float 5.000000e+00, float 0x3FA99999A0000000 %.sink = fmul float %118, %.sink.v %120 = fadd float %.sink, 0x3FA99999A0000000 %121 = call float @llvm.AMDIL.clamp.(float %118, float 0.000000e+00, float 1.000000e+00) %122 = fmul float %121, 2.000000e+00 %123 = fadd float %122, 7.500000e-01 %124 = fmul float %72, %123 %125 = fmul float %73, %123 %126 = fmul float %74, %123 %127 = call float @llvm.AMDIL.clamp.(float %120, float 0.000000e+00, float 1.000000e+00) %128 = fmul float %34, 0x3FD3333340000000 %129 = fmul float %35, 0x3FE3333340000000 %130 = fadd float %129, %128 %131 = fmul float %36, 0x3FB99999A0000000 %132 = fadd float %131, %130 %133 = fsub float 0x3FF2E147A0000000, %132 %134 = call float @llvm.pow.f32(float %133, float 3.000000e+00) %135 = fadd float %134, -2.500000e-01 %136 = call float @llvm.AMDIL.clamp.(float %135, float 0.000000e+00, float 1.000000e+00) %137 = fmul float %127, %136 %138 = fmul float %68, %137 %139 = fmul float %138, %37 %140 = call i32 @llvm.SI.packf16(float %124, float %125) %141 = bitcast i32 %140 to float %142 = call i32 @llvm.SI.packf16(float %126, float %139) %143 = bitcast i32 %142 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %141, float %143, float %141, float %143) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v2, 0x3e99999a ; 7E0402FF 3E99999A v_mov_b32_e32 v3, 0x3f19999a ; 7E0602FF 3F19999A v_mov_b32_e32 v4, 0x3dcccccd ; 7E0802FF 3DCCCCCD v_mov_b32_e32 v5, 0x40400000 ; 7E0A02FF 40400000 v_mov_b32_e32 v6, 0x40a00000 ; 7E0C02FF 40A00000 v_mov_b32_e32 v7, 0x3d4ccccd ; 7E0E02FF 3D4CCCCD s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_interp_p1_f32 v8, v0, 0, 0, [m0] ; C8200000 v_interp_p2_f32 v8, [v8], v1, 0, 0, [m0] ; C8210001 v_interp_p1_f32 v9, v0, 1, 0, [m0] ; C8240100 v_interp_p2_f32 v9, [v9], v1, 1, 0, [m0] ; C8250101 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0xc ; C203810C s_buffer_load_dword s32, s[0:3], 0xd ; C210010D s_buffer_load_dword s33, s[0:3], 0xe ; C210810E s_buffer_load_dword s34, s[0:3], 0xf ; C211010F image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[16:23], s[8:11] ; F0800F00 00440A08 image_sample v[0:1], 3, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[24:31], s[12:15] ; F0800300 00660008 s_buffer_load_dword s8, s[0:3], 0x10 ; C2040110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v14, s4, v5 ; 101C0A04 v_mul_f32_e32 v15, s5, v5 ; 101E0A05 v_mul_f32_e32 v16, s6, v5 ; 10200A06 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_buffer_load_dword s5, s[0:3], 0x12 ; C2028112 s_buffer_load_dword s6, s[0:3], 0x14 ; C2030114 s_buffer_load_dword s9, s[0:3], 0x15 ; C2048115 s_buffer_load_dword s10, s[0:3], 0x16 ; C2050116 s_buffer_load_dword s0, s[0:3], 0x18 ; C2000118 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v17, v2, v10 ; 10221502 v_mac_f32_e32 v17, v3, v11 ; 3E221703 v_mac_f32_e32 v17, v4, v12 ; 3E221904 v_mul_f32_e32 v17, v13, v17 ; 1022230D v_mul_f32_e32 v10, 0.5, v10 ; 101414F0 v_mac_f32_e32 v10, 0.5, v17 ; 3E1422F0 v_mul_f32_e32 v10, v14, v10 ; 1014150E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v2, s6, v2 ; 10040406 v_mac_f32_e32 v2, s9, v3 ; 3E040609 v_mac_f32_e32 v2, s10, v4 ; 3E04080A v_sub_f32_e32 v2, 0x3f970a3d, v2 ; 080404FF 3F970A3D v_log_f32_e32 v2, v2 ; 7E044F02 v_mul_f32_e32 v3, 0.5, v11 ; 100616F0 v_mac_f32_e32 v3, 0.5, v17 ; 3E0622F0 v_mul_f32_e32 v3, v15, v3 ; 1006070F v_mul_legacy_f32_e32 v2, v5, v2 ; 0E040505 v_exp_f32_e32 v2, v2 ; 7E044B02 v_mul_f32_e32 v4, 0.5, v12 ; 100818F0 v_mul_f32_e32 v5, 0.5, v13 ; 100A1AF0 v_mac_f32_e32 v4, 0.5, v17 ; 3E0822F0 v_mac_f32_e32 v5, 0.5, v17 ; 3E0A22F0 v_mul_f32_e32 v4, v16, v4 ; 10080910 v_mov_b32_e32 v11, s7 ; 7E160207 v_sub_f32_e32 v11, s33, v11 ; 08161621 v_mov_b32_e32 v12, s33 ; 7E180221 v_add_f32_e32 v12, s7, v12 ; 06181807 v_mad_f32 v8, 0.5, -v12, v8 ; D2820008 442218F0 v_rcp_f32_e32 v11, v11 ; 7E16550B v_mov_b32_e32 v12, s32 ; 7E180220 v_sub_f32_e32 v12, s34, v12 ; 08181822 v_mov_b32_e32 v13, s34 ; 7E1A0222 v_add_f32_e32 v13, s32, v13 ; 061A1A20 v_rcp_f32_e32 v12, v12 ; 7E18550C v_mad_f32 v9, 0.5, -v13, v9 ; D2820009 44261AF0 v_mul_f32_e32 v13, v11, v8 ; 101A110B v_mad_f32 v8, v11, v8, v13 ; D2820008 0436110B v_mul_f32_e32 v11, v12, v9 ; 1016130C v_mad_f32 v9, v12, v9, v11 ; D2820009 042E130C v_mad_f32 v8, -v8, v8, 1.0 ; D2820008 23CA1108 v_mad_f32 v8, -v9, v9, v8 ; D2820008 24221309 v_add_f32_e32 v0, -0.5, v0 ; 060000F1 v_add_f32_e32 v1, -0.5, v1 ; 060202F1 v_sqrt_f32_e32 v8, v8 ; 7E106708 v_mad_f32 v0, 2.0, v13, -v0 ; D2820000 84021AF4 v_mad_f32 v1, 2.0, v11, -v1 ; D2820001 840616F4 v_mul_f32_e32 v9, v0, v0 ; 10120100 v_mac_f32_e32 v9, v1, v1 ; 3E120301 v_mac_f32_e32 v9, v8, v8 ; 3E121108 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mul_f32_e32 v8, v9, v8 ; 10101109 v_mul_f32_e32 v0, s8, v0 ; 10000008 v_mac_f32_e32 v0, s4, v1 ; 3E000204 v_mad_f32 v0, -s5, v8, v0 ; D2820000 24021005 v_cmp_lt_f32_e32 vcc, 0, v0 ; 7C020080 v_cndmask_b32_e32 v1, v7, v6 ; 00020D07 v_mac_f32_e32 v7, v1, v0 ; 3E0E0101 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_madak_f32_e32 v0, 2.0, v0, 0x3f400000 ; 420000F4 3F400000 v_mul_f32_e32 v1, v0, v10 ; 10021500 v_mul_f32_e32 v3, v0, v3 ; 10060700 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_add_f32_e64 v4, 0, v7 clamp ; D2060804 00020E80 v_mov_b32_e32 v6, 0xbe800000 ; 7E0C02FF BE800000 v_add_f32_e32 v2, v2, v6 ; 06040D02 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mul_f32_e32 v2, v2, v4 ; 10040902 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_mul_f32_e32 v2, s0, v2 ; 10040400 v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 20 Code Size: 552 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL OUT[6], GENERIC[5] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0..15] DCL CONST[17..20] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 0.2500, -1.0000, 10.0000, 0.4999} IMM[1] INT32 {256, 0, 1, 2} IMM[2] FLT32 { 1.0000, 0.0000, 0.1000, 0.0039} IMM[3] FLT32 { 16.0000, -8.0000, 4.0000, -2.0000} IMM[4] INT32 {4, 0, 0, 0} IMM[5] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0].x, IN[2].xxxx, IMM[0].xxxx 1: F2I TEMP[0].x, TEMP[0].xxxx 2: F2I TEMP[1].x, IN[2].yyyy 3: IDIV TEMP[2].x, TEMP[1].xxxx, IMM[1].xxxx 4: I2F TEMP[3].x, TEMP[0].xxxx 5: I2F TEMP[4].x, TEMP[2].xxxx 6: MOV TEMP[3].y, TEMP[4].xxxx 7: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx 8: INEG TEMP[2].x, TEMP[2].xxxx 9: UADD TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx 10: I2F TEMP[2].x, TEMP[2].xxxx 11: MOV TEMP[3].z, TEMP[2].xxxx 12: ADD TEMP[2].xyz, TEMP[3].xyzz, IMM[0].yyyy 13: I2F TEMP[1].x, TEMP[1].xxxx 14: ADD TEMP[1].x, IN[2].yyyy, -TEMP[1].xxxx 15: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].wwww 16: F2I TEMP[1].x, TEMP[1].xxxx 17: USEQ TEMP[4].x, TEMP[1].xxxx, IMM[1].yyyy 18: AND TEMP[4].x, TEMP[4].xxxx, IMM[2].xxxx 19: USEQ TEMP[5].x, TEMP[1].xxxx, IMM[1].zzzz 20: AND TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx 21: MOV TEMP[4].y, TEMP[5].xxxx 22: USEQ TEMP[1].x, TEMP[1].xxxx, IMM[1].wwww 23: AND TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx 24: MOV TEMP[4].z, TEMP[1].xxxx 25: MOV TEMP[1].xyz, TEMP[4].xyzx 26: MOV TEMP[4].w, IMM[2].yyyy 27: MOV TEMP[4].xyz, TEMP[3].xyzx 28: MOV TEMP[3].y, IMM[2].yzyy 29: DP4 TEMP[4].x, TEMP[1], TEMP[4] 30: MUL TEMP[3].x, TEMP[4].xxxx, IMM[2].wwww 31: MOV TEMP[3].xy, TEMP[3].xyyy 32: MOV TEMP[3].w, IMM[2].yyyy 33: TXL TEMP[3].xy, TEMP[3], SAMP[0], 2D 34: MAD TEMP[4].x, TEMP[3].xxxx, IMM[3].xxxx, IMM[3].yyyy 35: MOV TEMP[2].w, TEMP[4].xxxx 36: MUL TEMP[3].x, TEMP[3].yyyy, IMM[3].zzzz 37: MOV TEMP[1].w, TEMP[3].xxxx 38: UMUL TEMP[0].x, IMM[4].xxxx, TEMP[0].xxxx 39: I2F TEMP[0].x, TEMP[0].xxxx 40: ADD TEMP[0].x, IN[2].xxxx, -TEMP[0].xxxx 41: ADD TEMP[0].x, TEMP[0].xxxx, IMM[3].wwww 42: MUL TEMP[3], CONST[17], IN[0].xxxx 43: MAD TEMP[3], CONST[18], IN[0].yyyy, TEMP[3] 44: MAD TEMP[3], CONST[19], IN[0].zzzz, TEMP[3] 45: MAD TEMP[3], CONST[20], IN[0].wwww, TEMP[3] 46: MOV TEMP[4].x, CONST[12].xxxx 47: MOV TEMP[4].y, CONST[13].xxxx 48: MOV TEMP[4].z, CONST[14].xxxx 49: MOV TEMP[5].x, CONST[12].yyyy 50: MOV TEMP[5].y, CONST[13].yyyy 51: MOV TEMP[5].z, CONST[14].yyyy 52: MOV TEMP[6].x, CONST[12].zzzz 53: MOV TEMP[6].y, CONST[13].zzzz 54: MOV TEMP[6].z, CONST[14].zzzz 55: MUL TEMP[4].xyz, TEMP[4].xyzz, IN[1].xxxx 56: MAD TEMP[4].xyz, TEMP[5].xyzz, IN[1].yyyy, TEMP[4].xyzz 57: MAD TEMP[0].xyz, TEMP[6].xyzz, TEMP[0].xxxx, TEMP[4].xyzz 58: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[0].xyzz 59: RSQ TEMP[4].x, TEMP[4].xxxx 60: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xxxx 61: MOV TEMP[4].w, IMM[2].xxxx 62: MOV TEMP[4].xyz, TEMP[0].xyzx 63: DP4 TEMP[5].x, CONST[1], TEMP[4] 64: DP4 TEMP[6].x, CONST[2], TEMP[4] 65: MOV TEMP[5].y, TEMP[6].xxxx 66: DP4 TEMP[4].x, CONST[3], TEMP[4] 67: MOV TEMP[5].z, TEMP[4].xxxx 68: MUL TEMP[4], TEMP[0].xyzz, TEMP[0].yzzx 69: DP4 TEMP[6].x, CONST[4], TEMP[4] 70: DP4 TEMP[7].x, CONST[5], TEMP[4] 71: MOV TEMP[6].y, TEMP[7].xxxx 72: DP4 TEMP[4].x, CONST[6], TEMP[4] 73: MOV TEMP[6].z, TEMP[4].xxxx 74: MUL TEMP[4].xyw, TEMP[3], IMM[5].xxxx 75: MOV TEMP[7].x, TEMP[4].xxxx 76: MUL TEMP[8].x, TEMP[4].yyyy, CONST[0].xxxx 77: MOV TEMP[7].y, TEMP[8].xxxx 78: ADD TEMP[4].xy, TEMP[7].xyyy, TEMP[4].wwww 79: MOV TEMP[4].zw, TEMP[3].wwzw 80: MUL TEMP[7].x, TEMP[0].yyyy, TEMP[0].yyyy 81: MAD TEMP[7].x, TEMP[0].xxxx, TEMP[0].xxxx, -TEMP[7].xxxx 82: MAD TEMP[6].xyz, CONST[7].xyzz, TEMP[7].xxxx, TEMP[6].xyzz 83: ADD TEMP[5].xyz, TEMP[6].xyzz, TEMP[5].xyzz 84: MOV TEMP[0].yzw, TEMP[0].yxyz 85: MUL TEMP[6], CONST[8], IN[0].xxxx 86: MAD TEMP[6], CONST[9], IN[0].yyyy, TEMP[6] 87: MAD TEMP[6], CONST[10], IN[0].zzzz, TEMP[6] 88: MAD TEMP[6].xyz, CONST[11], IN[0].wwww, TEMP[6] 89: MOV TEMP[6].xyz, TEMP[6].xyzx 90: MOV TEMP[6].w, TEMP[5].xxxx 91: MOV TEMP[5].xy, TEMP[5].yzyy 92: MOV TEMP[0].x, TEMP[3].zzzz 93: MOV OUT[5], TEMP[6] 94: MOV OUT[1], TEMP[2] 95: MOV OUT[2], TEMP[1] 96: MOV OUT[4], TEMP[0] 97: MOV OUT[3], TEMP[4] 98: MOV OUT[0], TEMP[3] 99: MOV OUT[6], TEMP[5] 100: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316) %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332) %78 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %79 = load <32 x i8>, <32 x i8> addrspace(2)* %78, align 32, !tbaa !0 %80 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0 %84 = add i32 %5, %7 %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %84) %86 = extractelement <4 x float> %85, i32 0 %87 = extractelement <4 x float> %85, i32 1 %88 = extractelement <4 x float> %85, i32 2 %89 = extractelement <4 x float> %85, i32 3 %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 %92 = add i32 %5, %7 %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %92) %94 = extractelement <4 x float> %93, i32 0 %95 = extractelement <4 x float> %93, i32 1 %96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0 %98 = add i32 %5, %7 %99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = fmul float %100, 2.500000e-01 %103 = fptosi float %102 to i32 %104 = fptosi float %101 to i32 %105 = sdiv i32 %104, 256 %106 = sitofp i32 %103 to float %107 = sitofp i32 %105 to float %108 = shl nsw i32 %105, 8 %109 = sub i32 %104, %108 %110 = sitofp i32 %109 to float %111 = fadd float %106, -1.000000e+00 %112 = fadd float %107, -1.000000e+00 %113 = fadd float %110, -1.000000e+00 %114 = sitofp i32 %104 to float %115 = fsub float %101, %114 %116 = fmul float %115, 1.000000e+01 %117 = fadd float %116, 0x3FDFFE5CA0000000 %118 = fptosi float %117 to i32 %119 = icmp eq i32 %118, 0 %120 = select i1 %119, float 1.000000e+00, float 0.000000e+00 %121 = icmp eq i32 %118, 1 %122 = select i1 %121, float 1.000000e+00, float 0.000000e+00 %123 = icmp eq i32 %118, 2 %124 = select i1 %123, float 1.000000e+00, float 0.000000e+00 %125 = fmul float %120, %106 %126 = fmul float %122, %107 %127 = fadd float %125, %126 %128 = fmul float %124, %110 %129 = fadd float %127, %128 %130 = fadd float %129, 0.000000e+00 %131 = fmul float %130, 0x3F70101020000000 %132 = bitcast float %131 to i32 %133 = insertelement <4 x i32> undef, i32 %132, i32 0 %134 = insertelement <4 x i32> %133, i32 1036831949, i32 1 %135 = insertelement <4 x i32> %134, i32 0, i32 2 %136 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %135, <32 x i8> %79, <16 x i8> %81, i32 2) %137 = extractelement <4 x float> %136, i32 0 %138 = extractelement <4 x float> %136, i32 1 %139 = fmul float %137, 1.600000e+01 %140 = fadd float %139, -8.000000e+00 %141 = fmul float %138, 4.000000e+00 %142 = shl i32 %103, 2 %143 = sitofp i32 %142 to float %144 = fsub float %100, %143 %145 = fadd float %144, -2.000000e+00 %146 = fmul float %62, %86 %147 = fmul float %63, %86 %148 = fmul float %64, %86 %149 = fmul float %65, %86 %150 = fmul float %66, %87 %151 = fadd float %150, %146 %152 = fmul float %67, %87 %153 = fadd float %152, %147 %154 = fmul float %68, %87 %155 = fadd float %154, %148 %156 = fmul float %69, %87 %157 = fadd float %156, %149 %158 = fmul float %70, %88 %159 = fadd float %158, %151 %160 = fmul float %71, %88 %161 = fadd float %160, %153 %162 = fmul float %72, %88 %163 = fadd float %162, %155 %164 = fmul float %73, %88 %165 = fadd float %164, %157 %166 = fmul float %74, %89 %167 = fadd float %166, %159 %168 = fmul float %75, %89 %169 = fadd float %168, %161 %170 = fmul float %76, %89 %171 = fadd float %170, %163 %172 = fmul float %77, %89 %173 = fadd float %172, %165 %174 = fmul float %53, %94 %175 = fmul float %56, %94 %176 = fmul float %59, %94 %177 = fmul float %54, %95 %178 = fadd float %177, %174 %179 = fmul float %57, %95 %180 = fadd float %179, %175 %181 = fmul float %60, %95 %182 = fadd float %181, %176 %183 = fmul float %55, %145 %184 = fadd float %183, %178 %185 = fmul float %58, %145 %186 = fadd float %185, %180 %187 = fmul float %61, %145 %188 = fadd float %187, %182 %189 = fmul float %184, %184 %190 = fmul float %186, %186 %191 = fadd float %190, %189 %192 = fmul float %188, %188 %193 = fadd float %191, %192 %194 = call float @llvm.AMDGPU.rsq.clamped.f32(float %193) %195 = fmul float %184, %194 %196 = fmul float %186, %194 %197 = fmul float %188, %194 %198 = fmul float %14, %195 %199 = fmul float %15, %196 %200 = fadd float %198, %199 %201 = fmul float %16, %197 %202 = fadd float %200, %201 %203 = fadd float %202, %17 %204 = fmul float %18, %195 %205 = fmul float %19, %196 %206 = fadd float %204, %205 %207 = fmul float %20, %197 %208 = fadd float %206, %207 %209 = fadd float %208, %21 %210 = fmul float %22, %195 %211 = fmul float %23, %196 %212 = fadd float %210, %211 %213 = fmul float %24, %197 %214 = fadd float %212, %213 %215 = fadd float %214, %25 %216 = fmul float %195, %196 %217 = fmul float %196, %197 %218 = fmul float %197, %197 %219 = fmul float %197, %195 %220 = fmul float %26, %216 %221 = fmul float %27, %217 %222 = fadd float %220, %221 %223 = fmul float %28, %218 %224 = fadd float %222, %223 %225 = fmul float %29, %219 %226 = fadd float %224, %225 %227 = fmul float %30, %216 %228 = fmul float %31, %217 %229 = fadd float %227, %228 %230 = fmul float %32, %218 %231 = fadd float %229, %230 %232 = fmul float %33, %219 %233 = fadd float %231, %232 %234 = fmul float %34, %216 %235 = fmul float %35, %217 %236 = fadd float %234, %235 %237 = fmul float %36, %218 %238 = fadd float %236, %237 %239 = fmul float %37, %219 %240 = fadd float %238, %239 %241 = fmul float %167, 5.000000e-01 %242 = fmul float %169, 5.000000e-01 %243 = fmul float %173, 5.000000e-01 %244 = fmul float %242, %13 %245 = fadd float %241, %243 %246 = fadd float %244, %243 %247 = fmul float %196, %196 %248 = fmul float %195, %195 %249 = fsub float %248, %247 %250 = fmul float %38, %249 %251 = fadd float %250, %226 %252 = fmul float %39, %249 %253 = fadd float %252, %233 %254 = fmul float %40, %249 %255 = fadd float %254, %240 %256 = fadd float %251, %203 %257 = fadd float %253, %209 %258 = fadd float %255, %215 %259 = fmul float %41, %86 %260 = fmul float %42, %86 %261 = fmul float %43, %86 %262 = fmul float %44, %87 %263 = fadd float %262, %259 %264 = fmul float %45, %87 %265 = fadd float %264, %260 %266 = fmul float %46, %87 %267 = fadd float %266, %261 %268 = fmul float %47, %88 %269 = fadd float %268, %263 %270 = fmul float %48, %88 %271 = fadd float %270, %265 %272 = fmul float %49, %88 %273 = fadd float %272, %267 %274 = fmul float %50, %89 %275 = fadd float %274, %269 %276 = fmul float %51, %89 %277 = fadd float %276, %271 %278 = fmul float %52, %89 %279 = fadd float %278, %273 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %111, float %112, float %113, float %140) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %120, float %122, float %124, float %141) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %245, float %246, float %171, float %173) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %171, float %195, float %196, float %197) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %275, float %277, float %279, float %256) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %257, float %258, float %258, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %167, float %169, float %171, float %173) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0x3efff2e5 ; 7E0202FF 3EFFF2E5 v_mov_b32_e32 v2, 0xc1000000 ; 7E0402FF C1000000 v_mov_b32_e32 v5, 0 ; 7E0A0280 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[20:23], s[8:9], 0x4 ; C08A0904 s_load_dwordx4 s[24:27], s[8:9], 0x8 ; C08C0908 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s39, s[12:15], 0x30 ; C2138D30 s_buffer_load_dword s40, s[12:15], 0x31 ; C2140D31 buffer_load_format_xyzw v[6:9], v0, s[0:3], 0 idxen ; E00C2000 80000600 buffer_load_format_xyzw v[10:13], v0, s[20:23], 0 idxen ; E00C2000 80050A00 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[12:15], v0, s[24:27], 0 idxen ; E00C2000 80060C00 s_buffer_load_dword s1, s[12:15], 0x32 ; C2008D32 s_buffer_load_dword s41, s[12:15], 0x34 ; C2148D34 s_buffer_load_dword s42, s[12:15], 0x35 ; C2150D35 s_buffer_load_dword s2, s[12:15], 0x36 ; C2010D36 s_buffer_load_dword s43, s[12:15], 0x38 ; C2158D38 s_buffer_load_dword s44, s[12:15], 0x39 ; C2160D39 s_buffer_load_dword s3, s[12:15], 0x3a ; C2018D3A s_buffer_load_dword s45, s[12:15], 0x44 ; C2168D44 s_buffer_load_dword s46, s[12:15], 0x45 ; C2170D45 s_buffer_load_dword s47, s[12:15], 0x46 ; C2178D46 s_buffer_load_dword s48, s[12:15], 0x47 ; C2180D47 s_buffer_load_dword s49, s[12:15], 0x48 ; C2188D48 s_buffer_load_dword s50, s[12:15], 0x49 ; C2190D49 s_buffer_load_dword s51, s[12:15], 0x4a ; C2198D4A s_buffer_load_dword s52, s[12:15], 0x4b ; C21A0D4B s_buffer_load_dword s28, s[12:15], 0x4c ; C20E0D4C s_buffer_load_dword s30, s[12:15], 0x4d ; C20F0D4D s_buffer_load_dword s29, s[12:15], 0x4e ; C20E8D4E s_buffer_load_dword s0, s[12:15], 0xd ; C2000D0D s_buffer_load_dword s31, s[12:15], 0x4f ; C20F8D4F s_buffer_load_dword s26, s[12:15], 0x50 ; C20D0D50 s_buffer_load_dword s23, s[12:15], 0x51 ; C20B8D51 s_buffer_load_dword s24, s[12:15], 0x52 ; C20C0D52 s_buffer_load_dword s22, s[12:15], 0x53 ; C20B0D53 s_buffer_load_dword s35, s[12:15], 0x22 ; C2118D22 s_buffer_load_dword s37, s[12:15], 0x24 ; C2128D24 s_buffer_load_dword s36, s[12:15], 0x25 ; C2120D25 s_buffer_load_dword s34, s[12:15], 0x26 ; C2110D26 s_buffer_load_dword s33, s[12:15], 0x28 ; C2108D28 s_buffer_load_dword s32, s[12:15], 0x29 ; C2100D29 s_buffer_load_dword s27, s[12:15], 0x2a ; C20D8D2A s_buffer_load_dword s25, s[12:15], 0x2c ; C20C8D2C s_buffer_load_dword s21, s[12:15], 0x2d ; C20A8D2D s_buffer_load_dword s20, s[12:15], 0x2e ; C20A0D2E s_buffer_load_dword s38, s[12:15], 0x20 ; C2130D20 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s45, v6 ; 10000C2D v_mul_f32_e32 v14, s46, v6 ; 101C0C2E v_mul_f32_e32 v15, s39, v10 ; 101E1427 v_mul_f32_e32 v16, s41, v10 ; 10201429 v_mul_f32_e32 v10, s43, v10 ; 1014142B v_mac_f32_e32 v0, s49, v7 ; 3E000E31 v_mac_f32_e32 v14, s50, v7 ; 3E1C0E32 s_buffer_load_dword s39, s[12:15], 0x21 ; C2138D21 v_mac_f32_e32 v15, s40, v11 ; 3E1E1628 v_mac_f32_e32 v16, s42, v11 ; 3E20162A v_mac_f32_e32 v10, s44, v11 ; 3E14162C v_mul_f32_e32 v11, s47, v6 ; 10160C2F v_mac_f32_e32 v11, s51, v7 ; 3E160E33 v_mul_f32_e32 v17, s48, v6 ; 10220C30 v_mac_f32_e32 v17, s52, v7 ; 3E220E34 v_mul_f32_e32 v18, s38, v6 ; 10240C26 v_mac_f32_e32 v18, s37, v7 ; 3E240E25 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v19, s39, v6 ; 10260C27 v_mac_f32_e32 v19, s36, v7 ; 3E260E24 v_mul_f32_e32 v6, s35, v6 ; 100C0C23 v_mac_f32_e32 v6, s34, v7 ; 3E0C0E22 v_mac_f32_e32 v0, s28, v8 ; 3E00101C v_mac_f32_e32 v14, s30, v8 ; 3E1C101E v_mac_f32_e32 v11, s29, v8 ; 3E16101D v_mac_f32_e32 v17, s31, v8 ; 3E22101F v_mac_f32_e32 v18, s33, v8 ; 3E241021 v_mac_f32_e32 v19, s32, v8 ; 3E261020 v_mac_f32_e32 v6, s27, v8 ; 3E0C101B v_mac_f32_e32 v0, s26, v9 ; 3E00121A v_mac_f32_e32 v14, s23, v9 ; 3E1C1217 v_mac_f32_e32 v11, s24, v9 ; 3E161218 v_mul_f32_e32 v3, 0x3e800000, v12 ; 100618FF 3E800000 v_cvt_i32_f32_e32 v4, v13 ; 7E08110D v_cvt_i32_f32_e32 v3, v3 ; 7E061103 v_mac_f32_e32 v17, s22, v9 ; 3E221216 v_mac_f32_e32 v18, s25, v9 ; 3E241219 v_cvt_f32_i32_e32 v7, v4 ; 7E0E0B04 v_lshlrev_b32_e32 v8, 2, v3 ; 34100682 v_cvt_f32_i32_e32 v8, v8 ; 7E100B08 v_mac_f32_e32 v19, s21, v9 ; 3E261215 v_mac_f32_e32 v6, s20, v9 ; 3E0C1214 v_subrev_f32_e32 v7, v7, v13 ; 0A0E1B07 v_subrev_f32_e32 v8, v8, v12 ; 0A101908 v_madmk_f32_e32 v1, v7, v1, 0x41200000 ; 40020307 41200000 v_ashrrev_i32_e32 v7, 31, v4 ; 300E089F v_lshrrev_b32_e32 v7, 24, v7 ; 2C0E0E98 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_add_i32_e32 v7, v4, v7 ; 4A0E0F04 v_and_b32_e32 v9, 0xffffff00, v7 ; 36120EFF FFFFFF00 v_sub_i32_e32 v4, v4, v9 ; 4C081304 v_cmp_eq_i32_e32 vcc, 0, v1 ; 7D040280 v_cndmask_b32_e64 v9, 0, 1.0, vcc ; D2000009 01A9E480 v_cmp_eq_i32_e32 vcc, 1, v1 ; 7D040281 v_cndmask_b32_e64 v12, 0, 1.0, vcc ; D200000C 01A9E480 v_ashrrev_i32_e32 v7, 8, v7 ; 300E0E88 v_cvt_f32_i32_e32 v7, v7 ; 7E0E0B07 v_cvt_f32_i32_e32 v13, v3 ; 7E1A0B03 v_cvt_f32_i32_e32 v4, v4 ; 7E080B04 v_cmp_eq_i32_e32 vcc, 2, v1 ; 7D040282 v_mul_f32_e32 v1, v7, v12 ; 10021907 v_mac_f32_e32 v1, v13, v9 ; 3E02130D v_cndmask_b32_e64 v20, 0, 1.0, vcc ; D2000014 01A9E480 v_mac_f32_e32 v1, v4, v20 ; 3E022904 v_add_f32_e32 v1, 0, v1 ; 06020280 v_mul_f32_e32 v3, 0x3b808081, v1 ; 100602FF 3B808081 v_add_f32_e32 v1, -1.0, v13 ; 06021AF3 v_add_f32_e32 v7, -1.0, v7 ; 060E0EF3 v_add_f32_e32 v13, -1.0, v4 ; 061A08F3 v_mov_b32_e32 v4, 0x3dcccccd ; 7E0802FF 3DCCCCCD image_sample_l v[3:4], 3, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[4:11], s[16:19] ; F0900300 00810303 s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v2, v3, v2, 0x41800000 ; 40040503 41800000 exp 15, 32, 0, 0, 0, v1, v7, v13, v2 ; F800020F 020D0701 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, 4.0, v4 ; 100208F6 exp 15, 33, 0, 0, 0, v9, v12, v20, v1 ; F800021F 01140C09 s_buffer_load_dword s4, s[12:15], 0x1c ; C2020D1C s_buffer_load_dword s5, s[12:15], 0x1d ; C2028D1D s_buffer_load_dword s6, s[12:15], 0x1e ; C2030D1E s_buffer_load_dword s7, s[12:15], 0x0 ; C2038D00 s_buffer_load_dword s8, s[12:15], 0x4 ; C2040D04 s_buffer_load_dword s9, s[12:15], 0x5 ; C2048D05 s_buffer_load_dword s10, s[12:15], 0x6 ; C2050D06 s_buffer_load_dword s11, s[12:15], 0x7 ; C2058D07 s_buffer_load_dword s16, s[12:15], 0x8 ; C2080D08 s_buffer_load_dword s17, s[12:15], 0x9 ; C2088D09 s_buffer_load_dword s18, s[12:15], 0xa ; C2090D0A s_buffer_load_dword s19, s[12:15], 0xb ; C2098D0B s_buffer_load_dword s20, s[12:15], 0xc ; C20A0D0C s_buffer_load_dword s21, s[12:15], 0xe ; C20A8D0E s_buffer_load_dword s22, s[12:15], 0xf ; C20B0D0F s_buffer_load_dword s23, s[12:15], 0x10 ; C20B8D10 s_buffer_load_dword s24, s[12:15], 0x11 ; C20C0D11 s_buffer_load_dword s25, s[12:15], 0x12 ; C20C8D12 s_buffer_load_dword s26, s[12:15], 0x13 ; C20D0D13 s_buffer_load_dword s27, s[12:15], 0x14 ; C20D8D14 s_buffer_load_dword s28, s[12:15], 0x15 ; C20E0D15 s_buffer_load_dword s29, s[12:15], 0x16 ; C20E8D16 s_buffer_load_dword s30, s[12:15], 0x17 ; C20F0D17 s_buffer_load_dword s31, s[12:15], 0x18 ; C20F8D18 s_buffer_load_dword s32, s[12:15], 0x19 ; C2100D19 s_buffer_load_dword s33, s[12:15], 0x1a ; C2108D1A s_buffer_load_dword s12, s[12:15], 0x1b ; C2060D1B s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v1, -2.0, v8 ; 060210F5 v_mac_f32_e32 v15, s1, v1 ; 3E1E0201 v_mac_f32_e32 v16, s2, v1 ; 3E200202 v_mac_f32_e32 v10, s3, v1 ; 3E140203 v_mul_f32_e32 v1, v15, v15 ; 10021F0F v_mac_f32_e32 v1, v16, v16 ; 3E022110 v_mac_f32_e32 v1, v10, v10 ; 3E02150A v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 v_mul_f32_e32 v2, 0.5, v14 ; 10041CF0 v_mul_f32_e32 v3, 0.5, v17 ; 100622F0 v_mad_f32 v4, 0.5, v0, v3 ; D2820004 040E00F0 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v3, s7, v2 ; 3E060407 v_mul_f32_e32 v2, v1, v15 ; 10041F01 v_mul_f32_e32 v7, v1, v16 ; 100E2101 v_mul_f32_e32 v1, v1, v10 ; 10021501 v_mul_f32_e32 v8, v1, v7 ; 10100F01 v_mul_f32_e32 v9, s24, v8 ; 10121018 v_mul_f32_e32 v10, s28, v8 ; 1014101C v_mul_f32_e32 v8, s32, v8 ; 10101020 v_mul_f32_e32 v12, v7, v2 ; 10180507 v_mac_f32_e32 v9, s23, v12 ; 3E121817 v_mac_f32_e32 v10, s27, v12 ; 3E14181B v_mac_f32_e32 v8, s31, v12 ; 3E10181F v_mul_f32_e32 v12, v1, v1 ; 10180301 v_mac_f32_e32 v9, s25, v12 ; 3E121819 v_mac_f32_e32 v10, s29, v12 ; 3E14181D v_mac_f32_e32 v8, s33, v12 ; 3E101821 v_mul_f32_e32 v12, v2, v1 ; 10180302 v_mac_f32_e32 v9, s26, v12 ; 3E12181A v_mac_f32_e32 v10, s30, v12 ; 3E14181E v_mac_f32_e32 v8, s12, v12 ; 3E10180C exp 15, 34, 0, 0, 0, v4, v3, v11, v17 ; F800022F 110B0304 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, s9, v7 ; 10060E09 v_mac_f32_e32 v3, s8, v2 ; 3E060408 v_mul_f32_e32 v4, s17, v7 ; 10080E11 v_mac_f32_e32 v4, s16, v2 ; 3E080410 v_mul_f32_e32 v12, s0, v7 ; 10180E00 v_mac_f32_e32 v12, s20, v2 ; 3E180414 v_mac_f32_e32 v3, s10, v1 ; 3E06020A v_mac_f32_e32 v4, s18, v1 ; 3E080212 v_mac_f32_e32 v12, s21, v1 ; 3E180215 exp 15, 35, 0, 0, 0, v11, v2, v7, v1 ; F800023F 0107020B s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, v7, v7 ; 10020F07 v_mad_f32 v1, v2, v2, -v1 ; D2820001 84060502 v_mac_f32_e32 v9, s4, v1 ; 3E120204 v_mac_f32_e32 v10, s5, v1 ; 3E140205 v_mac_f32_e32 v8, s6, v1 ; 3E100206 v_add_f32_e32 v1, s11, v3 ; 0602060B v_add_f32_e32 v1, v1, v9 ; 06021301 exp 15, 36, 0, 0, 0, v18, v19, v6, v1 ; F800024F 01061312 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v1, s19, v4 ; 06020813 v_add_f32_e32 v1, v1, v10 ; 06021501 v_add_f32_e32 v2, s22, v12 ; 06041816 v_add_f32_e32 v2, v2, v8 ; 06041102 exp 15, 37, 0, 0, 0, v1, v2, v2, v5 ; F800025F 05020201 exp 15, 12, 0, 1, 0, v0, v14, v11, v17 ; F80008CF 110B0E00 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 24 Code Size: 972 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL SAMP[10] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL SVIEW[6], 2D, FLOAT DCL SVIEW[7], 2D, FLOAT DCL SVIEW[8], 2D, FLOAT DCL SVIEW[9], 2D, FLOAT DCL SVIEW[10], 2D, FLOAT DCL CONST[0..4] DCL CONST[16..24] DCL TEMP[0..37], LOCAL IMM[0] FLT32 { -0.2000, 7.0000, 0.0100, 0.5000} IMM[1] FLT32 { 64.0000, -64.0000, 4.0000, 0.6931} IMM[2] FLT32 { 0.0039, 0.0020, 1.0000, 2.0000} IMM[3] FLT32 { 3.0000, 0.0000, -1.0000, 0.0001} IMM[4] FLT32 { 32.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].x, IN[4].wwww 1: MOV TEMP[0].yz, IN[5].yxyy 2: DP3 TEMP[1].x, CONST[1].xyzz, CONST[1].xyzz 3: RSQ TEMP[1].x, TEMP[1].xxxx 4: MUL TEMP[1].xyz, CONST[1].xyzz, TEMP[1].xxxx 5: ADD TEMP[2].xyz, CONST[0].xyzz, -IN[4].xyzz 6: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 7: RSQ TEMP[3].x, TEMP[3].xxxx 8: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 9: ABS TEMP[3].xyz, IN[3].yzww 10: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 11: RSQ TEMP[4].x, TEMP[4].xxxx 12: MAD TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx, IMM[0].xxxx 13: MUL TEMP[3].xyz, TEMP[3].xyzz, IMM[0].yyyy 14: MAX TEMP[3].xyz, TEMP[3].xyzz, IMM[0].zzzz 15: ADD TEMP[4].x, TEMP[3].xxxx, TEMP[3].yyyy 16: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[3].zzzz 17: RCP TEMP[4].xyz, TEMP[4].xxxx 18: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xyzz 19: ADD TEMP[4], IN[0], IMM[0].wwww 20: FLR TEMP[4].xyz, TEMP[4] 21: MOV TEMP[5].x, CONST[16].xxxx 22: MUL TEMP[6].x, TEMP[4].xxxx, CONST[16].xxxx 23: MOV TEMP[7].x, TEMP[6].xxxx 24: FLR TEMP[6].x, TEMP[6].xxxx 25: MUL TEMP[6].x, TEMP[6].xxxx, CONST[16].xxxx 26: FSGE TEMP[8].x, TEMP[4].xxxx, IMM[1].xxxx 27: UIF TEMP[8].xxxx :0 28: MOV TEMP[5].x, CONST[17].xxxx 29: ADD TEMP[8].x, TEMP[4].xxxx, IMM[1].yyyy 30: MUL TEMP[8].x, TEMP[8].xxxx, CONST[17].xxxx 31: MOV TEMP[7].x, TEMP[8].xxxx 32: FLR TEMP[9].x, TEMP[8].xxxx 33: MUL TEMP[9].x, TEMP[9].xxxx, CONST[17].xxxx 34: MOV TEMP[6].x, TEMP[9].xxxx 35: FRC TEMP[8].x, TEMP[8].xxxx 36: FRC TEMP[10].x, TEMP[9].xxxx 37: MOV TEMP[8].y, TEMP[10].xxxx 38: FLR TEMP[9].x, TEMP[9].xxxx 39: ADD TEMP[9].x, TEMP[9].xxxx, IMM[1].zzzz 40: MOV TEMP[8].z, TEMP[9].xxxx 41: MOV TEMP[8].xyz, TEMP[8].xyzx 42: ELSE :0 43: FRC TEMP[7].x, TEMP[7].xxxx 44: FRC TEMP[9].x, TEMP[6].xxxx 45: MOV TEMP[7].y, TEMP[9].xxxx 46: FLR TEMP[6].x, TEMP[6].xxxx 47: MOV TEMP[7].z, TEMP[6].xxxx 48: MOV TEMP[8].xyz, TEMP[7].xyzx 49: ENDIF 50: MOV TEMP[6].x, CONST[16].xxxx 51: MUL TEMP[7].x, TEMP[4].yyyy, CONST[16].xxxx 52: MOV TEMP[9].x, TEMP[7].xxxx 53: FLR TEMP[7].x, TEMP[7].xxxx 54: MUL TEMP[7].x, TEMP[7].xxxx, CONST[16].xxxx 55: FSGE TEMP[10].x, TEMP[4].yyyy, IMM[1].xxxx 56: UIF TEMP[10].xxxx :0 57: MOV TEMP[6].x, CONST[17].xxxx 58: ADD TEMP[10].x, TEMP[4].yyyy, IMM[1].yyyy 59: MUL TEMP[10].x, TEMP[10].xxxx, CONST[17].xxxx 60: MOV TEMP[9].x, TEMP[10].xxxx 61: FLR TEMP[11].x, TEMP[10].xxxx 62: MUL TEMP[11].x, TEMP[11].xxxx, CONST[17].xxxx 63: MOV TEMP[7].x, TEMP[11].xxxx 64: FRC TEMP[10].x, TEMP[10].xxxx 65: FRC TEMP[12].x, TEMP[11].xxxx 66: MOV TEMP[10].y, TEMP[12].xxxx 67: FLR TEMP[11].x, TEMP[11].xxxx 68: ADD TEMP[11].x, TEMP[11].xxxx, IMM[1].zzzz 69: MOV TEMP[10].z, TEMP[11].xxxx 70: MOV TEMP[10].xyz, TEMP[10].xyzx 71: ELSE :0 72: FRC TEMP[9].x, TEMP[9].xxxx 73: FRC TEMP[11].x, TEMP[7].xxxx 74: MOV TEMP[9].y, TEMP[11].xxxx 75: FLR TEMP[7].x, TEMP[7].xxxx 76: MOV TEMP[9].z, TEMP[7].xxxx 77: MOV TEMP[10].xyz, TEMP[9].xyzx 78: ENDIF 79: MOV TEMP[7].x, CONST[16].xxxx 80: MUL TEMP[9].x, TEMP[4].zzzz, CONST[16].xxxx 81: MOV TEMP[11].x, TEMP[9].xxxx 82: FLR TEMP[9].x, TEMP[9].xxxx 83: MUL TEMP[9].x, TEMP[9].xxxx, CONST[16].xxxx 84: FSGE TEMP[12].x, TEMP[4].zzzz, IMM[1].xxxx 85: UIF TEMP[12].xxxx :0 86: MOV TEMP[7].x, CONST[17].xxxx 87: ADD TEMP[4].x, TEMP[4].zzzz, IMM[1].yyyy 88: MUL TEMP[4].x, TEMP[4].xxxx, CONST[17].xxxx 89: MOV TEMP[11].x, TEMP[4].xxxx 90: FLR TEMP[12].x, TEMP[4].xxxx 91: MUL TEMP[12].x, TEMP[12].xxxx, CONST[17].xxxx 92: MOV TEMP[9].x, TEMP[12].xxxx 93: FRC TEMP[4].x, TEMP[4].xxxx 94: FRC TEMP[13].x, TEMP[12].xxxx 95: MOV TEMP[4].y, TEMP[13].xxxx 96: FLR TEMP[12].x, TEMP[12].xxxx 97: ADD TEMP[12].x, TEMP[12].xxxx, IMM[1].zzzz 98: MOV TEMP[4].z, TEMP[12].xxxx 99: MOV TEMP[4].xyz, TEMP[4].xyzx 100: ELSE :0 101: FRC TEMP[11].x, TEMP[11].xxxx 102: FRC TEMP[12].x, TEMP[9].xxxx 103: MOV TEMP[11].y, TEMP[12].xxxx 104: FLR TEMP[9].x, TEMP[9].xxxx 105: MOV TEMP[11].z, TEMP[9].xxxx 106: MOV TEMP[4].xyz, TEMP[11].xyzx 107: ENDIF 108: ADD TEMP[9].xyz, IN[4].xyzz, -CONST[0].xyzz 109: DP3 TEMP[9].x, TEMP[9].xyzz, TEMP[9].xyzz 110: MUL TEMP[9].x, CONST[22].xxxx, TEMP[9].xxxx 111: LG2 TEMP[9].x, TEMP[9].xxxx 112: MUL TEMP[9].x, TEMP[9].xxxx, IMM[1].wwww 113: MUL TEMP[9].x, TEMP[9].xxxx, CONST[21].xxxx 114: MOV TEMP[11].xy, IN[4].xyxx 115: MOV TEMP[12].x, IMM[2].xxxx 116: FSNE TEMP[13].x, CONST[16].xxxx, TEMP[5].xxxx 117: UIF TEMP[13].xxxx :0 118: MOV TEMP[12].x, IMM[2].yyyy 119: RCP TEMP[13].x, CONST[19].xxxx 120: MUL TEMP[11].xy, IN[4].xyyy, TEMP[13].xxxx 121: ELSE :0 122: RCP TEMP[13].x, CONST[18].xxxx 123: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx 124: ENDIF 125: FRC TEMP[11].xy, TEMP[11].xyyy 126: MUL TEMP[13].x, CONST[20].xxxx, IMM[2].wwww 127: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx 128: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx 129: MUL TEMP[12].x, TEMP[12].xxxx, CONST[20].xxxx 130: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx 131: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[5].xxxx, TEMP[8].xyyy 132: MOV TEMP[12].xy, TEMP[11].xyyy 133: MOV TEMP[12].w, TEMP[9].xxxx 134: TXL TEMP[12], TEMP[12], SAMP[9], 2D 135: FSEQ TEMP[13].x, TEMP[8].zzzz, IMM[1].zzzz 136: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 137: MOV TEMP[14].xy, TEMP[11].xyyy 138: MOV TEMP[14].w, TEMP[9].xxxx 139: TXL TEMP[14], TEMP[14], SAMP[7], 2D 140: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[3].xxxx 141: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 142: MOV TEMP[16].xy, TEMP[11].xyyy 143: MOV TEMP[16].w, TEMP[9].xxxx 144: TXL TEMP[16], TEMP[16], SAMP[5], 2D 145: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[2].wwww 146: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 147: MOV TEMP[18].xy, TEMP[11].xyyy 148: MOV TEMP[18].w, TEMP[9].xxxx 149: TXL TEMP[18], TEMP[18], SAMP[3], 2D 150: FSEQ TEMP[19].x, TEMP[8].zzzz, IMM[2].zzzz 151: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 152: MOV TEMP[11].xy, TEMP[11].xyyy 153: MOV TEMP[11].w, TEMP[9].xxxx 154: TXL TEMP[11], TEMP[11], SAMP[1], 2D 155: FSEQ TEMP[20].x, TEMP[8].zzzz, IMM[3].yyyy 156: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 157: MUL TEMP[11], TEMP[11], TEMP[20].xxxx 158: MAD TEMP[11], TEMP[18], TEMP[19].xxxx, TEMP[11] 159: MAD TEMP[11], TEMP[16], TEMP[17].xxxx, TEMP[11] 160: MAD TEMP[11], TEMP[14], TEMP[15].xxxx, TEMP[11] 161: MAD TEMP[11], TEMP[12], TEMP[13].xxxx, TEMP[11] 162: MOV TEMP[12].xy, IN[4].zyzz 163: MOV TEMP[13].x, IMM[2].xxxx 164: FSNE TEMP[14].x, CONST[16].xxxx, TEMP[5].xxxx 165: UIF TEMP[14].xxxx :0 166: MOV TEMP[13].x, IMM[2].yyyy 167: RCP TEMP[14].x, CONST[19].xxxx 168: MUL TEMP[12].xy, IN[4].zyyy, TEMP[14].xxxx 169: ELSE :0 170: RCP TEMP[14].x, CONST[18].xxxx 171: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx 172: ENDIF 173: FRC TEMP[12].xy, TEMP[12].xyyy 174: MUL TEMP[14].x, CONST[20].xxxx, IMM[2].wwww 175: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx 176: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx 177: MUL TEMP[13].x, TEMP[13].xxxx, CONST[20].xxxx 178: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx 179: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[5].xxxx, TEMP[8].xyyy 180: MOV TEMP[13].xy, TEMP[12].xyyy 181: MOV TEMP[13].w, TEMP[9].xxxx 182: TXL TEMP[13], TEMP[13], SAMP[9], 2D 183: FSEQ TEMP[14].x, TEMP[8].zzzz, IMM[1].zzzz 184: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 185: MOV TEMP[15].xy, TEMP[12].xyyy 186: MOV TEMP[15].w, TEMP[9].xxxx 187: TXL TEMP[15], TEMP[15], SAMP[7], 2D 188: FSEQ TEMP[16].x, TEMP[8].zzzz, IMM[3].xxxx 189: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 190: MOV TEMP[17].xy, TEMP[12].xyyy 191: MOV TEMP[17].w, TEMP[9].xxxx 192: TXL TEMP[17], TEMP[17], SAMP[5], 2D 193: FSEQ TEMP[18].x, TEMP[8].zzzz, IMM[2].wwww 194: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 195: MOV TEMP[19].xy, TEMP[12].xyyy 196: MOV TEMP[19].w, TEMP[9].xxxx 197: TXL TEMP[19], TEMP[19], SAMP[3], 2D 198: FSEQ TEMP[20].x, TEMP[8].zzzz, IMM[2].zzzz 199: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 200: MOV TEMP[12].xy, TEMP[12].xyyy 201: MOV TEMP[12].w, TEMP[9].xxxx 202: TXL TEMP[12], TEMP[12], SAMP[1], 2D 203: FSEQ TEMP[21].x, TEMP[8].zzzz, IMM[3].yyyy 204: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 205: MUL TEMP[12], TEMP[12], TEMP[21].xxxx 206: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12] 207: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12] 208: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12] 209: MAD TEMP[12], TEMP[13], TEMP[14].xxxx, TEMP[12] 210: MOV TEMP[13].xy, IN[4].zxzz 211: MOV TEMP[14].x, IMM[2].xxxx 212: FSNE TEMP[15].x, CONST[16].xxxx, TEMP[5].xxxx 213: UIF TEMP[15].xxxx :0 214: MOV TEMP[14].x, IMM[2].yyyy 215: RCP TEMP[15].x, CONST[19].xxxx 216: MUL TEMP[13].xy, IN[4].zxxx, TEMP[15].xxxx 217: ELSE :0 218: RCP TEMP[15].x, CONST[18].xxxx 219: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx 220: ENDIF 221: FRC TEMP[13].xy, TEMP[13].xyyy 222: MUL TEMP[15].x, CONST[20].xxxx, IMM[2].wwww 223: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx 224: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx 225: MUL TEMP[14].x, TEMP[14].xxxx, CONST[20].xxxx 226: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx 227: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[5].xxxx, TEMP[8].xyyy 228: MOV TEMP[14].xy, TEMP[13].xyyy 229: MOV TEMP[14].w, TEMP[9].xxxx 230: TXL TEMP[14], TEMP[14], SAMP[9], 2D 231: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[1].zzzz 232: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 233: MOV TEMP[16].xy, TEMP[13].xyyy 234: MOV TEMP[16].w, TEMP[9].xxxx 235: TXL TEMP[16], TEMP[16], SAMP[7], 2D 236: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[3].xxxx 237: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 238: MOV TEMP[18].xy, TEMP[13].xyyy 239: MOV TEMP[18].w, TEMP[9].xxxx 240: TXL TEMP[18], TEMP[18], SAMP[5], 2D 241: FSEQ TEMP[19].x, TEMP[8].zzzz, IMM[2].wwww 242: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 243: MOV TEMP[20].xy, TEMP[13].xyyy 244: MOV TEMP[20].w, TEMP[9].xxxx 245: TXL TEMP[20], TEMP[20], SAMP[3], 2D 246: FSEQ TEMP[21].x, TEMP[8].zzzz, IMM[2].zzzz 247: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 248: MOV TEMP[13].xy, TEMP[13].xyyy 249: MOV TEMP[13].w, TEMP[9].xxxx 250: TXL TEMP[13], TEMP[13], SAMP[1], 2D 251: FSEQ TEMP[22].x, TEMP[8].zzzz, IMM[3].yyyy 252: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 253: MUL TEMP[13], TEMP[13], TEMP[22].xxxx 254: MAD TEMP[13], TEMP[20], TEMP[21].xxxx, TEMP[13] 255: MAD TEMP[13], TEMP[18], TEMP[19].xxxx, TEMP[13] 256: MAD TEMP[13], TEMP[16], TEMP[17].xxxx, TEMP[13] 257: MAD TEMP[13], TEMP[14], TEMP[15].xxxx, TEMP[13] 258: MOV TEMP[14].xy, IN[4].xyxx 259: MOV TEMP[15].x, IMM[2].xxxx 260: FSNE TEMP[16].x, CONST[16].xxxx, TEMP[6].xxxx 261: UIF TEMP[16].xxxx :0 262: MOV TEMP[15].x, IMM[2].yyyy 263: RCP TEMP[16].x, CONST[19].xxxx 264: MUL TEMP[14].xy, IN[4].xyyy, TEMP[16].xxxx 265: ELSE :0 266: RCP TEMP[16].x, CONST[18].xxxx 267: MUL TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx 268: ENDIF 269: FRC TEMP[14].xy, TEMP[14].xyyy 270: MUL TEMP[16].x, CONST[20].xxxx, IMM[2].wwww 271: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[15].xxxx 272: ADD TEMP[16].x, IMM[2].zzzz, -TEMP[16].xxxx 273: MUL TEMP[15].x, TEMP[15].xxxx, CONST[20].xxxx 274: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx, TEMP[15].xxxx 275: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[6].xxxx, TEMP[10].xyyy 276: MOV TEMP[15].xy, TEMP[14].xyyy 277: MOV TEMP[15].w, TEMP[9].xxxx 278: TXL TEMP[15], TEMP[15], SAMP[9], 2D 279: FSEQ TEMP[16].x, TEMP[10].zzzz, IMM[1].zzzz 280: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 281: MOV TEMP[17].xy, TEMP[14].xyyy 282: MOV TEMP[17].w, TEMP[9].xxxx 283: TXL TEMP[17], TEMP[17], SAMP[7], 2D 284: FSEQ TEMP[18].x, TEMP[10].zzzz, IMM[3].xxxx 285: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 286: MOV TEMP[19].xy, TEMP[14].xyyy 287: MOV TEMP[19].w, TEMP[9].xxxx 288: TXL TEMP[19], TEMP[19], SAMP[5], 2D 289: FSEQ TEMP[20].x, TEMP[10].zzzz, IMM[2].wwww 290: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 291: MOV TEMP[21].xy, TEMP[14].xyyy 292: MOV TEMP[21].w, TEMP[9].xxxx 293: TXL TEMP[21], TEMP[21], SAMP[3], 2D 294: FSEQ TEMP[22].x, TEMP[10].zzzz, IMM[2].zzzz 295: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 296: MOV TEMP[14].xy, TEMP[14].xyyy 297: MOV TEMP[14].w, TEMP[9].xxxx 298: TXL TEMP[14], TEMP[14], SAMP[1], 2D 299: FSEQ TEMP[23].x, TEMP[10].zzzz, IMM[3].yyyy 300: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz 301: MUL TEMP[14], TEMP[14], TEMP[23].xxxx 302: MAD TEMP[14], TEMP[21], TEMP[22].xxxx, TEMP[14] 303: MAD TEMP[14], TEMP[19], TEMP[20].xxxx, TEMP[14] 304: MAD TEMP[14], TEMP[17], TEMP[18].xxxx, TEMP[14] 305: MAD TEMP[14], TEMP[15], TEMP[16].xxxx, TEMP[14] 306: MOV TEMP[15].xy, IN[4].zyzz 307: MOV TEMP[16].x, IMM[2].xxxx 308: FSNE TEMP[17].x, CONST[16].xxxx, TEMP[6].xxxx 309: UIF TEMP[17].xxxx :0 310: MOV TEMP[16].x, IMM[2].yyyy 311: RCP TEMP[17].x, CONST[19].xxxx 312: MUL TEMP[15].xy, IN[4].zyyy, TEMP[17].xxxx 313: ELSE :0 314: RCP TEMP[17].x, CONST[18].xxxx 315: MUL TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx 316: ENDIF 317: FRC TEMP[15].xy, TEMP[15].xyyy 318: MUL TEMP[17].x, CONST[20].xxxx, IMM[2].wwww 319: MUL TEMP[17].x, TEMP[17].xxxx, TEMP[16].xxxx 320: ADD TEMP[17].x, IMM[2].zzzz, -TEMP[17].xxxx 321: MUL TEMP[16].x, TEMP[16].xxxx, CONST[20].xxxx 322: MAD TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx, TEMP[16].xxxx 323: MAD TEMP[15].xy, TEMP[15].xyyy, TEMP[6].xxxx, TEMP[10].xyyy 324: MOV TEMP[16].xy, TEMP[15].xyyy 325: MOV TEMP[16].w, TEMP[9].xxxx 326: TXL TEMP[16], TEMP[16], SAMP[9], 2D 327: FSEQ TEMP[17].x, TEMP[10].zzzz, IMM[1].zzzz 328: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 329: MOV TEMP[18].xy, TEMP[15].xyyy 330: MOV TEMP[18].w, TEMP[9].xxxx 331: TXL TEMP[18], TEMP[18], SAMP[7], 2D 332: FSEQ TEMP[19].x, TEMP[10].zzzz, IMM[3].xxxx 333: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 334: MOV TEMP[20].xy, TEMP[15].xyyy 335: MOV TEMP[20].w, TEMP[9].xxxx 336: TXL TEMP[20], TEMP[20], SAMP[5], 2D 337: FSEQ TEMP[21].x, TEMP[10].zzzz, IMM[2].wwww 338: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 339: MOV TEMP[22].xy, TEMP[15].xyyy 340: MOV TEMP[22].w, TEMP[9].xxxx 341: TXL TEMP[22], TEMP[22], SAMP[3], 2D 342: FSEQ TEMP[23].x, TEMP[10].zzzz, IMM[2].zzzz 343: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz 344: MOV TEMP[15].xy, TEMP[15].xyyy 345: MOV TEMP[15].w, TEMP[9].xxxx 346: TXL TEMP[15], TEMP[15], SAMP[1], 2D 347: FSEQ TEMP[24].x, TEMP[10].zzzz, IMM[3].yyyy 348: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz 349: MUL TEMP[15], TEMP[15], TEMP[24].xxxx 350: MAD TEMP[15], TEMP[22], TEMP[23].xxxx, TEMP[15] 351: MAD TEMP[15], TEMP[20], TEMP[21].xxxx, TEMP[15] 352: MAD TEMP[15], TEMP[18], TEMP[19].xxxx, TEMP[15] 353: MAD TEMP[15], TEMP[16], TEMP[17].xxxx, TEMP[15] 354: MOV TEMP[16].xy, IN[4].zxzz 355: MOV TEMP[17].x, IMM[2].xxxx 356: FSNE TEMP[18].x, CONST[16].xxxx, TEMP[6].xxxx 357: UIF TEMP[18].xxxx :0 358: MOV TEMP[17].x, IMM[2].yyyy 359: RCP TEMP[18].x, CONST[19].xxxx 360: MUL TEMP[16].xy, IN[4].zxxx, TEMP[18].xxxx 361: ELSE :0 362: RCP TEMP[18].x, CONST[18].xxxx 363: MUL TEMP[16].xy, TEMP[16].xyyy, TEMP[18].xxxx 364: ENDIF 365: FRC TEMP[16].xy, TEMP[16].xyyy 366: MUL TEMP[18].x, CONST[20].xxxx, IMM[2].wwww 367: MUL TEMP[18].x, TEMP[18].xxxx, TEMP[17].xxxx 368: ADD TEMP[18].x, IMM[2].zzzz, -TEMP[18].xxxx 369: MUL TEMP[17].x, TEMP[17].xxxx, CONST[20].xxxx 370: MAD TEMP[16].xy, TEMP[16].xyyy, TEMP[18].xxxx, TEMP[17].xxxx 371: MAD TEMP[16].xy, TEMP[16].xyyy, TEMP[6].xxxx, TEMP[10].xyyy 372: MOV TEMP[17].xy, TEMP[16].xyyy 373: MOV TEMP[17].w, TEMP[9].xxxx 374: TXL TEMP[17], TEMP[17], SAMP[9], 2D 375: FSEQ TEMP[18].x, TEMP[10].zzzz, IMM[1].zzzz 376: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 377: MOV TEMP[19].xy, TEMP[16].xyyy 378: MOV TEMP[19].w, TEMP[9].xxxx 379: TXL TEMP[19], TEMP[19], SAMP[7], 2D 380: FSEQ TEMP[20].x, TEMP[10].zzzz, IMM[3].xxxx 381: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 382: MOV TEMP[21].xy, TEMP[16].xyyy 383: MOV TEMP[21].w, TEMP[9].xxxx 384: TXL TEMP[21], TEMP[21], SAMP[5], 2D 385: FSEQ TEMP[22].x, TEMP[10].zzzz, IMM[2].wwww 386: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 387: MOV TEMP[23].xy, TEMP[16].xyyy 388: MOV TEMP[23].w, TEMP[9].xxxx 389: TXL TEMP[23], TEMP[23], SAMP[3], 2D 390: FSEQ TEMP[24].x, TEMP[10].zzzz, IMM[2].zzzz 391: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz 392: MOV TEMP[16].xy, TEMP[16].xyyy 393: MOV TEMP[16].w, TEMP[9].xxxx 394: TXL TEMP[16], TEMP[16], SAMP[1], 2D 395: FSEQ TEMP[25].x, TEMP[10].zzzz, IMM[3].yyyy 396: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz 397: MUL TEMP[16], TEMP[16], TEMP[25].xxxx 398: MAD TEMP[16], TEMP[23], TEMP[24].xxxx, TEMP[16] 399: MAD TEMP[16], TEMP[21], TEMP[22].xxxx, TEMP[16] 400: MAD TEMP[16], TEMP[19], TEMP[20].xxxx, TEMP[16] 401: MAD TEMP[16], TEMP[17], TEMP[18].xxxx, TEMP[16] 402: MOV TEMP[17].xy, IN[4].xyxx 403: MOV TEMP[18].x, IMM[2].xxxx 404: FSNE TEMP[19].x, CONST[16].xxxx, TEMP[7].xxxx 405: UIF TEMP[19].xxxx :0 406: MOV TEMP[18].x, IMM[2].yyyy 407: RCP TEMP[19].x, CONST[19].xxxx 408: MUL TEMP[17].xy, IN[4].xyyy, TEMP[19].xxxx 409: ELSE :0 410: RCP TEMP[19].x, CONST[18].xxxx 411: MUL TEMP[17].xy, TEMP[17].xyyy, TEMP[19].xxxx 412: ENDIF 413: FRC TEMP[17].xy, TEMP[17].xyyy 414: MUL TEMP[19].x, CONST[20].xxxx, IMM[2].wwww 415: MUL TEMP[19].x, TEMP[19].xxxx, TEMP[18].xxxx 416: ADD TEMP[19].x, IMM[2].zzzz, -TEMP[19].xxxx 417: MUL TEMP[18].x, TEMP[18].xxxx, CONST[20].xxxx 418: MAD TEMP[17].xy, TEMP[17].xyyy, TEMP[19].xxxx, TEMP[18].xxxx 419: MAD TEMP[17].xy, TEMP[17].xyyy, TEMP[7].xxxx, TEMP[4].xyyy 420: MOV TEMP[18].xy, TEMP[17].xyyy 421: MOV TEMP[18].w, TEMP[9].xxxx 422: TXL TEMP[18], TEMP[18], SAMP[9], 2D 423: FSEQ TEMP[19].x, TEMP[4].zzzz, IMM[1].zzzz 424: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 425: MOV TEMP[20].xy, TEMP[17].xyyy 426: MOV TEMP[20].w, TEMP[9].xxxx 427: TXL TEMP[20], TEMP[20], SAMP[7], 2D 428: FSEQ TEMP[21].x, TEMP[4].zzzz, IMM[3].xxxx 429: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 430: MOV TEMP[22].xy, TEMP[17].xyyy 431: MOV TEMP[22].w, TEMP[9].xxxx 432: TXL TEMP[22], TEMP[22], SAMP[5], 2D 433: FSEQ TEMP[23].x, TEMP[4].zzzz, IMM[2].wwww 434: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz 435: MOV TEMP[24].xy, TEMP[17].xyyy 436: MOV TEMP[24].w, TEMP[9].xxxx 437: TXL TEMP[24], TEMP[24], SAMP[3], 2D 438: FSEQ TEMP[25].x, TEMP[4].zzzz, IMM[2].zzzz 439: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz 440: MOV TEMP[17].xy, TEMP[17].xyyy 441: MOV TEMP[17].w, TEMP[9].xxxx 442: TXL TEMP[17], TEMP[17], SAMP[1], 2D 443: FSEQ TEMP[26].x, TEMP[4].zzzz, IMM[3].yyyy 444: AND TEMP[26].x, TEMP[26].xxxx, IMM[2].zzzz 445: MUL TEMP[17], TEMP[17], TEMP[26].xxxx 446: MAD TEMP[17], TEMP[24], TEMP[25].xxxx, TEMP[17] 447: MAD TEMP[17], TEMP[22], TEMP[23].xxxx, TEMP[17] 448: MAD TEMP[17], TEMP[20], TEMP[21].xxxx, TEMP[17] 449: MAD TEMP[17], TEMP[18], TEMP[19].xxxx, TEMP[17] 450: MOV TEMP[18].xy, IN[4].zyzz 451: MOV TEMP[19].x, IMM[2].xxxx 452: FSNE TEMP[20].x, CONST[16].xxxx, TEMP[7].xxxx 453: UIF TEMP[20].xxxx :0 454: MOV TEMP[19].x, IMM[2].yyyy 455: RCP TEMP[20].x, CONST[19].xxxx 456: MUL TEMP[18].xy, IN[4].zyyy, TEMP[20].xxxx 457: ELSE :0 458: RCP TEMP[20].x, CONST[18].xxxx 459: MUL TEMP[18].xy, TEMP[18].xyyy, TEMP[20].xxxx 460: ENDIF 461: FRC TEMP[18].xy, TEMP[18].xyyy 462: MUL TEMP[20].x, CONST[20].xxxx, IMM[2].wwww 463: MUL TEMP[20].x, TEMP[20].xxxx, TEMP[19].xxxx 464: ADD TEMP[20].x, IMM[2].zzzz, -TEMP[20].xxxx 465: MUL TEMP[19].x, TEMP[19].xxxx, CONST[20].xxxx 466: MAD TEMP[18].xy, TEMP[18].xyyy, TEMP[20].xxxx, TEMP[19].xxxx 467: MAD TEMP[18].xy, TEMP[18].xyyy, TEMP[7].xxxx, TEMP[4].xyyy 468: MOV TEMP[19].xy, TEMP[18].xyyy 469: MOV TEMP[19].w, TEMP[9].xxxx 470: TXL TEMP[19], TEMP[19], SAMP[9], 2D 471: FSEQ TEMP[20].x, TEMP[4].zzzz, IMM[1].zzzz 472: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 473: MOV TEMP[21].xy, TEMP[18].xyyy 474: MOV TEMP[21].w, TEMP[9].xxxx 475: TXL TEMP[21], TEMP[21], SAMP[7], 2D 476: FSEQ TEMP[22].x, TEMP[4].zzzz, IMM[3].xxxx 477: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 478: MOV TEMP[23].xy, TEMP[18].xyyy 479: MOV TEMP[23].w, TEMP[9].xxxx 480: TXL TEMP[23], TEMP[23], SAMP[5], 2D 481: FSEQ TEMP[24].x, TEMP[4].zzzz, IMM[2].wwww 482: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz 483: MOV TEMP[25].xy, TEMP[18].xyyy 484: MOV TEMP[25].w, TEMP[9].xxxx 485: TXL TEMP[25], TEMP[25], SAMP[3], 2D 486: FSEQ TEMP[26].x, TEMP[4].zzzz, IMM[2].zzzz 487: AND TEMP[26].x, TEMP[26].xxxx, IMM[2].zzzz 488: MOV TEMP[18].xy, TEMP[18].xyyy 489: MOV TEMP[18].w, TEMP[9].xxxx 490: TXL TEMP[18], TEMP[18], SAMP[1], 2D 491: FSEQ TEMP[27].x, TEMP[4].zzzz, IMM[3].yyyy 492: AND TEMP[27].x, TEMP[27].xxxx, IMM[2].zzzz 493: MUL TEMP[18], TEMP[18], TEMP[27].xxxx 494: MAD TEMP[18], TEMP[25], TEMP[26].xxxx, TEMP[18] 495: MAD TEMP[18], TEMP[23], TEMP[24].xxxx, TEMP[18] 496: MAD TEMP[18], TEMP[21], TEMP[22].xxxx, TEMP[18] 497: MAD TEMP[18], TEMP[19], TEMP[20].xxxx, TEMP[18] 498: MOV TEMP[19].xy, IN[4].zxzz 499: MOV TEMP[20].x, IMM[2].xxxx 500: FSNE TEMP[21].x, CONST[16].xxxx, TEMP[7].xxxx 501: UIF TEMP[21].xxxx :0 502: MOV TEMP[20].x, IMM[2].yyyy 503: RCP TEMP[21].x, CONST[19].xxxx 504: MUL TEMP[19].xy, IN[4].zxxx, TEMP[21].xxxx 505: ELSE :0 506: RCP TEMP[21].x, CONST[18].xxxx 507: MUL TEMP[19].xy, TEMP[19].xyyy, TEMP[21].xxxx 508: ENDIF 509: FRC TEMP[19].xy, TEMP[19].xyyy 510: MUL TEMP[21].x, CONST[20].xxxx, IMM[2].wwww 511: MUL TEMP[21].x, TEMP[21].xxxx, TEMP[20].xxxx 512: ADD TEMP[21].x, IMM[2].zzzz, -TEMP[21].xxxx 513: MUL TEMP[20].x, TEMP[20].xxxx, CONST[20].xxxx 514: MAD TEMP[19].xy, TEMP[19].xyyy, TEMP[21].xxxx, TEMP[20].xxxx 515: MAD TEMP[19].xy, TEMP[19].xyyy, TEMP[7].xxxx, TEMP[4].xyyy 516: MOV TEMP[20].xy, TEMP[19].xyyy 517: MOV TEMP[20].w, TEMP[9].xxxx 518: TXL TEMP[20], TEMP[20], SAMP[9], 2D 519: FSEQ TEMP[21].x, TEMP[4].zzzz, IMM[1].zzzz 520: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 521: MOV TEMP[22].xy, TEMP[19].xyyy 522: MOV TEMP[22].w, TEMP[9].xxxx 523: TXL TEMP[22], TEMP[22], SAMP[7], 2D 524: FSEQ TEMP[23].x, TEMP[4].zzzz, IMM[3].xxxx 525: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz 526: MOV TEMP[24].xy, TEMP[19].xyyy 527: MOV TEMP[24].w, TEMP[9].xxxx 528: TXL TEMP[24], TEMP[24], SAMP[5], 2D 529: FSEQ TEMP[25].x, TEMP[4].zzzz, IMM[2].wwww 530: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz 531: MOV TEMP[26].xy, TEMP[19].xyyy 532: MOV TEMP[26].w, TEMP[9].xxxx 533: TXL TEMP[26], TEMP[26], SAMP[3], 2D 534: FSEQ TEMP[27].x, TEMP[4].zzzz, IMM[2].zzzz 535: AND TEMP[27].x, TEMP[27].xxxx, IMM[2].zzzz 536: MOV TEMP[19].xy, TEMP[19].xyyy 537: MOV TEMP[19].w, TEMP[9].xxxx 538: TXL TEMP[19], TEMP[19], SAMP[1], 2D 539: FSEQ TEMP[28].x, TEMP[4].zzzz, IMM[3].yyyy 540: AND TEMP[28].x, TEMP[28].xxxx, IMM[2].zzzz 541: MUL TEMP[19], TEMP[19], TEMP[28].xxxx 542: MAD TEMP[19], TEMP[26], TEMP[27].xxxx, TEMP[19] 543: MAD TEMP[19], TEMP[24], TEMP[25].xxxx, TEMP[19] 544: MAD TEMP[19], TEMP[22], TEMP[23].xxxx, TEMP[19] 545: MAD TEMP[19], TEMP[20], TEMP[21].xxxx, TEMP[19] 546: MUL TEMP[17], TEMP[17], TEMP[3].zzzz 547: MAD TEMP[17], TEMP[18], TEMP[3].xxxx, TEMP[17] 548: MAD TEMP[17], TEMP[19], TEMP[3].yyyy, TEMP[17] 549: MUL TEMP[14], TEMP[14], TEMP[3].zzzz 550: MAD TEMP[14], TEMP[15], TEMP[3].xxxx, TEMP[14] 551: MAD TEMP[14], TEMP[16], TEMP[3].yyyy, TEMP[14] 552: MUL TEMP[11], TEMP[11], TEMP[3].zzzz 553: MAD TEMP[11], TEMP[12], TEMP[3].xxxx, TEMP[11] 554: MAD TEMP[11], TEMP[13], TEMP[3].yyyy, TEMP[11] 555: MUL TEMP[11], IN[1].xxxx, TEMP[11] 556: MAD TEMP[11], IN[1].yyyy, TEMP[14], TEMP[11] 557: MAD TEMP[11].xyz, IN[1].zzzz, TEMP[17], TEMP[11] 558: MOV TEMP[12].xy, IN[4].zyzz 559: MOV TEMP[13].x, IMM[2].xxxx 560: FSNE TEMP[14].x, CONST[16].xxxx, TEMP[5].xxxx 561: UIF TEMP[14].xxxx :0 562: MOV TEMP[13].x, IMM[2].yyyy 563: RCP TEMP[14].x, CONST[19].xxxx 564: MUL TEMP[12].xy, IN[4].zyyy, TEMP[14].xxxx 565: ELSE :0 566: RCP TEMP[14].x, CONST[18].xxxx 567: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx 568: ENDIF 569: FRC TEMP[12].xy, TEMP[12].xyyy 570: MUL TEMP[14].x, CONST[20].xxxx, IMM[2].wwww 571: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx 572: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx 573: MUL TEMP[13].x, TEMP[13].xxxx, CONST[20].xxxx 574: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx 575: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[5].xxxx, TEMP[8].xyyy 576: MOV TEMP[13].xy, TEMP[12].xyyy 577: MOV TEMP[13].w, TEMP[9].xxxx 578: TXL TEMP[13], TEMP[13], SAMP[10], 2D 579: FSEQ TEMP[14].x, TEMP[8].zzzz, IMM[1].zzzz 580: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 581: MOV TEMP[15].xy, TEMP[12].xyyy 582: MOV TEMP[15].w, TEMP[9].xxxx 583: TXL TEMP[15], TEMP[15], SAMP[8], 2D 584: FSEQ TEMP[16].x, TEMP[8].zzzz, IMM[3].xxxx 585: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 586: MOV TEMP[17].xy, TEMP[12].xyyy 587: MOV TEMP[17].w, TEMP[9].xxxx 588: TXL TEMP[17], TEMP[17], SAMP[6], 2D 589: FSEQ TEMP[18].x, TEMP[8].zzzz, IMM[2].wwww 590: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 591: MOV TEMP[19].xy, TEMP[12].xyyy 592: MOV TEMP[19].w, TEMP[9].xxxx 593: TXL TEMP[19], TEMP[19], SAMP[4], 2D 594: FSEQ TEMP[20].x, TEMP[8].zzzz, IMM[2].zzzz 595: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 596: MOV TEMP[12].xy, TEMP[12].xyyy 597: MOV TEMP[12].w, TEMP[9].xxxx 598: TXL TEMP[12], TEMP[12], SAMP[2], 2D 599: FSEQ TEMP[21].x, TEMP[8].zzzz, IMM[3].yyyy 600: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 601: MUL TEMP[12], TEMP[12], TEMP[21].xxxx 602: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12] 603: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12] 604: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12] 605: MAD TEMP[12].yw, TEMP[13], TEMP[14].xxxx, TEMP[12] 606: MAD TEMP[12].xy, TEMP[12].wyyy, IMM[2].wwww, IMM[3].zzzz 607: DP2 TEMP[13].x, TEMP[12].xyyy, TEMP[12].xyyy 608: MOV_SAT TEMP[29].x, TEMP[13].xxxx 609: MOV TEMP[13].xy, IN[4].zxzz 610: MOV TEMP[14].x, IMM[2].xxxx 611: FSNE TEMP[15].x, CONST[16].xxxx, TEMP[5].xxxx 612: UIF TEMP[15].xxxx :0 613: MOV TEMP[14].x, IMM[2].yyyy 614: RCP TEMP[15].x, CONST[19].xxxx 615: MUL TEMP[13].xy, IN[4].zxxx, TEMP[15].xxxx 616: ELSE :0 617: RCP TEMP[15].x, CONST[18].xxxx 618: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx 619: ENDIF 620: FRC TEMP[13].xy, TEMP[13].xyyy 621: MUL TEMP[15].x, CONST[20].xxxx, IMM[2].wwww 622: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx 623: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx 624: MUL TEMP[14].x, TEMP[14].xxxx, CONST[20].xxxx 625: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx 626: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[5].xxxx, TEMP[8].xyyy 627: MOV TEMP[14].xy, TEMP[13].xyyy 628: MOV TEMP[14].w, TEMP[9].xxxx 629: TXL TEMP[14], TEMP[14], SAMP[10], 2D 630: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[1].zzzz 631: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 632: MOV TEMP[16].xy, TEMP[13].xyyy 633: MOV TEMP[16].w, TEMP[9].xxxx 634: TXL TEMP[16], TEMP[16], SAMP[8], 2D 635: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[3].xxxx 636: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 637: MOV TEMP[18].xy, TEMP[13].xyyy 638: MOV TEMP[18].w, TEMP[9].xxxx 639: TXL TEMP[18], TEMP[18], SAMP[6], 2D 640: FSEQ TEMP[19].x, TEMP[8].zzzz, IMM[2].wwww 641: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 642: MOV TEMP[20].xy, TEMP[13].xyyy 643: MOV TEMP[20].w, TEMP[9].xxxx 644: TXL TEMP[20], TEMP[20], SAMP[4], 2D 645: FSEQ TEMP[21].x, TEMP[8].zzzz, IMM[2].zzzz 646: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 647: MOV TEMP[13].xy, TEMP[13].xyyy 648: MOV TEMP[13].w, TEMP[9].xxxx 649: TXL TEMP[13], TEMP[13], SAMP[2], 2D 650: FSEQ TEMP[22].x, TEMP[8].zzzz, IMM[3].yyyy 651: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz 652: MUL TEMP[13], TEMP[13], TEMP[22].xxxx 653: MAD TEMP[13], TEMP[20], TEMP[21].xxxx, TEMP[13] 654: MAD TEMP[13], TEMP[18], TEMP[19].xxxx, TEMP[13] 655: MAD TEMP[13], TEMP[16], TEMP[17].xxxx, TEMP[13] 656: MAD TEMP[13].yw, TEMP[14], TEMP[15].xxxx, TEMP[13] 657: MAD TEMP[13].xy, TEMP[13].wyyy, IMM[2].wwww, IMM[3].zzzz 658: DP2 TEMP[14].x, TEMP[13].xyyy, TEMP[13].xyyy 659: MOV_SAT TEMP[30].x, TEMP[14].xxxx 660: MOV TEMP[14].xy, IN[4].xyxx 661: MOV TEMP[15].x, IMM[2].xxxx 662: FSNE TEMP[16].x, CONST[16].xxxx, TEMP[5].xxxx 663: UIF TEMP[16].xxxx :0 664: MOV TEMP[15].x, IMM[2].yyyy 665: RCP TEMP[16].x, CONST[19].xxxx 666: MUL TEMP[14].xy, IN[4].xyyy, TEMP[16].xxxx 667: ELSE :0 668: RCP TEMP[16].x, CONST[18].xxxx 669: MUL TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx 670: ENDIF 671: FRC TEMP[14].xy, TEMP[14].xyyy 672: MUL TEMP[16].x, CONST[20].xxxx, IMM[2].wwww 673: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[15].xxxx 674: ADD TEMP[16].x, IMM[2].zzzz, -TEMP[16].xxxx 675: MUL TEMP[15].x, TEMP[15].xxxx, CONST[20].xxxx 676: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx, TEMP[15].xxxx 677: MAD TEMP[5].xy, TEMP[14].xyyy, TEMP[5].xxxx, TEMP[8].xyyy 678: MOV TEMP[14].xy, TEMP[5].xyyy 679: MOV TEMP[14].w, TEMP[9].xxxx 680: TXL TEMP[14], TEMP[14], SAMP[10], 2D 681: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[1].zzzz 682: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 683: MOV TEMP[16].xy, TEMP[5].xyyy 684: MOV TEMP[16].w, TEMP[9].xxxx 685: TXL TEMP[16], TEMP[16], SAMP[8], 2D 686: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[3].xxxx 687: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 688: MOV TEMP[18].xy, TEMP[5].xyyy 689: MOV TEMP[18].w, TEMP[9].xxxx 690: TXL TEMP[18], TEMP[18], SAMP[6], 2D 691: FSEQ TEMP[19].x, TEMP[8].zzzz, IMM[2].wwww 692: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 693: MOV TEMP[20].xy, TEMP[5].xyyy 694: MOV TEMP[20].w, TEMP[9].xxxx 695: TXL TEMP[20], TEMP[20], SAMP[4], 2D 696: FSEQ TEMP[21].x, TEMP[8].zzzz, IMM[2].zzzz 697: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 698: MOV TEMP[5].xy, TEMP[5].xyyy 699: MOV TEMP[5].w, TEMP[9].xxxx 700: TXL TEMP[5], TEMP[5], SAMP[2], 2D 701: FSEQ TEMP[8].x, TEMP[8].zzzz, IMM[3].yyyy 702: AND TEMP[8].x, TEMP[8].xxxx, IMM[2].zzzz 703: MUL TEMP[5], TEMP[5], TEMP[8].xxxx 704: MAD TEMP[5], TEMP[20], TEMP[21].xxxx, TEMP[5] 705: MAD TEMP[5], TEMP[18], TEMP[19].xxxx, TEMP[5] 706: MAD TEMP[5], TEMP[16], TEMP[17].xxxx, TEMP[5] 707: MAD TEMP[5].yw, TEMP[14], TEMP[15].xxxx, TEMP[5] 708: MAD TEMP[5].xy, TEMP[5].wyyy, IMM[2].wwww, IMM[3].zzzz 709: DP2 TEMP[8].x, TEMP[5].xyyy, TEMP[5].xyyy 710: MOV_SAT TEMP[31].x, TEMP[8].xxxx 711: MOV TEMP[8].x, IMM[3].yyyy 712: MOV TEMP[8].y, TEMP[12].xxxx 713: MOV TEMP[8].z, TEMP[12].yyyy 714: MOV TEMP[12].y, IMM[3].yyyy 715: MOV TEMP[12].x, TEMP[13].yyyy 716: MOV TEMP[12].z, TEMP[13].xxxx 717: MOV TEMP[13].z, IMM[3].yyyy 718: MOV TEMP[13].xy, TEMP[5].xyxx 719: MUL TEMP[5].xyz, TEMP[8].xyzz, TEMP[3].xxxx 720: MAD TEMP[5].xyz, TEMP[12].xyzz, TEMP[3].yyyy, TEMP[5].xyzz 721: MAD TEMP[5].xyz, TEMP[13].xyzz, TEMP[3].zzzz, TEMP[5].xyzz 722: MOV TEMP[8].xy, IN[4].zyzz 723: MOV TEMP[12].x, IMM[2].xxxx 724: FSNE TEMP[13].x, CONST[16].xxxx, TEMP[6].xxxx 725: UIF TEMP[13].xxxx :0 726: MOV TEMP[12].x, IMM[2].yyyy 727: RCP TEMP[13].x, CONST[19].xxxx 728: MUL TEMP[8].xy, IN[4].zyyy, TEMP[13].xxxx 729: ELSE :0 730: RCP TEMP[13].x, CONST[18].xxxx 731: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[13].xxxx 732: ENDIF 733: FRC TEMP[8].xy, TEMP[8].xyyy 734: MUL TEMP[13].x, CONST[20].xxxx, IMM[2].wwww 735: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx 736: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx 737: MUL TEMP[12].x, TEMP[12].xxxx, CONST[20].xxxx 738: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[13].xxxx, TEMP[12].xxxx 739: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[6].xxxx, TEMP[10].xyyy 740: MOV TEMP[12].xy, TEMP[8].xyyy 741: MOV TEMP[12].w, TEMP[9].xxxx 742: TXL TEMP[12], TEMP[12], SAMP[10], 2D 743: FSEQ TEMP[13].x, TEMP[10].zzzz, IMM[1].zzzz 744: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 745: MOV TEMP[14].xy, TEMP[8].xyyy 746: MOV TEMP[14].w, TEMP[9].xxxx 747: TXL TEMP[14], TEMP[14], SAMP[8], 2D 748: FSEQ TEMP[15].x, TEMP[10].zzzz, IMM[3].xxxx 749: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 750: MOV TEMP[16].xy, TEMP[8].xyyy 751: MOV TEMP[16].w, TEMP[9].xxxx 752: TXL TEMP[16], TEMP[16], SAMP[6], 2D 753: FSEQ TEMP[17].x, TEMP[10].zzzz, IMM[2].wwww 754: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 755: MOV TEMP[18].xy, TEMP[8].xyyy 756: MOV TEMP[18].w, TEMP[9].xxxx 757: TXL TEMP[18], TEMP[18], SAMP[4], 2D 758: FSEQ TEMP[19].x, TEMP[10].zzzz, IMM[2].zzzz 759: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 760: MOV TEMP[8].xy, TEMP[8].xyyy 761: MOV TEMP[8].w, TEMP[9].xxxx 762: TXL TEMP[8], TEMP[8], SAMP[2], 2D 763: FSEQ TEMP[20].x, TEMP[10].zzzz, IMM[3].yyyy 764: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 765: MUL TEMP[8], TEMP[8], TEMP[20].xxxx 766: MAD TEMP[8], TEMP[18], TEMP[19].xxxx, TEMP[8] 767: MAD TEMP[8], TEMP[16], TEMP[17].xxxx, TEMP[8] 768: MAD TEMP[8], TEMP[14], TEMP[15].xxxx, TEMP[8] 769: MAD TEMP[8].yw, TEMP[12], TEMP[13].xxxx, TEMP[8] 770: MAD TEMP[8].xy, TEMP[8].wyyy, IMM[2].wwww, IMM[3].zzzz 771: DP2 TEMP[12].x, TEMP[8].xyyy, TEMP[8].xyyy 772: MOV_SAT TEMP[32].x, TEMP[12].xxxx 773: MOV TEMP[12].xy, IN[4].zxzz 774: MOV TEMP[13].x, IMM[2].xxxx 775: FSNE TEMP[14].x, CONST[16].xxxx, TEMP[6].xxxx 776: UIF TEMP[14].xxxx :0 777: MOV TEMP[13].x, IMM[2].yyyy 778: RCP TEMP[14].x, CONST[19].xxxx 779: MUL TEMP[12].xy, IN[4].zxxx, TEMP[14].xxxx 780: ELSE :0 781: RCP TEMP[14].x, CONST[18].xxxx 782: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx 783: ENDIF 784: FRC TEMP[12].xy, TEMP[12].xyyy 785: MUL TEMP[14].x, CONST[20].xxxx, IMM[2].wwww 786: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx 787: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx 788: MUL TEMP[13].x, TEMP[13].xxxx, CONST[20].xxxx 789: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx 790: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[6].xxxx, TEMP[10].xyyy 791: MOV TEMP[13].xy, TEMP[12].xyyy 792: MOV TEMP[13].w, TEMP[9].xxxx 793: TXL TEMP[13], TEMP[13], SAMP[10], 2D 794: FSEQ TEMP[14].x, TEMP[10].zzzz, IMM[1].zzzz 795: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 796: MOV TEMP[15].xy, TEMP[12].xyyy 797: MOV TEMP[15].w, TEMP[9].xxxx 798: TXL TEMP[15], TEMP[15], SAMP[8], 2D 799: FSEQ TEMP[16].x, TEMP[10].zzzz, IMM[3].xxxx 800: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 801: MOV TEMP[17].xy, TEMP[12].xyyy 802: MOV TEMP[17].w, TEMP[9].xxxx 803: TXL TEMP[17], TEMP[17], SAMP[6], 2D 804: FSEQ TEMP[18].x, TEMP[10].zzzz, IMM[2].wwww 805: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 806: MOV TEMP[19].xy, TEMP[12].xyyy 807: MOV TEMP[19].w, TEMP[9].xxxx 808: TXL TEMP[19], TEMP[19], SAMP[4], 2D 809: FSEQ TEMP[20].x, TEMP[10].zzzz, IMM[2].zzzz 810: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 811: MOV TEMP[12].xy, TEMP[12].xyyy 812: MOV TEMP[12].w, TEMP[9].xxxx 813: TXL TEMP[12], TEMP[12], SAMP[2], 2D 814: FSEQ TEMP[21].x, TEMP[10].zzzz, IMM[3].yyyy 815: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz 816: MUL TEMP[12], TEMP[12], TEMP[21].xxxx 817: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12] 818: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12] 819: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12] 820: MAD TEMP[12].yw, TEMP[13], TEMP[14].xxxx, TEMP[12] 821: MAD TEMP[12].xy, TEMP[12].wyyy, IMM[2].wwww, IMM[3].zzzz 822: DP2 TEMP[13].x, TEMP[12].xyyy, TEMP[12].xyyy 823: MOV_SAT TEMP[33].x, TEMP[13].xxxx 824: MOV TEMP[13].xy, IN[4].xyxx 825: MOV TEMP[14].x, IMM[2].xxxx 826: FSNE TEMP[15].x, CONST[16].xxxx, TEMP[6].xxxx 827: UIF TEMP[15].xxxx :0 828: MOV TEMP[14].x, IMM[2].yyyy 829: RCP TEMP[15].x, CONST[19].xxxx 830: MUL TEMP[13].xy, IN[4].xyyy, TEMP[15].xxxx 831: ELSE :0 832: RCP TEMP[15].x, CONST[18].xxxx 833: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx 834: ENDIF 835: FRC TEMP[13].xy, TEMP[13].xyyy 836: MUL TEMP[15].x, CONST[20].xxxx, IMM[2].wwww 837: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx 838: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx 839: MUL TEMP[14].x, TEMP[14].xxxx, CONST[20].xxxx 840: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx 841: MAD TEMP[6].xy, TEMP[13].xyyy, TEMP[6].xxxx, TEMP[10].xyyy 842: MOV TEMP[13].xy, TEMP[6].xyyy 843: MOV TEMP[13].w, TEMP[9].xxxx 844: TXL TEMP[13], TEMP[13], SAMP[10], 2D 845: FSEQ TEMP[14].x, TEMP[10].zzzz, IMM[1].zzzz 846: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 847: MOV TEMP[15].xy, TEMP[6].xyyy 848: MOV TEMP[15].w, TEMP[9].xxxx 849: TXL TEMP[15], TEMP[15], SAMP[8], 2D 850: FSEQ TEMP[16].x, TEMP[10].zzzz, IMM[3].xxxx 851: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 852: MOV TEMP[17].xy, TEMP[6].xyyy 853: MOV TEMP[17].w, TEMP[9].xxxx 854: TXL TEMP[17], TEMP[17], SAMP[6], 2D 855: FSEQ TEMP[18].x, TEMP[10].zzzz, IMM[2].wwww 856: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 857: MOV TEMP[19].xy, TEMP[6].xyyy 858: MOV TEMP[19].w, TEMP[9].xxxx 859: TXL TEMP[19], TEMP[19], SAMP[4], 2D 860: FSEQ TEMP[20].x, TEMP[10].zzzz, IMM[2].zzzz 861: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 862: MOV TEMP[6].xy, TEMP[6].xyyy 863: MOV TEMP[6].w, TEMP[9].xxxx 864: TXL TEMP[6], TEMP[6], SAMP[2], 2D 865: FSEQ TEMP[10].x, TEMP[10].zzzz, IMM[3].yyyy 866: AND TEMP[10].x, TEMP[10].xxxx, IMM[2].zzzz 867: MUL TEMP[6], TEMP[6], TEMP[10].xxxx 868: MAD TEMP[6], TEMP[19], TEMP[20].xxxx, TEMP[6] 869: MAD TEMP[6], TEMP[17], TEMP[18].xxxx, TEMP[6] 870: MAD TEMP[6], TEMP[15], TEMP[16].xxxx, TEMP[6] 871: MAD TEMP[6].yw, TEMP[13], TEMP[14].xxxx, TEMP[6] 872: MAD TEMP[6].xy, TEMP[6].wyyy, IMM[2].wwww, IMM[3].zzzz 873: DP2 TEMP[10].x, TEMP[6].xyyy, TEMP[6].xyyy 874: MOV_SAT TEMP[34].x, TEMP[10].xxxx 875: MOV TEMP[10].x, IMM[3].yyyy 876: MOV TEMP[10].y, TEMP[8].xxxx 877: MOV TEMP[10].z, TEMP[8].yyyy 878: MOV TEMP[8].y, IMM[3].yyyy 879: MOV TEMP[8].x, TEMP[12].yyyy 880: MOV TEMP[8].z, TEMP[12].xxxx 881: MOV TEMP[12].z, IMM[3].yyyy 882: MOV TEMP[12].xy, TEMP[6].xyxx 883: MUL TEMP[6].xyz, TEMP[10].xyzz, TEMP[3].xxxx 884: MAD TEMP[6].xyz, TEMP[8].xyzz, TEMP[3].yyyy, TEMP[6].xyzz 885: MAD TEMP[6].xyz, TEMP[12].xyzz, TEMP[3].zzzz, TEMP[6].xyzz 886: MOV TEMP[8].xy, IN[4].zyzz 887: MOV TEMP[10].x, IMM[2].xxxx 888: FSNE TEMP[12].x, CONST[16].xxxx, TEMP[7].xxxx 889: UIF TEMP[12].xxxx :0 890: MOV TEMP[10].x, IMM[2].yyyy 891: RCP TEMP[12].x, CONST[19].xxxx 892: MUL TEMP[8].xy, IN[4].zyyy, TEMP[12].xxxx 893: ELSE :0 894: RCP TEMP[12].x, CONST[18].xxxx 895: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[12].xxxx 896: ENDIF 897: FRC TEMP[8].xy, TEMP[8].xyyy 898: MUL TEMP[12].x, CONST[20].xxxx, IMM[2].wwww 899: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[10].xxxx 900: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx 901: MUL TEMP[10].x, TEMP[10].xxxx, CONST[20].xxxx 902: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[12].xxxx, TEMP[10].xxxx 903: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[7].xxxx, TEMP[4].xyyy 904: MOV TEMP[10].xy, TEMP[8].xyyy 905: MOV TEMP[10].w, TEMP[9].xxxx 906: TXL TEMP[10], TEMP[10], SAMP[10], 2D 907: FSEQ TEMP[12].x, TEMP[4].zzzz, IMM[1].zzzz 908: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz 909: MOV TEMP[13].xy, TEMP[8].xyyy 910: MOV TEMP[13].w, TEMP[9].xxxx 911: TXL TEMP[13], TEMP[13], SAMP[8], 2D 912: FSEQ TEMP[14].x, TEMP[4].zzzz, IMM[3].xxxx 913: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz 914: MOV TEMP[15].xy, TEMP[8].xyyy 915: MOV TEMP[15].w, TEMP[9].xxxx 916: TXL TEMP[15], TEMP[15], SAMP[6], 2D 917: FSEQ TEMP[16].x, TEMP[4].zzzz, IMM[2].wwww 918: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz 919: MOV TEMP[17].xy, TEMP[8].xyyy 920: MOV TEMP[17].w, TEMP[9].xxxx 921: TXL TEMP[17], TEMP[17], SAMP[4], 2D 922: FSEQ TEMP[18].x, TEMP[4].zzzz, IMM[2].zzzz 923: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz 924: MOV TEMP[8].xy, TEMP[8].xyyy 925: MOV TEMP[8].w, TEMP[9].xxxx 926: TXL TEMP[8], TEMP[8], SAMP[2], 2D 927: FSEQ TEMP[19].x, TEMP[4].zzzz, IMM[3].yyyy 928: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 929: MUL TEMP[8], TEMP[8], TEMP[19].xxxx 930: MAD TEMP[8], TEMP[17], TEMP[18].xxxx, TEMP[8] 931: MAD TEMP[8], TEMP[15], TEMP[16].xxxx, TEMP[8] 932: MAD TEMP[8], TEMP[13], TEMP[14].xxxx, TEMP[8] 933: MAD TEMP[8].yw, TEMP[10], TEMP[12].xxxx, TEMP[8] 934: MAD TEMP[8].xy, TEMP[8].wyyy, IMM[2].wwww, IMM[3].zzzz 935: DP2 TEMP[10].x, TEMP[8].xyyy, TEMP[8].xyyy 936: MOV_SAT TEMP[35].x, TEMP[10].xxxx 937: MOV TEMP[10].xy, IN[4].zxzz 938: MOV TEMP[12].x, IMM[2].xxxx 939: FSNE TEMP[13].x, CONST[16].xxxx, TEMP[7].xxxx 940: UIF TEMP[13].xxxx :0 941: MOV TEMP[12].x, IMM[2].yyyy 942: RCP TEMP[13].x, CONST[19].xxxx 943: MUL TEMP[10].xy, IN[4].zxxx, TEMP[13].xxxx 944: ELSE :0 945: RCP TEMP[13].x, CONST[18].xxxx 946: MUL TEMP[10].xy, TEMP[10].xyyy, TEMP[13].xxxx 947: ENDIF 948: FRC TEMP[10].xy, TEMP[10].xyyy 949: MUL TEMP[13].x, CONST[20].xxxx, IMM[2].wwww 950: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx 951: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx 952: MUL TEMP[12].x, TEMP[12].xxxx, CONST[20].xxxx 953: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[13].xxxx, TEMP[12].xxxx 954: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[7].xxxx, TEMP[4].xyyy 955: MOV TEMP[12].xy, TEMP[10].xyyy 956: MOV TEMP[12].w, TEMP[9].xxxx 957: TXL TEMP[12], TEMP[12], SAMP[10], 2D 958: FSEQ TEMP[13].x, TEMP[4].zzzz, IMM[1].zzzz 959: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 960: MOV TEMP[14].xy, TEMP[10].xyyy 961: MOV TEMP[14].w, TEMP[9].xxxx 962: TXL TEMP[14], TEMP[14], SAMP[8], 2D 963: FSEQ TEMP[15].x, TEMP[4].zzzz, IMM[3].xxxx 964: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 965: MOV TEMP[16].xy, TEMP[10].xyyy 966: MOV TEMP[16].w, TEMP[9].xxxx 967: TXL TEMP[16], TEMP[16], SAMP[6], 2D 968: FSEQ TEMP[17].x, TEMP[4].zzzz, IMM[2].wwww 969: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 970: MOV TEMP[18].xy, TEMP[10].xyyy 971: MOV TEMP[18].w, TEMP[9].xxxx 972: TXL TEMP[18], TEMP[18], SAMP[4], 2D 973: FSEQ TEMP[19].x, TEMP[4].zzzz, IMM[2].zzzz 974: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 975: MOV TEMP[10].xy, TEMP[10].xyyy 976: MOV TEMP[10].w, TEMP[9].xxxx 977: TXL TEMP[10], TEMP[10], SAMP[2], 2D 978: FSEQ TEMP[20].x, TEMP[4].zzzz, IMM[3].yyyy 979: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz 980: MUL TEMP[10], TEMP[10], TEMP[20].xxxx 981: MAD TEMP[10], TEMP[18], TEMP[19].xxxx, TEMP[10] 982: MAD TEMP[10], TEMP[16], TEMP[17].xxxx, TEMP[10] 983: MAD TEMP[10], TEMP[14], TEMP[15].xxxx, TEMP[10] 984: MAD TEMP[10].yw, TEMP[12], TEMP[13].xxxx, TEMP[10] 985: MAD TEMP[10].xy, TEMP[10].wyyy, IMM[2].wwww, IMM[3].zzzz 986: DP2 TEMP[12].x, TEMP[10].xyyy, TEMP[10].xyyy 987: MOV_SAT TEMP[36].x, TEMP[12].xxxx 988: MOV TEMP[12].xy, IN[4].xyxx 989: MOV TEMP[13].x, IMM[2].xxxx 990: FSNE TEMP[14].x, CONST[16].xxxx, TEMP[7].xxxx 991: UIF TEMP[14].xxxx :0 992: MOV TEMP[13].x, IMM[2].yyyy 993: RCP TEMP[14].x, CONST[19].xxxx 994: MUL TEMP[12].xy, IN[4].xyyy, TEMP[14].xxxx 995: ELSE :0 996: RCP TEMP[14].x, CONST[18].xxxx 997: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx 998: ENDIF 999: FRC TEMP[12].xy, TEMP[12].xyyy 1000: MUL TEMP[14].x, CONST[20].xxxx, IMM[2].wwww 1001: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx 1002: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx 1003: MUL TEMP[13].x, TEMP[13].xxxx, CONST[20].xxxx 1004: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx 1005: MAD TEMP[7].xy, TEMP[12].xyyy, TEMP[7].xxxx, TEMP[4].xyyy 1006: MOV TEMP[12].xy, TEMP[7].xyyy 1007: MOV TEMP[12].w, TEMP[9].xxxx 1008: TXL TEMP[12], TEMP[12], SAMP[10], 2D 1009: FSEQ TEMP[13].x, TEMP[4].zzzz, IMM[1].zzzz 1010: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz 1011: MOV TEMP[14].xy, TEMP[7].xyyy 1012: MOV TEMP[14].w, TEMP[9].xxxx 1013: TXL TEMP[14], TEMP[14], SAMP[8], 2D 1014: FSEQ TEMP[15].x, TEMP[4].zzzz, IMM[3].xxxx 1015: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz 1016: MOV TEMP[16].xy, TEMP[7].xyyy 1017: MOV TEMP[16].w, TEMP[9].xxxx 1018: TXL TEMP[16], TEMP[16], SAMP[6], 2D 1019: FSEQ TEMP[17].x, TEMP[4].zzzz, IMM[2].wwww 1020: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz 1021: MOV TEMP[18].xy, TEMP[7].xyyy 1022: MOV TEMP[18].w, TEMP[9].xxxx 1023: TXL TEMP[18], TEMP[18], SAMP[4], 2D 1024: FSEQ TEMP[19].x, TEMP[4].zzzz, IMM[2].zzzz 1025: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz 1026: MOV TEMP[7].xy, TEMP[7].xyyy 1027: MOV TEMP[7].w, TEMP[9].xxxx 1028: TXL TEMP[7], TEMP[7], SAMP[2], 2D 1029: FSEQ TEMP[4].x, TEMP[4].zzzz, IMM[3].yyyy 1030: AND TEMP[4].x, TEMP[4].xxxx, IMM[2].zzzz 1031: MUL TEMP[4], TEMP[7], TEMP[4].xxxx 1032: MAD TEMP[4], TEMP[18], TEMP[19].xxxx, TEMP[4] 1033: MAD TEMP[4], TEMP[16], TEMP[17].xxxx, TEMP[4] 1034: MAD TEMP[4], TEMP[14], TEMP[15].xxxx, TEMP[4] 1035: MAD TEMP[4].yw, TEMP[12], TEMP[13].xxxx, TEMP[4] 1036: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[2].wwww, IMM[3].zzzz 1037: DP2 TEMP[7].x, TEMP[4].xyyy, TEMP[4].xyyy 1038: MOV_SAT TEMP[37].x, TEMP[7].xxxx 1039: MOV TEMP[7].x, IMM[3].yyyy 1040: MOV TEMP[7].y, TEMP[8].xxxx 1041: MOV TEMP[7].z, TEMP[8].yyyy 1042: MOV TEMP[8].y, IMM[3].yyyy 1043: MOV TEMP[8].x, TEMP[10].yyyy 1044: MOV TEMP[8].z, TEMP[10].xxxx 1045: MOV TEMP[9].z, IMM[3].yyyy 1046: MOV TEMP[9].xy, TEMP[4].xyxx 1047: MOV TEMP[4].w, IMM[2].zzzz 1048: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[3].xxxx 1049: MAD TEMP[7].xyz, TEMP[8].xyzz, TEMP[3].yyyy, TEMP[7].xyzz 1050: MAD TEMP[3].xyz, TEMP[9].xyzz, TEMP[3].zzzz, TEMP[7].xyzz 1051: MUL TEMP[5].xyz, IN[1].xxxx, TEMP[5].xyzz 1052: MAD TEMP[5].xyz, IN[1].yyyy, TEMP[6].xyzz, TEMP[5].xyzz 1053: MAD TEMP[4].xyz, IN[1].zzzz, TEMP[3].xyzz, TEMP[5].xyzz 1054: DP4 TEMP[3].x, TEMP[4], TEMP[4] 1055: RSQ TEMP[3].x, TEMP[3].xxxx 1056: MUL TEMP[3].xyz, TEMP[4], TEMP[3].xxxx 1057: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[0].wwww 1058: ADD TEMP[3].xyz, IN[3].yzww, -TEMP[3].xyzz 1059: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 1060: RSQ TEMP[4].x, TEMP[4].xxxx 1061: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 1062: MOV TEMP[4].xy, IN[2].xyyy 1063: MOV TEMP[4].w, IN[2].wwww 1064: TXP TEMP[4].x, TEMP[4], SAMP[0], 2D 1065: MOV TEMP[5].w, IMM[3].yyyy 1066: MUL TEMP[5].xyz, TEMP[11].xyzz, TEMP[0].xyzz 1067: ADD TEMP[0].xyz, TEMP[1].xyzz, TEMP[2].xyzz 1068: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz 1069: RSQ TEMP[2].x, TEMP[2].xxxx 1070: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx 1071: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[0].xyzz 1072: MAX TEMP[0].x, IMM[3].wwww, TEMP[0].xxxx 1073: MUL TEMP[2].x, IMM[4].xxxx, IN[1].wwww 1074: POW TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx 1075: MOV_SAT TEMP[0].x, TEMP[0].xxxx 1076: MOV TEMP[2].w, IMM[3].yyyy 1077: MOV TEMP[2].xyz, CONST[23].xyzx 1078: MOV TEMP[6].w, IMM[2].zzzz 1079: MUL TEMP[7].x, IMM[2].wwww, TEMP[0].xxxx 1080: ADD TEMP[7].x, IMM[3].xxxx, -TEMP[7].xxxx 1081: MUL TEMP[7].x, TEMP[0].xxxx, TEMP[7].xxxx 1082: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[7].xxxx 1083: MUL TEMP[0].x, TEMP[0].xxxx, IN[1].wwww 1084: MUL TEMP[7].xyz, TEMP[11].xyzz, CONST[4].xyzz 1085: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[1].xyzz 1086: MOV_SAT TEMP[1].x, TEMP[1].xxxx 1087: MUL TEMP[3], CONST[24], IMM[2].wwww 1088: MUL TEMP[3], TEMP[3], TEMP[4].xxxx 1089: MAX TEMP[2], TEMP[3], TEMP[2] 1090: MIN TEMP[2].xyz, TEMP[2], IMM[4].yyyz 1091: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[11].xyzz 1092: MAD TEMP[1].xyz, TEMP[7].xyzz, TEMP[1].xxxx, TEMP[2].xyzz 1093: MAD TEMP[0].xyz, CONST[4].xyzz, TEMP[0].xxxx, TEMP[1].xyzz 1094: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xxxx 1095: MUL TEMP[6].xyz, TEMP[0].xyzz, IMM[0].wwww 1096: ADD TEMP[0].xyz, TEMP[5], TEMP[6] 1097: MAD TEMP[1].x, IN[3].xxxx, CONST[3].zzzz, CONST[3].wwww 1098: MOV_SAT TEMP[1].x, TEMP[1].xxxx 1099: LRP TEMP[5].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[2].xyzz 1100: MOV TEMP[5].w, IMM[2].zzzz 1101: MOV OUT[0], TEMP[5] 1102: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 372) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 376) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 388) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 392) %51 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %52 = load <32 x i8>, <32 x i8> addrspace(2)* %51, align 32, !tbaa !0 %53 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %56 = load <8 x i32>, <8 x i32> addrspace(2)* %55, align 32, !tbaa !0 %57 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %58 = load <4 x i32>, <4 x i32> addrspace(2)* %57, align 16, !tbaa !0 %59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %60 = load <8 x i32>, <8 x i32> addrspace(2)* %59, align 32, !tbaa !0 %61 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %62 = load <4 x i32>, <4 x i32> addrspace(2)* %61, align 16, !tbaa !0 %63 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %64 = load <8 x i32>, <8 x i32> addrspace(2)* %63, align 32, !tbaa !0 %65 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %66 = load <4 x i32>, <4 x i32> addrspace(2)* %65, align 16, !tbaa !0 %67 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %68 = load <8 x i32>, <8 x i32> addrspace(2)* %67, align 32, !tbaa !0 %69 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %70 = load <4 x i32>, <4 x i32> addrspace(2)* %69, align 16, !tbaa !0 %71 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %72 = load <8 x i32>, <8 x i32> addrspace(2)* %71, align 32, !tbaa !0 %73 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %74 = load <4 x i32>, <4 x i32> addrspace(2)* %73, align 16, !tbaa !0 %75 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %76 = load <8 x i32>, <8 x i32> addrspace(2)* %75, align 32, !tbaa !0 %77 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %78 = load <4 x i32>, <4 x i32> addrspace(2)* %77, align 16, !tbaa !0 %79 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 7 %80 = load <8 x i32>, <8 x i32> addrspace(2)* %79, align 32, !tbaa !0 %81 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 7 %82 = load <4 x i32>, <4 x i32> addrspace(2)* %81, align 16, !tbaa !0 %83 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 8 %84 = load <8 x i32>, <8 x i32> addrspace(2)* %83, align 32, !tbaa !0 %85 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 8 %86 = load <4 x i32>, <4 x i32> addrspace(2)* %85, align 16, !tbaa !0 %87 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 9 %88 = load <8 x i32>, <8 x i32> addrspace(2)* %87, align 32, !tbaa !0 %89 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 9 %90 = load <4 x i32>, <4 x i32> addrspace(2)* %89, align 16, !tbaa !0 %91 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 10 %92 = load <8 x i32>, <8 x i32> addrspace(2)* %91, align 32, !tbaa !0 %93 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 10 %94 = load <4 x i32>, <4 x i32> addrspace(2)* %93, align 16, !tbaa !0 %95 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %110 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %111 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %113 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %114 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %115 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %116 = fmul float %27, %27 %117 = fmul float %28, %28 %118 = fadd float %117, %116 %119 = fmul float %29, %29 %120 = fadd float %118, %119 %121 = call float @llvm.AMDGPU.rsq.clamped.f32(float %120) %122 = fmul float %27, %121 %123 = fmul float %28, %121 %124 = fmul float %29, %121 %125 = fsub float %24, %110 %126 = fsub float %25, %111 %127 = fsub float %26, %112 %128 = fmul float %125, %125 %129 = fmul float %126, %126 %130 = fadd float %129, %128 %131 = fmul float %127, %127 %132 = fadd float %130, %131 %133 = call float @llvm.AMDGPU.rsq.clamped.f32(float %132) %134 = fmul float %125, %133 %135 = fmul float %126, %133 %136 = fmul float %127, %133 %137 = call float @llvm.fabs.f32(float %107) %138 = call float @llvm.fabs.f32(float %108) %139 = call float @llvm.fabs.f32(float %109) %140 = fmul float %137, %137 %141 = fmul float %138, %138 %142 = fadd float %141, %140 %143 = fmul float %139, %139 %144 = fadd float %142, %143 %145 = call float @llvm.AMDGPU.rsq.clamped.f32(float %144) %146 = fmul float %137, %145 %147 = fadd float %146, 0xBFC99999A0000000 %148 = fmul float %138, %145 %149 = fadd float %148, 0xBFC99999A0000000 %150 = fmul float %139, %145 %151 = fadd float %150, 0xBFC99999A0000000 %152 = fmul float %147, 7.000000e+00 %153 = fmul float %149, 7.000000e+00 %154 = fmul float %151, 7.000000e+00 %155 = call float @llvm.maxnum.f32(float %152, float 0x3F847AE140000000) %156 = call float @llvm.maxnum.f32(float %153, float 0x3F847AE140000000) %157 = call float @llvm.maxnum.f32(float %154, float 0x3F847AE140000000) %158 = fadd float %155, %156 %159 = fadd float %158, %157 %160 = fdiv float 1.000000e+00, %159 %161 = fmul float %155, %160 %162 = fmul float %156, %160 %163 = fmul float %157, %160 %164 = fadd float %95, 5.000000e-01 %165 = fadd float %96, 5.000000e-01 %166 = fadd float %97, 5.000000e-01 %167 = call float @llvm.floor.f32(float %164) %168 = call float @llvm.floor.f32(float %165) %169 = call float @llvm.floor.f32(float %166) %170 = fmul float %167, %38 %171 = call float @llvm.floor.f32(float %170) %172 = fmul float %171, %38 %173 = fcmp ult float %167, 6.400000e+01 br i1 %173, label %ELSE, label %IF IF: ; preds = %main_body %174 = fadd float %167, -6.400000e+01 %175 = fmul float %174, %39 %176 = call float @llvm.floor.f32(float %175) %177 = fmul float %176, %39 %178 = call float @llvm.floor.f32(float %175) %179 = fsub float %175, %178 %180 = call float @llvm.floor.f32(float %177) %181 = fsub float %177, %180 %182 = call float @llvm.floor.f32(float %177) %183 = fadd float %182, 4.000000e+00 br label %ENDIF ELSE: ; preds = %main_body %184 = call float @llvm.floor.f32(float %170) %185 = fsub float %170, %184 %186 = call float @llvm.floor.f32(float %172) %187 = fsub float %172, %186 %188 = call float @llvm.floor.f32(float %172) br label %ENDIF ENDIF: ; preds = %ELSE, %IF %temp32.0 = phi float [ %179, %IF ], [ %185, %ELSE ] %temp33.0 = phi float [ %181, %IF ], [ %187, %ELSE ] %temp34.0 = phi float [ %183, %IF ], [ %188, %ELSE ] %temp20.0 = phi float [ %39, %IF ], [ %38, %ELSE ] %189 = fmul float %168, %38 %190 = call float @llvm.floor.f32(float %189) %191 = fmul float %190, %38 %192 = fcmp ult float %168, 6.400000e+01 br i1 %192, label %ELSE154, label %IF153 IF153: ; preds = %ENDIF %193 = fadd float %168, -6.400000e+01 %194 = fmul float %193, %39 %195 = call float @llvm.floor.f32(float %194) %196 = fmul float %195, %39 %197 = call float @llvm.floor.f32(float %194) %198 = fsub float %194, %197 %199 = call float @llvm.floor.f32(float %196) %200 = fsub float %196, %199 %201 = call float @llvm.floor.f32(float %196) %202 = fadd float %201, 4.000000e+00 br label %ENDIF152 ELSE154: ; preds = %ENDIF %203 = call float @llvm.floor.f32(float %189) %204 = fsub float %189, %203 %205 = call float @llvm.floor.f32(float %191) %206 = fsub float %191, %205 %207 = call float @llvm.floor.f32(float %191) br label %ENDIF152 ENDIF152: ; preds = %ELSE154, %IF153 %temp40.0 = phi float [ %198, %IF153 ], [ %204, %ELSE154 ] %temp41.0 = phi float [ %200, %IF153 ], [ %206, %ELSE154 ] %temp42.0 = phi float [ %202, %IF153 ], [ %207, %ELSE154 ] %temp24.0 = phi float [ %39, %IF153 ], [ %38, %ELSE154 ] %208 = fmul float %169, %38 %209 = call float @llvm.floor.f32(float %208) %210 = fmul float %209, %38 %211 = fcmp ult float %169, 6.400000e+01 br i1 %211, label %ELSE157, label %IF156 IF156: ; preds = %ENDIF152 %212 = fadd float %169, -6.400000e+01 %213 = fmul float %212, %39 %214 = call float @llvm.floor.f32(float %213) %215 = fmul float %214, %39 %216 = call float @llvm.floor.f32(float %213) %217 = fsub float %213, %216 %218 = call float @llvm.floor.f32(float %215) %219 = fsub float %215, %218 %220 = call float @llvm.floor.f32(float %215) %221 = fadd float %220, 4.000000e+00 br label %ENDIF155 ELSE157: ; preds = %ENDIF152 %222 = call float @llvm.floor.f32(float %208) %223 = fsub float %208, %222 %224 = call float @llvm.floor.f32(float %210) %225 = fsub float %210, %224 %226 = call float @llvm.floor.f32(float %210) br label %ENDIF155 ENDIF155: ; preds = %ELSE157, %IF156 %temp28.0 = phi float [ %39, %IF156 ], [ %38, %ELSE157 ] %temp18.0 = phi float [ %221, %IF156 ], [ %226, %ELSE157 ] %temp17.0 = phi float [ %219, %IF156 ], [ %225, %ELSE157 ] %temp16.0 = phi float [ %217, %IF156 ], [ %223, %ELSE157 ] %227 = fsub float %110, %24 %228 = fsub float %111, %25 %229 = fsub float %112, %26 %230 = fmul float %227, %227 %231 = fmul float %228, %228 %232 = fadd float %231, %230 %233 = fmul float %229, %229 %234 = fadd float %232, %233 %235 = fmul float %44, %234 %236 = call float @llvm.log2.f32(float %235) %237 = fmul float %236, 0x3FE62E4300000000 %238 = fmul float %237, %43 %239 = fcmp une float %38, %temp20.0 %.sink212 = select i1 %239, float %41, float %40 %temp48.0 = select i1 %239, float 1.953125e-03, float 3.906250e-03 %240 = fdiv float 1.000000e+00, %.sink212 %241 = fmul float %110, %240 %242 = fmul float %111, %240 %243 = call float @llvm.floor.f32(float %241) %244 = fsub float %241, %243 %245 = call float @llvm.floor.f32(float %242) %246 = fsub float %242, %245 %247 = fmul float %42, 2.000000e+00 %248 = fmul float %247, %temp48.0 %249 = fsub float 1.000000e+00, %248 %250 = fmul float %temp48.0, %42 %251 = fmul float %244, %249 %252 = fadd float %251, %250 %253 = fmul float %246, %249 %254 = fadd float %253, %250 %255 = fmul float %252, %temp20.0 %256 = fadd float %255, %temp32.0 %257 = fmul float %254, %temp20.0 %258 = fadd float %257, %temp33.0 %259 = bitcast float %256 to i32 %260 = bitcast float %258 to i32 %261 = bitcast float %238 to i32 %262 = insertelement <4 x i32> undef, i32 %259, i32 0 %263 = insertelement <4 x i32> %262, i32 %260, i32 1 %264 = insertelement <4 x i32> %263, i32 %261, i32 2 %265 = bitcast <8 x i32> %88 to <32 x i8> %266 = bitcast <4 x i32> %90 to <16 x i8> %267 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %264, <32 x i8> %265, <16 x i8> %266, i32 2) %268 = extractelement <4 x float> %267, i32 0 %269 = extractelement <4 x float> %267, i32 1 %270 = extractelement <4 x float> %267, i32 2 %271 = fcmp oeq float %temp34.0, 4.000000e+00 %272 = select i1 %271, float 1.000000e+00, float 0.000000e+00 %273 = bitcast float %256 to i32 %274 = bitcast float %258 to i32 %275 = bitcast float %238 to i32 %276 = insertelement <4 x i32> undef, i32 %273, i32 0 %277 = insertelement <4 x i32> %276, i32 %274, i32 1 %278 = insertelement <4 x i32> %277, i32 %275, i32 2 %279 = bitcast <8 x i32> %80 to <32 x i8> %280 = bitcast <4 x i32> %82 to <16 x i8> %281 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %278, <32 x i8> %279, <16 x i8> %280, i32 2) %282 = extractelement <4 x float> %281, i32 0 %283 = extractelement <4 x float> %281, i32 1 %284 = extractelement <4 x float> %281, i32 2 %285 = fcmp oeq float %temp34.0, 3.000000e+00 %286 = select i1 %285, float 1.000000e+00, float 0.000000e+00 %287 = bitcast float %256 to i32 %288 = bitcast float %258 to i32 %289 = bitcast float %238 to i32 %290 = insertelement <4 x i32> undef, i32 %287, i32 0 %291 = insertelement <4 x i32> %290, i32 %288, i32 1 %292 = insertelement <4 x i32> %291, i32 %289, i32 2 %293 = bitcast <8 x i32> %72 to <32 x i8> %294 = bitcast <4 x i32> %74 to <16 x i8> %295 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %292, <32 x i8> %293, <16 x i8> %294, i32 2) %296 = extractelement <4 x float> %295, i32 0 %297 = extractelement <4 x float> %295, i32 1 %298 = extractelement <4 x float> %295, i32 2 %299 = fcmp oeq float %temp34.0, 2.000000e+00 %300 = select i1 %299, float 1.000000e+00, float 0.000000e+00 %301 = bitcast float %256 to i32 %302 = bitcast float %258 to i32 %303 = bitcast float %238 to i32 %304 = insertelement <4 x i32> undef, i32 %301, i32 0 %305 = insertelement <4 x i32> %304, i32 %302, i32 1 %306 = insertelement <4 x i32> %305, i32 %303, i32 2 %307 = bitcast <8 x i32> %64 to <32 x i8> %308 = bitcast <4 x i32> %66 to <16 x i8> %309 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %306, <32 x i8> %307, <16 x i8> %308, i32 2) %310 = extractelement <4 x float> %309, i32 0 %311 = extractelement <4 x float> %309, i32 1 %312 = extractelement <4 x float> %309, i32 2 %313 = fcmp oeq float %temp34.0, 1.000000e+00 %314 = select i1 %313, float 1.000000e+00, float 0.000000e+00 %315 = bitcast float %256 to i32 %316 = bitcast float %258 to i32 %317 = bitcast float %238 to i32 %318 = insertelement <4 x i32> undef, i32 %315, i32 0 %319 = insertelement <4 x i32> %318, i32 %316, i32 1 %320 = insertelement <4 x i32> %319, i32 %317, i32 2 %321 = bitcast <8 x i32> %56 to <32 x i8> %322 = bitcast <4 x i32> %58 to <16 x i8> %323 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %320, <32 x i8> %321, <16 x i8> %322, i32 2) %324 = extractelement <4 x float> %323, i32 0 %325 = extractelement <4 x float> %323, i32 1 %326 = extractelement <4 x float> %323, i32 2 %327 = fcmp oeq float %temp34.0, 0.000000e+00 %328 = select i1 %327, float 1.000000e+00, float 0.000000e+00 %329 = fmul float %324, %328 %330 = fmul float %325, %328 %331 = fmul float %326, %328 %332 = fmul float %310, %314 %333 = fadd float %332, %329 %334 = fmul float %311, %314 %335 = fadd float %334, %330 %336 = fmul float %312, %314 %337 = fadd float %336, %331 %338 = fmul float %296, %300 %339 = fadd float %338, %333 %340 = fmul float %297, %300 %341 = fadd float %340, %335 %342 = fmul float %298, %300 %343 = fadd float %342, %337 %344 = fmul float %282, %286 %345 = fadd float %344, %339 %346 = fmul float %283, %286 %347 = fadd float %346, %341 %348 = fmul float %284, %286 %349 = fadd float %348, %343 %350 = fmul float %268, %272 %351 = fadd float %350, %345 %352 = fmul float %269, %272 %353 = fadd float %352, %347 %354 = fmul float %270, %272 %355 = fadd float %354, %349 %356 = fcmp une float %38, %temp20.0 %.sink213 = select i1 %356, float %41, float %40 %temp52.0 = select i1 %356, float 1.953125e-03, float 3.906250e-03 %357 = fdiv float 1.000000e+00, %.sink213 %358 = fmul float %112, %357 %359 = fmul float %111, %357 %360 = call float @llvm.floor.f32(float %358) %361 = fsub float %358, %360 %362 = call float @llvm.floor.f32(float %359) %363 = fsub float %359, %362 %364 = fmul float %42, 2.000000e+00 %365 = fmul float %364, %temp52.0 %366 = fsub float 1.000000e+00, %365 %367 = fmul float %temp52.0, %42 %368 = fmul float %361, %366 %369 = fadd float %368, %367 %370 = fmul float %363, %366 %371 = fadd float %370, %367 %372 = fmul float %369, %temp20.0 %373 = fadd float %372, %temp32.0 %374 = fmul float %371, %temp20.0 %375 = fadd float %374, %temp33.0 %376 = bitcast float %373 to i32 %377 = bitcast float %375 to i32 %378 = bitcast float %238 to i32 %379 = insertelement <4 x i32> undef, i32 %376, i32 0 %380 = insertelement <4 x i32> %379, i32 %377, i32 1 %381 = insertelement <4 x i32> %380, i32 %378, i32 2 %382 = bitcast <8 x i32> %88 to <32 x i8> %383 = bitcast <4 x i32> %90 to <16 x i8> %384 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %381, <32 x i8> %382, <16 x i8> %383, i32 2) %385 = extractelement <4 x float> %384, i32 0 %386 = extractelement <4 x float> %384, i32 1 %387 = extractelement <4 x float> %384, i32 2 %388 = fcmp oeq float %temp34.0, 4.000000e+00 %389 = select i1 %388, float 1.000000e+00, float 0.000000e+00 %390 = bitcast float %373 to i32 %391 = bitcast float %375 to i32 %392 = bitcast float %238 to i32 %393 = insertelement <4 x i32> undef, i32 %390, i32 0 %394 = insertelement <4 x i32> %393, i32 %391, i32 1 %395 = insertelement <4 x i32> %394, i32 %392, i32 2 %396 = bitcast <8 x i32> %80 to <32 x i8> %397 = bitcast <4 x i32> %82 to <16 x i8> %398 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %395, <32 x i8> %396, <16 x i8> %397, i32 2) %399 = extractelement <4 x float> %398, i32 0 %400 = extractelement <4 x float> %398, i32 1 %401 = extractelement <4 x float> %398, i32 2 %402 = fcmp oeq float %temp34.0, 3.000000e+00 %403 = select i1 %402, float 1.000000e+00, float 0.000000e+00 %404 = bitcast float %373 to i32 %405 = bitcast float %375 to i32 %406 = bitcast float %238 to i32 %407 = insertelement <4 x i32> undef, i32 %404, i32 0 %408 = insertelement <4 x i32> %407, i32 %405, i32 1 %409 = insertelement <4 x i32> %408, i32 %406, i32 2 %410 = bitcast <8 x i32> %72 to <32 x i8> %411 = bitcast <4 x i32> %74 to <16 x i8> %412 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %409, <32 x i8> %410, <16 x i8> %411, i32 2) %413 = extractelement <4 x float> %412, i32 0 %414 = extractelement <4 x float> %412, i32 1 %415 = extractelement <4 x float> %412, i32 2 %416 = fcmp oeq float %temp34.0, 2.000000e+00 %417 = select i1 %416, float 1.000000e+00, float 0.000000e+00 %418 = bitcast float %373 to i32 %419 = bitcast float %375 to i32 %420 = bitcast float %238 to i32 %421 = insertelement <4 x i32> undef, i32 %418, i32 0 %422 = insertelement <4 x i32> %421, i32 %419, i32 1 %423 = insertelement <4 x i32> %422, i32 %420, i32 2 %424 = bitcast <8 x i32> %64 to <32 x i8> %425 = bitcast <4 x i32> %66 to <16 x i8> %426 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %423, <32 x i8> %424, <16 x i8> %425, i32 2) %427 = extractelement <4 x float> %426, i32 0 %428 = extractelement <4 x float> %426, i32 1 %429 = extractelement <4 x float> %426, i32 2 %430 = fcmp oeq float %temp34.0, 1.000000e+00 %431 = select i1 %430, float 1.000000e+00, float 0.000000e+00 %432 = bitcast float %373 to i32 %433 = bitcast float %375 to i32 %434 = bitcast float %238 to i32 %435 = insertelement <4 x i32> undef, i32 %432, i32 0 %436 = insertelement <4 x i32> %435, i32 %433, i32 1 %437 = insertelement <4 x i32> %436, i32 %434, i32 2 %438 = bitcast <8 x i32> %56 to <32 x i8> %439 = bitcast <4 x i32> %58 to <16 x i8> %440 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %437, <32 x i8> %438, <16 x i8> %439, i32 2) %441 = extractelement <4 x float> %440, i32 0 %442 = extractelement <4 x float> %440, i32 1 %443 = extractelement <4 x float> %440, i32 2 %444 = fcmp oeq float %temp34.0, 0.000000e+00 %445 = select i1 %444, float 1.000000e+00, float 0.000000e+00 %446 = fmul float %441, %445 %447 = fmul float %442, %445 %448 = fmul float %443, %445 %449 = fmul float %427, %431 %450 = fadd float %449, %446 %451 = fmul float %428, %431 %452 = fadd float %451, %447 %453 = fmul float %429, %431 %454 = fadd float %453, %448 %455 = fmul float %413, %417 %456 = fadd float %455, %450 %457 = fmul float %414, %417 %458 = fadd float %457, %452 %459 = fmul float %415, %417 %460 = fadd float %459, %454 %461 = fmul float %399, %403 %462 = fadd float %461, %456 %463 = fmul float %400, %403 %464 = fadd float %463, %458 %465 = fmul float %401, %403 %466 = fadd float %465, %460 %467 = fmul float %385, %389 %468 = fadd float %467, %462 %469 = fmul float %386, %389 %470 = fadd float %469, %464 %471 = fmul float %387, %389 %472 = fadd float %471, %466 %473 = fcmp une float %38, %temp20.0 %.sink214 = select i1 %473, float %41, float %40 %temp56.0 = select i1 %473, float 1.953125e-03, float 3.906250e-03 %474 = fdiv float 1.000000e+00, %.sink214 %475 = fmul float %112, %474 %476 = fmul float %110, %474 %477 = call float @llvm.floor.f32(float %475) %478 = fsub float %475, %477 %479 = call float @llvm.floor.f32(float %476) %480 = fsub float %476, %479 %481 = fmul float %42, 2.000000e+00 %482 = fmul float %481, %temp56.0 %483 = fsub float 1.000000e+00, %482 %484 = fmul float %temp56.0, %42 %485 = fmul float %478, %483 %486 = fadd float %485, %484 %487 = fmul float %480, %483 %488 = fadd float %487, %484 %489 = fmul float %486, %temp20.0 %490 = fadd float %489, %temp32.0 %491 = fmul float %488, %temp20.0 %492 = fadd float %491, %temp33.0 %493 = bitcast float %490 to i32 %494 = bitcast float %492 to i32 %495 = bitcast float %238 to i32 %496 = insertelement <4 x i32> undef, i32 %493, i32 0 %497 = insertelement <4 x i32> %496, i32 %494, i32 1 %498 = insertelement <4 x i32> %497, i32 %495, i32 2 %499 = bitcast <8 x i32> %88 to <32 x i8> %500 = bitcast <4 x i32> %90 to <16 x i8> %501 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %498, <32 x i8> %499, <16 x i8> %500, i32 2) %502 = extractelement <4 x float> %501, i32 0 %503 = extractelement <4 x float> %501, i32 1 %504 = extractelement <4 x float> %501, i32 2 %505 = fcmp oeq float %temp34.0, 4.000000e+00 %506 = select i1 %505, float 1.000000e+00, float 0.000000e+00 %507 = bitcast float %490 to i32 %508 = bitcast float %492 to i32 %509 = bitcast float %238 to i32 %510 = insertelement <4 x i32> undef, i32 %507, i32 0 %511 = insertelement <4 x i32> %510, i32 %508, i32 1 %512 = insertelement <4 x i32> %511, i32 %509, i32 2 %513 = bitcast <8 x i32> %80 to <32 x i8> %514 = bitcast <4 x i32> %82 to <16 x i8> %515 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %512, <32 x i8> %513, <16 x i8> %514, i32 2) %516 = extractelement <4 x float> %515, i32 0 %517 = extractelement <4 x float> %515, i32 1 %518 = extractelement <4 x float> %515, i32 2 %519 = fcmp oeq float %temp34.0, 3.000000e+00 %520 = select i1 %519, float 1.000000e+00, float 0.000000e+00 %521 = bitcast float %490 to i32 %522 = bitcast float %492 to i32 %523 = bitcast float %238 to i32 %524 = insertelement <4 x i32> undef, i32 %521, i32 0 %525 = insertelement <4 x i32> %524, i32 %522, i32 1 %526 = insertelement <4 x i32> %525, i32 %523, i32 2 %527 = bitcast <8 x i32> %72 to <32 x i8> %528 = bitcast <4 x i32> %74 to <16 x i8> %529 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %526, <32 x i8> %527, <16 x i8> %528, i32 2) %530 = extractelement <4 x float> %529, i32 0 %531 = extractelement <4 x float> %529, i32 1 %532 = extractelement <4 x float> %529, i32 2 %533 = fcmp oeq float %temp34.0, 2.000000e+00 %534 = select i1 %533, float 1.000000e+00, float 0.000000e+00 %535 = bitcast float %490 to i32 %536 = bitcast float %492 to i32 %537 = bitcast float %238 to i32 %538 = insertelement <4 x i32> undef, i32 %535, i32 0 %539 = insertelement <4 x i32> %538, i32 %536, i32 1 %540 = insertelement <4 x i32> %539, i32 %537, i32 2 %541 = bitcast <8 x i32> %64 to <32 x i8> %542 = bitcast <4 x i32> %66 to <16 x i8> %543 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %540, <32 x i8> %541, <16 x i8> %542, i32 2) %544 = extractelement <4 x float> %543, i32 0 %545 = extractelement <4 x float> %543, i32 1 %546 = extractelement <4 x float> %543, i32 2 %547 = fcmp oeq float %temp34.0, 1.000000e+00 %548 = select i1 %547, float 1.000000e+00, float 0.000000e+00 %549 = bitcast float %490 to i32 %550 = bitcast float %492 to i32 %551 = bitcast float %238 to i32 %552 = insertelement <4 x i32> undef, i32 %549, i32 0 %553 = insertelement <4 x i32> %552, i32 %550, i32 1 %554 = insertelement <4 x i32> %553, i32 %551, i32 2 %555 = bitcast <8 x i32> %56 to <32 x i8> %556 = bitcast <4 x i32> %58 to <16 x i8> %557 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %554, <32 x i8> %555, <16 x i8> %556, i32 2) %558 = extractelement <4 x float> %557, i32 0 %559 = extractelement <4 x float> %557, i32 1 %560 = extractelement <4 x float> %557, i32 2 %561 = fcmp oeq float %temp34.0, 0.000000e+00 %562 = select i1 %561, float 1.000000e+00, float 0.000000e+00 %563 = fmul float %558, %562 %564 = fmul float %559, %562 %565 = fmul float %560, %562 %566 = fmul float %544, %548 %567 = fadd float %566, %563 %568 = fmul float %545, %548 %569 = fadd float %568, %564 %570 = fmul float %546, %548 %571 = fadd float %570, %565 %572 = fmul float %530, %534 %573 = fadd float %572, %567 %574 = fmul float %531, %534 %575 = fadd float %574, %569 %576 = fmul float %532, %534 %577 = fadd float %576, %571 %578 = fmul float %516, %520 %579 = fadd float %578, %573 %580 = fmul float %517, %520 %581 = fadd float %580, %575 %582 = fmul float %518, %520 %583 = fadd float %582, %577 %584 = fmul float %502, %506 %585 = fadd float %584, %579 %586 = fmul float %503, %506 %587 = fadd float %586, %581 %588 = fmul float %504, %506 %589 = fadd float %588, %583 %590 = fcmp une float %38, %temp24.0 %.sink215 = select i1 %590, float %41, float %40 %temp60.0 = select i1 %590, float 1.953125e-03, float 3.906250e-03 %591 = fdiv float 1.000000e+00, %.sink215 %592 = fmul float %110, %591 %593 = fmul float %111, %591 %594 = call float @llvm.floor.f32(float %592) %595 = fsub float %592, %594 %596 = call float @llvm.floor.f32(float %593) %597 = fsub float %593, %596 %598 = fmul float %42, 2.000000e+00 %599 = fmul float %598, %temp60.0 %600 = fsub float 1.000000e+00, %599 %601 = fmul float %temp60.0, %42 %602 = fmul float %595, %600 %603 = fadd float %602, %601 %604 = fmul float %597, %600 %605 = fadd float %604, %601 %606 = fmul float %603, %temp24.0 %607 = fadd float %606, %temp40.0 %608 = fmul float %605, %temp24.0 %609 = fadd float %608, %temp41.0 %610 = bitcast float %607 to i32 %611 = bitcast float %609 to i32 %612 = bitcast float %238 to i32 %613 = insertelement <4 x i32> undef, i32 %610, i32 0 %614 = insertelement <4 x i32> %613, i32 %611, i32 1 %615 = insertelement <4 x i32> %614, i32 %612, i32 2 %616 = bitcast <8 x i32> %88 to <32 x i8> %617 = bitcast <4 x i32> %90 to <16 x i8> %618 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %615, <32 x i8> %616, <16 x i8> %617, i32 2) %619 = extractelement <4 x float> %618, i32 0 %620 = extractelement <4 x float> %618, i32 1 %621 = extractelement <4 x float> %618, i32 2 %622 = fcmp oeq float %temp42.0, 4.000000e+00 %623 = select i1 %622, float 1.000000e+00, float 0.000000e+00 %624 = bitcast float %607 to i32 %625 = bitcast float %609 to i32 %626 = bitcast float %238 to i32 %627 = insertelement <4 x i32> undef, i32 %624, i32 0 %628 = insertelement <4 x i32> %627, i32 %625, i32 1 %629 = insertelement <4 x i32> %628, i32 %626, i32 2 %630 = bitcast <8 x i32> %80 to <32 x i8> %631 = bitcast <4 x i32> %82 to <16 x i8> %632 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %629, <32 x i8> %630, <16 x i8> %631, i32 2) %633 = extractelement <4 x float> %632, i32 0 %634 = extractelement <4 x float> %632, i32 1 %635 = extractelement <4 x float> %632, i32 2 %636 = fcmp oeq float %temp42.0, 3.000000e+00 %637 = select i1 %636, float 1.000000e+00, float 0.000000e+00 %638 = bitcast float %607 to i32 %639 = bitcast float %609 to i32 %640 = bitcast float %238 to i32 %641 = insertelement <4 x i32> undef, i32 %638, i32 0 %642 = insertelement <4 x i32> %641, i32 %639, i32 1 %643 = insertelement <4 x i32> %642, i32 %640, i32 2 %644 = bitcast <8 x i32> %72 to <32 x i8> %645 = bitcast <4 x i32> %74 to <16 x i8> %646 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %643, <32 x i8> %644, <16 x i8> %645, i32 2) %647 = extractelement <4 x float> %646, i32 0 %648 = extractelement <4 x float> %646, i32 1 %649 = extractelement <4 x float> %646, i32 2 %650 = fcmp oeq float %temp42.0, 2.000000e+00 %651 = select i1 %650, float 1.000000e+00, float 0.000000e+00 %652 = bitcast float %607 to i32 %653 = bitcast float %609 to i32 %654 = bitcast float %238 to i32 %655 = insertelement <4 x i32> undef, i32 %652, i32 0 %656 = insertelement <4 x i32> %655, i32 %653, i32 1 %657 = insertelement <4 x i32> %656, i32 %654, i32 2 %658 = bitcast <8 x i32> %64 to <32 x i8> %659 = bitcast <4 x i32> %66 to <16 x i8> %660 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %657, <32 x i8> %658, <16 x i8> %659, i32 2) %661 = extractelement <4 x float> %660, i32 0 %662 = extractelement <4 x float> %660, i32 1 %663 = extractelement <4 x float> %660, i32 2 %664 = fcmp oeq float %temp42.0, 1.000000e+00 %665 = select i1 %664, float 1.000000e+00, float 0.000000e+00 %666 = bitcast float %607 to i32 %667 = bitcast float %609 to i32 %668 = bitcast float %238 to i32 %669 = insertelement <4 x i32> undef, i32 %666, i32 0 %670 = insertelement <4 x i32> %669, i32 %667, i32 1 %671 = insertelement <4 x i32> %670, i32 %668, i32 2 %672 = bitcast <8 x i32> %56 to <32 x i8> %673 = bitcast <4 x i32> %58 to <16 x i8> %674 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %671, <32 x i8> %672, <16 x i8> %673, i32 2) %675 = extractelement <4 x float> %674, i32 0 %676 = extractelement <4 x float> %674, i32 1 %677 = extractelement <4 x float> %674, i32 2 %678 = fcmp oeq float %temp42.0, 0.000000e+00 %679 = select i1 %678, float 1.000000e+00, float 0.000000e+00 %680 = fmul float %675, %679 %681 = fmul float %676, %679 %682 = fmul float %677, %679 %683 = fmul float %661, %665 %684 = fadd float %683, %680 %685 = fmul float %662, %665 %686 = fadd float %685, %681 %687 = fmul float %663, %665 %688 = fadd float %687, %682 %689 = fmul float %647, %651 %690 = fadd float %689, %684 %691 = fmul float %648, %651 %692 = fadd float %691, %686 %693 = fmul float %649, %651 %694 = fadd float %693, %688 %695 = fmul float %633, %637 %696 = fadd float %695, %690 %697 = fmul float %634, %637 %698 = fadd float %697, %692 %699 = fmul float %635, %637 %700 = fadd float %699, %694 %701 = fmul float %619, %623 %702 = fadd float %701, %696 %703 = fmul float %620, %623 %704 = fadd float %703, %698 %705 = fmul float %621, %623 %706 = fadd float %705, %700 %707 = fcmp une float %38, %temp24.0 %.sink216 = select i1 %707, float %41, float %40 %temp64.0 = select i1 %707, float 1.953125e-03, float 3.906250e-03 %708 = fdiv float 1.000000e+00, %.sink216 %709 = fmul float %112, %708 %710 = fmul float %111, %708 %711 = call float @llvm.floor.f32(float %709) %712 = fsub float %709, %711 %713 = call float @llvm.floor.f32(float %710) %714 = fsub float %710, %713 %715 = fmul float %42, 2.000000e+00 %716 = fmul float %715, %temp64.0 %717 = fsub float 1.000000e+00, %716 %718 = fmul float %temp64.0, %42 %719 = fmul float %712, %717 %720 = fadd float %719, %718 %721 = fmul float %714, %717 %722 = fadd float %721, %718 %723 = fmul float %720, %temp24.0 %724 = fadd float %723, %temp40.0 %725 = fmul float %722, %temp24.0 %726 = fadd float %725, %temp41.0 %727 = bitcast float %724 to i32 %728 = bitcast float %726 to i32 %729 = bitcast float %238 to i32 %730 = insertelement <4 x i32> undef, i32 %727, i32 0 %731 = insertelement <4 x i32> %730, i32 %728, i32 1 %732 = insertelement <4 x i32> %731, i32 %729, i32 2 %733 = bitcast <8 x i32> %88 to <32 x i8> %734 = bitcast <4 x i32> %90 to <16 x i8> %735 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %732, <32 x i8> %733, <16 x i8> %734, i32 2) %736 = extractelement <4 x float> %735, i32 0 %737 = extractelement <4 x float> %735, i32 1 %738 = extractelement <4 x float> %735, i32 2 %739 = fcmp oeq float %temp42.0, 4.000000e+00 %740 = select i1 %739, float 1.000000e+00, float 0.000000e+00 %741 = bitcast float %724 to i32 %742 = bitcast float %726 to i32 %743 = bitcast float %238 to i32 %744 = insertelement <4 x i32> undef, i32 %741, i32 0 %745 = insertelement <4 x i32> %744, i32 %742, i32 1 %746 = insertelement <4 x i32> %745, i32 %743, i32 2 %747 = bitcast <8 x i32> %80 to <32 x i8> %748 = bitcast <4 x i32> %82 to <16 x i8> %749 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %746, <32 x i8> %747, <16 x i8> %748, i32 2) %750 = extractelement <4 x float> %749, i32 0 %751 = extractelement <4 x float> %749, i32 1 %752 = extractelement <4 x float> %749, i32 2 %753 = fcmp oeq float %temp42.0, 3.000000e+00 %754 = select i1 %753, float 1.000000e+00, float 0.000000e+00 %755 = bitcast float %724 to i32 %756 = bitcast float %726 to i32 %757 = bitcast float %238 to i32 %758 = insertelement <4 x i32> undef, i32 %755, i32 0 %759 = insertelement <4 x i32> %758, i32 %756, i32 1 %760 = insertelement <4 x i32> %759, i32 %757, i32 2 %761 = bitcast <8 x i32> %72 to <32 x i8> %762 = bitcast <4 x i32> %74 to <16 x i8> %763 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %760, <32 x i8> %761, <16 x i8> %762, i32 2) %764 = extractelement <4 x float> %763, i32 0 %765 = extractelement <4 x float> %763, i32 1 %766 = extractelement <4 x float> %763, i32 2 %767 = fcmp oeq float %temp42.0, 2.000000e+00 %768 = select i1 %767, float 1.000000e+00, float 0.000000e+00 %769 = bitcast float %724 to i32 %770 = bitcast float %726 to i32 %771 = bitcast float %238 to i32 %772 = insertelement <4 x i32> undef, i32 %769, i32 0 %773 = insertelement <4 x i32> %772, i32 %770, i32 1 %774 = insertelement <4 x i32> %773, i32 %771, i32 2 %775 = bitcast <8 x i32> %64 to <32 x i8> %776 = bitcast <4 x i32> %66 to <16 x i8> %777 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %774, <32 x i8> %775, <16 x i8> %776, i32 2) %778 = extractelement <4 x float> %777, i32 0 %779 = extractelement <4 x float> %777, i32 1 %780 = extractelement <4 x float> %777, i32 2 %781 = fcmp oeq float %temp42.0, 1.000000e+00 %782 = select i1 %781, float 1.000000e+00, float 0.000000e+00 %783 = bitcast float %724 to i32 %784 = bitcast float %726 to i32 %785 = bitcast float %238 to i32 %786 = insertelement <4 x i32> undef, i32 %783, i32 0 %787 = insertelement <4 x i32> %786, i32 %784, i32 1 %788 = insertelement <4 x i32> %787, i32 %785, i32 2 %789 = bitcast <8 x i32> %56 to <32 x i8> %790 = bitcast <4 x i32> %58 to <16 x i8> %791 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %788, <32 x i8> %789, <16 x i8> %790, i32 2) %792 = extractelement <4 x float> %791, i32 0 %793 = extractelement <4 x float> %791, i32 1 %794 = extractelement <4 x float> %791, i32 2 %795 = fcmp oeq float %temp42.0, 0.000000e+00 %796 = select i1 %795, float 1.000000e+00, float 0.000000e+00 %797 = fmul float %792, %796 %798 = fmul float %793, %796 %799 = fmul float %794, %796 %800 = fmul float %778, %782 %801 = fadd float %800, %797 %802 = fmul float %779, %782 %803 = fadd float %802, %798 %804 = fmul float %780, %782 %805 = fadd float %804, %799 %806 = fmul float %764, %768 %807 = fadd float %806, %801 %808 = fmul float %765, %768 %809 = fadd float %808, %803 %810 = fmul float %766, %768 %811 = fadd float %810, %805 %812 = fmul float %750, %754 %813 = fadd float %812, %807 %814 = fmul float %751, %754 %815 = fadd float %814, %809 %816 = fmul float %752, %754 %817 = fadd float %816, %811 %818 = fmul float %736, %740 %819 = fadd float %818, %813 %820 = fmul float %737, %740 %821 = fadd float %820, %815 %822 = fmul float %738, %740 %823 = fadd float %822, %817 %824 = fcmp une float %38, %temp24.0 %.sink217 = select i1 %824, float %41, float %40 %temp68.0 = select i1 %824, float 1.953125e-03, float 3.906250e-03 %825 = fdiv float 1.000000e+00, %.sink217 %826 = fmul float %112, %825 %827 = fmul float %110, %825 %828 = call float @llvm.floor.f32(float %826) %829 = fsub float %826, %828 %830 = call float @llvm.floor.f32(float %827) %831 = fsub float %827, %830 %832 = fmul float %42, 2.000000e+00 %833 = fmul float %832, %temp68.0 %834 = fsub float 1.000000e+00, %833 %835 = fmul float %temp68.0, %42 %836 = fmul float %829, %834 %837 = fadd float %836, %835 %838 = fmul float %831, %834 %839 = fadd float %838, %835 %840 = fmul float %837, %temp24.0 %841 = fadd float %840, %temp40.0 %842 = fmul float %839, %temp24.0 %843 = fadd float %842, %temp41.0 %844 = bitcast float %841 to i32 %845 = bitcast float %843 to i32 %846 = bitcast float %238 to i32 %847 = insertelement <4 x i32> undef, i32 %844, i32 0 %848 = insertelement <4 x i32> %847, i32 %845, i32 1 %849 = insertelement <4 x i32> %848, i32 %846, i32 2 %850 = bitcast <8 x i32> %88 to <32 x i8> %851 = bitcast <4 x i32> %90 to <16 x i8> %852 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %849, <32 x i8> %850, <16 x i8> %851, i32 2) %853 = extractelement <4 x float> %852, i32 0 %854 = extractelement <4 x float> %852, i32 1 %855 = extractelement <4 x float> %852, i32 2 %856 = fcmp oeq float %temp42.0, 4.000000e+00 %857 = select i1 %856, float 1.000000e+00, float 0.000000e+00 %858 = bitcast float %841 to i32 %859 = bitcast float %843 to i32 %860 = bitcast float %238 to i32 %861 = insertelement <4 x i32> undef, i32 %858, i32 0 %862 = insertelement <4 x i32> %861, i32 %859, i32 1 %863 = insertelement <4 x i32> %862, i32 %860, i32 2 %864 = bitcast <8 x i32> %80 to <32 x i8> %865 = bitcast <4 x i32> %82 to <16 x i8> %866 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %863, <32 x i8> %864, <16 x i8> %865, i32 2) %867 = extractelement <4 x float> %866, i32 0 %868 = extractelement <4 x float> %866, i32 1 %869 = extractelement <4 x float> %866, i32 2 %870 = fcmp oeq float %temp42.0, 3.000000e+00 %871 = select i1 %870, float 1.000000e+00, float 0.000000e+00 %872 = bitcast float %841 to i32 %873 = bitcast float %843 to i32 %874 = bitcast float %238 to i32 %875 = insertelement <4 x i32> undef, i32 %872, i32 0 %876 = insertelement <4 x i32> %875, i32 %873, i32 1 %877 = insertelement <4 x i32> %876, i32 %874, i32 2 %878 = bitcast <8 x i32> %72 to <32 x i8> %879 = bitcast <4 x i32> %74 to <16 x i8> %880 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %877, <32 x i8> %878, <16 x i8> %879, i32 2) %881 = extractelement <4 x float> %880, i32 0 %882 = extractelement <4 x float> %880, i32 1 %883 = extractelement <4 x float> %880, i32 2 %884 = fcmp oeq float %temp42.0, 2.000000e+00 %885 = select i1 %884, float 1.000000e+00, float 0.000000e+00 %886 = bitcast float %841 to i32 %887 = bitcast float %843 to i32 %888 = bitcast float %238 to i32 %889 = insertelement <4 x i32> undef, i32 %886, i32 0 %890 = insertelement <4 x i32> %889, i32 %887, i32 1 %891 = insertelement <4 x i32> %890, i32 %888, i32 2 %892 = bitcast <8 x i32> %64 to <32 x i8> %893 = bitcast <4 x i32> %66 to <16 x i8> %894 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %891, <32 x i8> %892, <16 x i8> %893, i32 2) %895 = extractelement <4 x float> %894, i32 0 %896 = extractelement <4 x float> %894, i32 1 %897 = extractelement <4 x float> %894, i32 2 %898 = fcmp oeq float %temp42.0, 1.000000e+00 %899 = select i1 %898, float 1.000000e+00, float 0.000000e+00 %900 = bitcast float %841 to i32 %901 = bitcast float %843 to i32 %902 = bitcast float %238 to i32 %903 = insertelement <4 x i32> undef, i32 %900, i32 0 %904 = insertelement <4 x i32> %903, i32 %901, i32 1 %905 = insertelement <4 x i32> %904, i32 %902, i32 2 %906 = bitcast <8 x i32> %56 to <32 x i8> %907 = bitcast <4 x i32> %58 to <16 x i8> %908 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %905, <32 x i8> %906, <16 x i8> %907, i32 2) %909 = extractelement <4 x float> %908, i32 0 %910 = extractelement <4 x float> %908, i32 1 %911 = extractelement <4 x float> %908, i32 2 %912 = fcmp oeq float %temp42.0, 0.000000e+00 %913 = select i1 %912, float 1.000000e+00, float 0.000000e+00 %914 = fmul float %909, %913 %915 = fmul float %910, %913 %916 = fmul float %911, %913 %917 = fmul float %895, %899 %918 = fadd float %917, %914 %919 = fmul float %896, %899 %920 = fadd float %919, %915 %921 = fmul float %897, %899 %922 = fadd float %921, %916 %923 = fmul float %881, %885 %924 = fadd float %923, %918 %925 = fmul float %882, %885 %926 = fadd float %925, %920 %927 = fmul float %883, %885 %928 = fadd float %927, %922 %929 = fmul float %867, %871 %930 = fadd float %929, %924 %931 = fmul float %868, %871 %932 = fadd float %931, %926 %933 = fmul float %869, %871 %934 = fadd float %933, %928 %935 = fmul float %853, %857 %936 = fadd float %935, %930 %937 = fmul float %854, %857 %938 = fadd float %937, %932 %939 = fmul float %855, %857 %940 = fadd float %939, %934 %941 = fcmp une float %38, %temp28.0 %.sink218 = select i1 %941, float %41, float %40 %temp72.0 = select i1 %941, float 1.953125e-03, float 3.906250e-03 %942 = fdiv float 1.000000e+00, %.sink218 %943 = fmul float %110, %942 %944 = fmul float %111, %942 %945 = call float @llvm.floor.f32(float %943) %946 = fsub float %943, %945 %947 = call float @llvm.floor.f32(float %944) %948 = fsub float %944, %947 %949 = fmul float %42, 2.000000e+00 %950 = fmul float %949, %temp72.0 %951 = fsub float 1.000000e+00, %950 %952 = fmul float %temp72.0, %42 %953 = fmul float %946, %951 %954 = fadd float %953, %952 %955 = fmul float %948, %951 %956 = fadd float %955, %952 %957 = fmul float %954, %temp28.0 %958 = fadd float %957, %temp16.0 %959 = fmul float %956, %temp28.0 %960 = fadd float %959, %temp17.0 %961 = bitcast float %958 to i32 %962 = bitcast float %960 to i32 %963 = bitcast float %238 to i32 %964 = insertelement <4 x i32> undef, i32 %961, i32 0 %965 = insertelement <4 x i32> %964, i32 %962, i32 1 %966 = insertelement <4 x i32> %965, i32 %963, i32 2 %967 = bitcast <8 x i32> %88 to <32 x i8> %968 = bitcast <4 x i32> %90 to <16 x i8> %969 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %966, <32 x i8> %967, <16 x i8> %968, i32 2) %970 = extractelement <4 x float> %969, i32 0 %971 = extractelement <4 x float> %969, i32 1 %972 = extractelement <4 x float> %969, i32 2 %973 = fcmp oeq float %temp18.0, 4.000000e+00 %974 = select i1 %973, float 1.000000e+00, float 0.000000e+00 %975 = bitcast float %958 to i32 %976 = bitcast float %960 to i32 %977 = bitcast float %238 to i32 %978 = insertelement <4 x i32> undef, i32 %975, i32 0 %979 = insertelement <4 x i32> %978, i32 %976, i32 1 %980 = insertelement <4 x i32> %979, i32 %977, i32 2 %981 = bitcast <8 x i32> %80 to <32 x i8> %982 = bitcast <4 x i32> %82 to <16 x i8> %983 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %980, <32 x i8> %981, <16 x i8> %982, i32 2) %984 = extractelement <4 x float> %983, i32 0 %985 = extractelement <4 x float> %983, i32 1 %986 = extractelement <4 x float> %983, i32 2 %987 = fcmp oeq float %temp18.0, 3.000000e+00 %988 = select i1 %987, float 1.000000e+00, float 0.000000e+00 %989 = bitcast float %958 to i32 %990 = bitcast float %960 to i32 %991 = bitcast float %238 to i32 %992 = insertelement <4 x i32> undef, i32 %989, i32 0 %993 = insertelement <4 x i32> %992, i32 %990, i32 1 %994 = insertelement <4 x i32> %993, i32 %991, i32 2 %995 = bitcast <8 x i32> %72 to <32 x i8> %996 = bitcast <4 x i32> %74 to <16 x i8> %997 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %994, <32 x i8> %995, <16 x i8> %996, i32 2) %998 = extractelement <4 x float> %997, i32 0 %999 = extractelement <4 x float> %997, i32 1 %1000 = extractelement <4 x float> %997, i32 2 %1001 = fcmp oeq float %temp18.0, 2.000000e+00 %1002 = select i1 %1001, float 1.000000e+00, float 0.000000e+00 %1003 = bitcast float %958 to i32 %1004 = bitcast float %960 to i32 %1005 = bitcast float %238 to i32 %1006 = insertelement <4 x i32> undef, i32 %1003, i32 0 %1007 = insertelement <4 x i32> %1006, i32 %1004, i32 1 %1008 = insertelement <4 x i32> %1007, i32 %1005, i32 2 %1009 = bitcast <8 x i32> %64 to <32 x i8> %1010 = bitcast <4 x i32> %66 to <16 x i8> %1011 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1008, <32 x i8> %1009, <16 x i8> %1010, i32 2) %1012 = extractelement <4 x float> %1011, i32 0 %1013 = extractelement <4 x float> %1011, i32 1 %1014 = extractelement <4 x float> %1011, i32 2 %1015 = fcmp oeq float %temp18.0, 1.000000e+00 %1016 = select i1 %1015, float 1.000000e+00, float 0.000000e+00 %1017 = bitcast float %958 to i32 %1018 = bitcast float %960 to i32 %1019 = bitcast float %238 to i32 %1020 = insertelement <4 x i32> undef, i32 %1017, i32 0 %1021 = insertelement <4 x i32> %1020, i32 %1018, i32 1 %1022 = insertelement <4 x i32> %1021, i32 %1019, i32 2 %1023 = bitcast <8 x i32> %56 to <32 x i8> %1024 = bitcast <4 x i32> %58 to <16 x i8> %1025 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1022, <32 x i8> %1023, <16 x i8> %1024, i32 2) %1026 = extractelement <4 x float> %1025, i32 0 %1027 = extractelement <4 x float> %1025, i32 1 %1028 = extractelement <4 x float> %1025, i32 2 %1029 = fcmp oeq float %temp18.0, 0.000000e+00 %1030 = select i1 %1029, float 1.000000e+00, float 0.000000e+00 %1031 = fmul float %1026, %1030 %1032 = fmul float %1027, %1030 %1033 = fmul float %1028, %1030 %1034 = fmul float %1012, %1016 %1035 = fadd float %1034, %1031 %1036 = fmul float %1013, %1016 %1037 = fadd float %1036, %1032 %1038 = fmul float %1014, %1016 %1039 = fadd float %1038, %1033 %1040 = fmul float %998, %1002 %1041 = fadd float %1040, %1035 %1042 = fmul float %999, %1002 %1043 = fadd float %1042, %1037 %1044 = fmul float %1000, %1002 %1045 = fadd float %1044, %1039 %1046 = fmul float %984, %988 %1047 = fadd float %1046, %1041 %1048 = fmul float %985, %988 %1049 = fadd float %1048, %1043 %1050 = fmul float %986, %988 %1051 = fadd float %1050, %1045 %1052 = fmul float %970, %974 %1053 = fadd float %1052, %1047 %1054 = fmul float %971, %974 %1055 = fadd float %1054, %1049 %1056 = fmul float %972, %974 %1057 = fadd float %1056, %1051 %1058 = fcmp une float %38, %temp28.0 %.sink219 = select i1 %1058, float %41, float %40 %temp76.0 = select i1 %1058, float 1.953125e-03, float 3.906250e-03 %1059 = fdiv float 1.000000e+00, %.sink219 %1060 = fmul float %112, %1059 %1061 = fmul float %111, %1059 %1062 = call float @llvm.floor.f32(float %1060) %1063 = fsub float %1060, %1062 %1064 = call float @llvm.floor.f32(float %1061) %1065 = fsub float %1061, %1064 %1066 = fmul float %42, 2.000000e+00 %1067 = fmul float %1066, %temp76.0 %1068 = fsub float 1.000000e+00, %1067 %1069 = fmul float %temp76.0, %42 %1070 = fmul float %1063, %1068 %1071 = fadd float %1070, %1069 %1072 = fmul float %1065, %1068 %1073 = fadd float %1072, %1069 %1074 = fmul float %1071, %temp28.0 %1075 = fadd float %1074, %temp16.0 %1076 = fmul float %1073, %temp28.0 %1077 = fadd float %1076, %temp17.0 %1078 = bitcast float %1075 to i32 %1079 = bitcast float %1077 to i32 %1080 = bitcast float %238 to i32 %1081 = insertelement <4 x i32> undef, i32 %1078, i32 0 %1082 = insertelement <4 x i32> %1081, i32 %1079, i32 1 %1083 = insertelement <4 x i32> %1082, i32 %1080, i32 2 %1084 = bitcast <8 x i32> %88 to <32 x i8> %1085 = bitcast <4 x i32> %90 to <16 x i8> %1086 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1083, <32 x i8> %1084, <16 x i8> %1085, i32 2) %1087 = extractelement <4 x float> %1086, i32 0 %1088 = extractelement <4 x float> %1086, i32 1 %1089 = extractelement <4 x float> %1086, i32 2 %1090 = fcmp oeq float %temp18.0, 4.000000e+00 %1091 = select i1 %1090, float 1.000000e+00, float 0.000000e+00 %1092 = bitcast float %1075 to i32 %1093 = bitcast float %1077 to i32 %1094 = bitcast float %238 to i32 %1095 = insertelement <4 x i32> undef, i32 %1092, i32 0 %1096 = insertelement <4 x i32> %1095, i32 %1093, i32 1 %1097 = insertelement <4 x i32> %1096, i32 %1094, i32 2 %1098 = bitcast <8 x i32> %80 to <32 x i8> %1099 = bitcast <4 x i32> %82 to <16 x i8> %1100 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1097, <32 x i8> %1098, <16 x i8> %1099, i32 2) %1101 = extractelement <4 x float> %1100, i32 0 %1102 = extractelement <4 x float> %1100, i32 1 %1103 = extractelement <4 x float> %1100, i32 2 %1104 = fcmp oeq float %temp18.0, 3.000000e+00 %1105 = select i1 %1104, float 1.000000e+00, float 0.000000e+00 %1106 = bitcast float %1075 to i32 %1107 = bitcast float %1077 to i32 %1108 = bitcast float %238 to i32 %1109 = insertelement <4 x i32> undef, i32 %1106, i32 0 %1110 = insertelement <4 x i32> %1109, i32 %1107, i32 1 %1111 = insertelement <4 x i32> %1110, i32 %1108, i32 2 %1112 = bitcast <8 x i32> %72 to <32 x i8> %1113 = bitcast <4 x i32> %74 to <16 x i8> %1114 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1111, <32 x i8> %1112, <16 x i8> %1113, i32 2) %1115 = extractelement <4 x float> %1114, i32 0 %1116 = extractelement <4 x float> %1114, i32 1 %1117 = extractelement <4 x float> %1114, i32 2 %1118 = fcmp oeq float %temp18.0, 2.000000e+00 %1119 = select i1 %1118, float 1.000000e+00, float 0.000000e+00 %1120 = bitcast float %1075 to i32 %1121 = bitcast float %1077 to i32 %1122 = bitcast float %238 to i32 %1123 = insertelement <4 x i32> undef, i32 %1120, i32 0 %1124 = insertelement <4 x i32> %1123, i32 %1121, i32 1 %1125 = insertelement <4 x i32> %1124, i32 %1122, i32 2 %1126 = bitcast <8 x i32> %64 to <32 x i8> %1127 = bitcast <4 x i32> %66 to <16 x i8> %1128 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1125, <32 x i8> %1126, <16 x i8> %1127, i32 2) %1129 = extractelement <4 x float> %1128, i32 0 %1130 = extractelement <4 x float> %1128, i32 1 %1131 = extractelement <4 x float> %1128, i32 2 %1132 = fcmp oeq float %temp18.0, 1.000000e+00 %1133 = select i1 %1132, float 1.000000e+00, float 0.000000e+00 %1134 = bitcast float %1075 to i32 %1135 = bitcast float %1077 to i32 %1136 = bitcast float %238 to i32 %1137 = insertelement <4 x i32> undef, i32 %1134, i32 0 %1138 = insertelement <4 x i32> %1137, i32 %1135, i32 1 %1139 = insertelement <4 x i32> %1138, i32 %1136, i32 2 %1140 = bitcast <8 x i32> %56 to <32 x i8> %1141 = bitcast <4 x i32> %58 to <16 x i8> %1142 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1139, <32 x i8> %1140, <16 x i8> %1141, i32 2) %1143 = extractelement <4 x float> %1142, i32 0 %1144 = extractelement <4 x float> %1142, i32 1 %1145 = extractelement <4 x float> %1142, i32 2 %1146 = fcmp oeq float %temp18.0, 0.000000e+00 %1147 = select i1 %1146, float 1.000000e+00, float 0.000000e+00 %1148 = fmul float %1143, %1147 %1149 = fmul float %1144, %1147 %1150 = fmul float %1145, %1147 %1151 = fmul float %1129, %1133 %1152 = fadd float %1151, %1148 %1153 = fmul float %1130, %1133 %1154 = fadd float %1153, %1149 %1155 = fmul float %1131, %1133 %1156 = fadd float %1155, %1150 %1157 = fmul float %1115, %1119 %1158 = fadd float %1157, %1152 %1159 = fmul float %1116, %1119 %1160 = fadd float %1159, %1154 %1161 = fmul float %1117, %1119 %1162 = fadd float %1161, %1156 %1163 = fmul float %1101, %1105 %1164 = fadd float %1163, %1158 %1165 = fmul float %1102, %1105 %1166 = fadd float %1165, %1160 %1167 = fmul float %1103, %1105 %1168 = fadd float %1167, %1162 %1169 = fmul float %1087, %1091 %1170 = fadd float %1169, %1164 %1171 = fmul float %1088, %1091 %1172 = fadd float %1171, %1166 %1173 = fmul float %1089, %1091 %1174 = fadd float %1173, %1168 %1175 = fcmp une float %38, %temp28.0 %.sink220 = select i1 %1175, float %41, float %40 %temp80.0 = select i1 %1175, float 1.953125e-03, float 3.906250e-03 %1176 = fdiv float 1.000000e+00, %.sink220 %1177 = fmul float %112, %1176 %1178 = fmul float %110, %1176 %1179 = call float @llvm.floor.f32(float %1177) %1180 = fsub float %1177, %1179 %1181 = call float @llvm.floor.f32(float %1178) %1182 = fsub float %1178, %1181 %1183 = fmul float %42, 2.000000e+00 %1184 = fmul float %1183, %temp80.0 %1185 = fsub float 1.000000e+00, %1184 %1186 = fmul float %temp80.0, %42 %1187 = fmul float %1180, %1185 %1188 = fadd float %1187, %1186 %1189 = fmul float %1182, %1185 %1190 = fadd float %1189, %1186 %1191 = fmul float %1188, %temp28.0 %1192 = fadd float %1191, %temp16.0 %1193 = fmul float %1190, %temp28.0 %1194 = fadd float %1193, %temp17.0 %1195 = bitcast float %1192 to i32 %1196 = bitcast float %1194 to i32 %1197 = bitcast float %238 to i32 %1198 = insertelement <4 x i32> undef, i32 %1195, i32 0 %1199 = insertelement <4 x i32> %1198, i32 %1196, i32 1 %1200 = insertelement <4 x i32> %1199, i32 %1197, i32 2 %1201 = bitcast <8 x i32> %88 to <32 x i8> %1202 = bitcast <4 x i32> %90 to <16 x i8> %1203 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1200, <32 x i8> %1201, <16 x i8> %1202, i32 2) %1204 = extractelement <4 x float> %1203, i32 0 %1205 = extractelement <4 x float> %1203, i32 1 %1206 = extractelement <4 x float> %1203, i32 2 %1207 = fcmp oeq float %temp18.0, 4.000000e+00 %1208 = select i1 %1207, float 1.000000e+00, float 0.000000e+00 %1209 = bitcast float %1192 to i32 %1210 = bitcast float %1194 to i32 %1211 = bitcast float %238 to i32 %1212 = insertelement <4 x i32> undef, i32 %1209, i32 0 %1213 = insertelement <4 x i32> %1212, i32 %1210, i32 1 %1214 = insertelement <4 x i32> %1213, i32 %1211, i32 2 %1215 = bitcast <8 x i32> %80 to <32 x i8> %1216 = bitcast <4 x i32> %82 to <16 x i8> %1217 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1214, <32 x i8> %1215, <16 x i8> %1216, i32 2) %1218 = extractelement <4 x float> %1217, i32 0 %1219 = extractelement <4 x float> %1217, i32 1 %1220 = extractelement <4 x float> %1217, i32 2 %1221 = fcmp oeq float %temp18.0, 3.000000e+00 %1222 = select i1 %1221, float 1.000000e+00, float 0.000000e+00 %1223 = bitcast float %1192 to i32 %1224 = bitcast float %1194 to i32 %1225 = bitcast float %238 to i32 %1226 = insertelement <4 x i32> undef, i32 %1223, i32 0 %1227 = insertelement <4 x i32> %1226, i32 %1224, i32 1 %1228 = insertelement <4 x i32> %1227, i32 %1225, i32 2 %1229 = bitcast <8 x i32> %72 to <32 x i8> %1230 = bitcast <4 x i32> %74 to <16 x i8> %1231 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1228, <32 x i8> %1229, <16 x i8> %1230, i32 2) %1232 = extractelement <4 x float> %1231, i32 0 %1233 = extractelement <4 x float> %1231, i32 1 %1234 = extractelement <4 x float> %1231, i32 2 %1235 = fcmp oeq float %temp18.0, 2.000000e+00 %1236 = select i1 %1235, float 1.000000e+00, float 0.000000e+00 %1237 = bitcast float %1192 to i32 %1238 = bitcast float %1194 to i32 %1239 = bitcast float %238 to i32 %1240 = insertelement <4 x i32> undef, i32 %1237, i32 0 %1241 = insertelement <4 x i32> %1240, i32 %1238, i32 1 %1242 = insertelement <4 x i32> %1241, i32 %1239, i32 2 %1243 = bitcast <8 x i32> %64 to <32 x i8> %1244 = bitcast <4 x i32> %66 to <16 x i8> %1245 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1242, <32 x i8> %1243, <16 x i8> %1244, i32 2) %1246 = extractelement <4 x float> %1245, i32 0 %1247 = extractelement <4 x float> %1245, i32 1 %1248 = extractelement <4 x float> %1245, i32 2 %1249 = fcmp oeq float %temp18.0, 1.000000e+00 %1250 = select i1 %1249, float 1.000000e+00, float 0.000000e+00 %1251 = bitcast float %1192 to i32 %1252 = bitcast float %1194 to i32 %1253 = bitcast float %238 to i32 %1254 = insertelement <4 x i32> undef, i32 %1251, i32 0 %1255 = insertelement <4 x i32> %1254, i32 %1252, i32 1 %1256 = insertelement <4 x i32> %1255, i32 %1253, i32 2 %1257 = bitcast <8 x i32> %56 to <32 x i8> %1258 = bitcast <4 x i32> %58 to <16 x i8> %1259 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1256, <32 x i8> %1257, <16 x i8> %1258, i32 2) %1260 = extractelement <4 x float> %1259, i32 0 %1261 = extractelement <4 x float> %1259, i32 1 %1262 = extractelement <4 x float> %1259, i32 2 %1263 = fcmp oeq float %temp18.0, 0.000000e+00 %1264 = select i1 %1263, float 1.000000e+00, float 0.000000e+00 %1265 = fmul float %1260, %1264 %1266 = fmul float %1261, %1264 %1267 = fmul float %1262, %1264 %1268 = fmul float %1246, %1250 %1269 = fadd float %1268, %1265 %1270 = fmul float %1247, %1250 %1271 = fadd float %1270, %1266 %1272 = fmul float %1248, %1250 %1273 = fadd float %1272, %1267 %1274 = fmul float %1232, %1236 %1275 = fadd float %1274, %1269 %1276 = fmul float %1233, %1236 %1277 = fadd float %1276, %1271 %1278 = fmul float %1234, %1236 %1279 = fadd float %1278, %1273 %1280 = fmul float %1218, %1222 %1281 = fadd float %1280, %1275 %1282 = fmul float %1219, %1222 %1283 = fadd float %1282, %1277 %1284 = fmul float %1220, %1222 %1285 = fadd float %1284, %1279 %1286 = fmul float %1204, %1208 %1287 = fadd float %1286, %1281 %1288 = fmul float %1205, %1208 %1289 = fadd float %1288, %1283 %1290 = fmul float %1206, %1208 %1291 = fadd float %1290, %1285 %1292 = fmul float %1053, %163 %1293 = fmul float %1055, %163 %1294 = fmul float %1057, %163 %1295 = fmul float %1170, %161 %1296 = fadd float %1295, %1292 %1297 = fmul float %1172, %161 %1298 = fadd float %1297, %1293 %1299 = fmul float %1174, %161 %1300 = fadd float %1299, %1294 %1301 = fmul float %1287, %162 %1302 = fadd float %1301, %1296 %1303 = fmul float %1289, %162 %1304 = fadd float %1303, %1298 %1305 = fmul float %1291, %162 %1306 = fadd float %1305, %1300 %1307 = fmul float %702, %163 %1308 = fmul float %704, %163 %1309 = fmul float %706, %163 %1310 = fmul float %819, %161 %1311 = fadd float %1310, %1307 %1312 = fmul float %821, %161 %1313 = fadd float %1312, %1308 %1314 = fmul float %823, %161 %1315 = fadd float %1314, %1309 %1316 = fmul float %936, %162 %1317 = fadd float %1316, %1311 %1318 = fmul float %938, %162 %1319 = fadd float %1318, %1313 %1320 = fmul float %940, %162 %1321 = fadd float %1320, %1315 %1322 = fmul float %351, %163 %1323 = fmul float %353, %163 %1324 = fmul float %355, %163 %1325 = fmul float %468, %161 %1326 = fadd float %1325, %1322 %1327 = fmul float %470, %161 %1328 = fadd float %1327, %1323 %1329 = fmul float %472, %161 %1330 = fadd float %1329, %1324 %1331 = fmul float %585, %162 %1332 = fadd float %1331, %1326 %1333 = fmul float %587, %162 %1334 = fadd float %1333, %1328 %1335 = fmul float %589, %162 %1336 = fadd float %1335, %1330 %1337 = fmul float %99, %1332 %1338 = fmul float %99, %1334 %1339 = fmul float %99, %1336 %1340 = fmul float %100, %1317 %1341 = fadd float %1340, %1337 %1342 = fmul float %100, %1319 %1343 = fadd float %1342, %1338 %1344 = fmul float %100, %1321 %1345 = fadd float %1344, %1339 %1346 = fmul float %101, %1302 %1347 = fadd float %1346, %1341 %1348 = fmul float %101, %1304 %1349 = fadd float %1348, %1343 %1350 = fmul float %101, %1306 %1351 = fadd float %1350, %1345 %1352 = fcmp une float %38, %temp20.0 %.sink221 = select i1 %1352, float %41, float %40 %temp52.2 = select i1 %1352, float 1.953125e-03, float 3.906250e-03 %1353 = fdiv float 1.000000e+00, %.sink221 %1354 = fmul float %112, %1353 %1355 = fmul float %111, %1353 %1356 = call float @llvm.floor.f32(float %1354) %1357 = fsub float %1354, %1356 %1358 = call float @llvm.floor.f32(float %1355) %1359 = fsub float %1355, %1358 %1360 = fmul float %42, 2.000000e+00 %1361 = fmul float %1360, %temp52.2 %1362 = fsub float 1.000000e+00, %1361 %1363 = fmul float %temp52.2, %42 %1364 = fmul float %1357, %1362 %1365 = fadd float %1364, %1363 %1366 = fmul float %1359, %1362 %1367 = fadd float %1366, %1363 %1368 = fmul float %1365, %temp20.0 %1369 = fadd float %1368, %temp32.0 %1370 = fmul float %1367, %temp20.0 %1371 = fadd float %1370, %temp33.0 %1372 = bitcast float %1369 to i32 %1373 = bitcast float %1371 to i32 %1374 = bitcast float %238 to i32 %1375 = insertelement <4 x i32> undef, i32 %1372, i32 0 %1376 = insertelement <4 x i32> %1375, i32 %1373, i32 1 %1377 = insertelement <4 x i32> %1376, i32 %1374, i32 2 %1378 = bitcast <8 x i32> %92 to <32 x i8> %1379 = bitcast <4 x i32> %94 to <16 x i8> %1380 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1377, <32 x i8> %1378, <16 x i8> %1379, i32 2) %1381 = extractelement <4 x float> %1380, i32 1 %1382 = extractelement <4 x float> %1380, i32 3 %1383 = fcmp oeq float %temp34.0, 4.000000e+00 %1384 = select i1 %1383, float 1.000000e+00, float 0.000000e+00 %1385 = bitcast float %1369 to i32 %1386 = bitcast float %1371 to i32 %1387 = bitcast float %238 to i32 %1388 = insertelement <4 x i32> undef, i32 %1385, i32 0 %1389 = insertelement <4 x i32> %1388, i32 %1386, i32 1 %1390 = insertelement <4 x i32> %1389, i32 %1387, i32 2 %1391 = bitcast <8 x i32> %84 to <32 x i8> %1392 = bitcast <4 x i32> %86 to <16 x i8> %1393 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1390, <32 x i8> %1391, <16 x i8> %1392, i32 2) %1394 = extractelement <4 x float> %1393, i32 1 %1395 = extractelement <4 x float> %1393, i32 3 %1396 = fcmp oeq float %temp34.0, 3.000000e+00 %1397 = select i1 %1396, float 1.000000e+00, float 0.000000e+00 %1398 = bitcast float %1369 to i32 %1399 = bitcast float %1371 to i32 %1400 = bitcast float %238 to i32 %1401 = insertelement <4 x i32> undef, i32 %1398, i32 0 %1402 = insertelement <4 x i32> %1401, i32 %1399, i32 1 %1403 = insertelement <4 x i32> %1402, i32 %1400, i32 2 %1404 = bitcast <8 x i32> %76 to <32 x i8> %1405 = bitcast <4 x i32> %78 to <16 x i8> %1406 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1403, <32 x i8> %1404, <16 x i8> %1405, i32 2) %1407 = extractelement <4 x float> %1406, i32 1 %1408 = extractelement <4 x float> %1406, i32 3 %1409 = fcmp oeq float %temp34.0, 2.000000e+00 %1410 = select i1 %1409, float 1.000000e+00, float 0.000000e+00 %1411 = bitcast float %1369 to i32 %1412 = bitcast float %1371 to i32 %1413 = bitcast float %238 to i32 %1414 = insertelement <4 x i32> undef, i32 %1411, i32 0 %1415 = insertelement <4 x i32> %1414, i32 %1412, i32 1 %1416 = insertelement <4 x i32> %1415, i32 %1413, i32 2 %1417 = bitcast <8 x i32> %68 to <32 x i8> %1418 = bitcast <4 x i32> %70 to <16 x i8> %1419 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1416, <32 x i8> %1417, <16 x i8> %1418, i32 2) %1420 = extractelement <4 x float> %1419, i32 1 %1421 = extractelement <4 x float> %1419, i32 3 %1422 = fcmp oeq float %temp34.0, 1.000000e+00 %1423 = select i1 %1422, float 1.000000e+00, float 0.000000e+00 %1424 = bitcast float %1369 to i32 %1425 = bitcast float %1371 to i32 %1426 = bitcast float %238 to i32 %1427 = insertelement <4 x i32> undef, i32 %1424, i32 0 %1428 = insertelement <4 x i32> %1427, i32 %1425, i32 1 %1429 = insertelement <4 x i32> %1428, i32 %1426, i32 2 %1430 = bitcast <8 x i32> %60 to <32 x i8> %1431 = bitcast <4 x i32> %62 to <16 x i8> %1432 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1429, <32 x i8> %1430, <16 x i8> %1431, i32 2) %1433 = extractelement <4 x float> %1432, i32 1 %1434 = extractelement <4 x float> %1432, i32 3 %1435 = fcmp oeq float %temp34.0, 0.000000e+00 %1436 = select i1 %1435, float 1.000000e+00, float 0.000000e+00 %1437 = fmul float %1433, %1436 %1438 = fmul float %1434, %1436 %1439 = fmul float %1420, %1423 %1440 = fadd float %1439, %1437 %1441 = fmul float %1421, %1423 %1442 = fadd float %1441, %1438 %1443 = fmul float %1407, %1410 %1444 = fadd float %1443, %1440 %1445 = fmul float %1408, %1410 %1446 = fadd float %1445, %1442 %1447 = fmul float %1394, %1397 %1448 = fadd float %1447, %1444 %1449 = fmul float %1395, %1397 %1450 = fadd float %1449, %1446 %1451 = fmul float %1381, %1384 %1452 = fadd float %1451, %1448 %1453 = fmul float %1382, %1384 %1454 = fadd float %1453, %1450 %1455 = fmul float %1454, 2.000000e+00 %1456 = fadd float %1455, -1.000000e+00 %1457 = fmul float %1452, 2.000000e+00 %1458 = fadd float %1457, -1.000000e+00 %1459 = fmul float %1456, %1456 %1460 = fmul float %1458, %1458 %1461 = fadd float %1459, %1460 %1462 = call float @llvm.AMDIL.clamp.(float %1461, float 0.000000e+00, float 1.000000e+00) %1463 = fcmp une float %38, %temp20.0 %.sink222 = select i1 %1463, float %41, float %40 %temp56.2 = select i1 %1463, float 1.953125e-03, float 3.906250e-03 %1464 = fdiv float 1.000000e+00, %.sink222 %1465 = fmul float %112, %1464 %1466 = fmul float %110, %1464 %1467 = call float @llvm.floor.f32(float %1465) %1468 = fsub float %1465, %1467 %1469 = call float @llvm.floor.f32(float %1466) %1470 = fsub float %1466, %1469 %1471 = fmul float %42, 2.000000e+00 %1472 = fmul float %1471, %temp56.2 %1473 = fsub float 1.000000e+00, %1472 %1474 = fmul float %temp56.2, %42 %1475 = fmul float %1468, %1473 %1476 = fadd float %1475, %1474 %1477 = fmul float %1470, %1473 %1478 = fadd float %1477, %1474 %1479 = fmul float %1476, %temp20.0 %1480 = fadd float %1479, %temp32.0 %1481 = fmul float %1478, %temp20.0 %1482 = fadd float %1481, %temp33.0 %1483 = bitcast float %1480 to i32 %1484 = bitcast float %1482 to i32 %1485 = bitcast float %238 to i32 %1486 = insertelement <4 x i32> undef, i32 %1483, i32 0 %1487 = insertelement <4 x i32> %1486, i32 %1484, i32 1 %1488 = insertelement <4 x i32> %1487, i32 %1485, i32 2 %1489 = bitcast <8 x i32> %92 to <32 x i8> %1490 = bitcast <4 x i32> %94 to <16 x i8> %1491 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1488, <32 x i8> %1489, <16 x i8> %1490, i32 2) %1492 = extractelement <4 x float> %1491, i32 1 %1493 = extractelement <4 x float> %1491, i32 3 %1494 = fcmp oeq float %temp34.0, 4.000000e+00 %1495 = select i1 %1494, float 1.000000e+00, float 0.000000e+00 %1496 = bitcast float %1480 to i32 %1497 = bitcast float %1482 to i32 %1498 = bitcast float %238 to i32 %1499 = insertelement <4 x i32> undef, i32 %1496, i32 0 %1500 = insertelement <4 x i32> %1499, i32 %1497, i32 1 %1501 = insertelement <4 x i32> %1500, i32 %1498, i32 2 %1502 = bitcast <8 x i32> %84 to <32 x i8> %1503 = bitcast <4 x i32> %86 to <16 x i8> %1504 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1501, <32 x i8> %1502, <16 x i8> %1503, i32 2) %1505 = extractelement <4 x float> %1504, i32 1 %1506 = extractelement <4 x float> %1504, i32 3 %1507 = fcmp oeq float %temp34.0, 3.000000e+00 %1508 = select i1 %1507, float 1.000000e+00, float 0.000000e+00 %1509 = bitcast float %1480 to i32 %1510 = bitcast float %1482 to i32 %1511 = bitcast float %238 to i32 %1512 = insertelement <4 x i32> undef, i32 %1509, i32 0 %1513 = insertelement <4 x i32> %1512, i32 %1510, i32 1 %1514 = insertelement <4 x i32> %1513, i32 %1511, i32 2 %1515 = bitcast <8 x i32> %76 to <32 x i8> %1516 = bitcast <4 x i32> %78 to <16 x i8> %1517 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1514, <32 x i8> %1515, <16 x i8> %1516, i32 2) %1518 = extractelement <4 x float> %1517, i32 1 %1519 = extractelement <4 x float> %1517, i32 3 %1520 = fcmp oeq float %temp34.0, 2.000000e+00 %1521 = select i1 %1520, float 1.000000e+00, float 0.000000e+00 %1522 = bitcast float %1480 to i32 %1523 = bitcast float %1482 to i32 %1524 = bitcast float %238 to i32 %1525 = insertelement <4 x i32> undef, i32 %1522, i32 0 %1526 = insertelement <4 x i32> %1525, i32 %1523, i32 1 %1527 = insertelement <4 x i32> %1526, i32 %1524, i32 2 %1528 = bitcast <8 x i32> %68 to <32 x i8> %1529 = bitcast <4 x i32> %70 to <16 x i8> %1530 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1527, <32 x i8> %1528, <16 x i8> %1529, i32 2) %1531 = extractelement <4 x float> %1530, i32 1 %1532 = extractelement <4 x float> %1530, i32 3 %1533 = fcmp oeq float %temp34.0, 1.000000e+00 %1534 = select i1 %1533, float 1.000000e+00, float 0.000000e+00 %1535 = bitcast float %1480 to i32 %1536 = bitcast float %1482 to i32 %1537 = bitcast float %238 to i32 %1538 = insertelement <4 x i32> undef, i32 %1535, i32 0 %1539 = insertelement <4 x i32> %1538, i32 %1536, i32 1 %1540 = insertelement <4 x i32> %1539, i32 %1537, i32 2 %1541 = bitcast <8 x i32> %60 to <32 x i8> %1542 = bitcast <4 x i32> %62 to <16 x i8> %1543 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1540, <32 x i8> %1541, <16 x i8> %1542, i32 2) %1544 = extractelement <4 x float> %1543, i32 1 %1545 = extractelement <4 x float> %1543, i32 3 %1546 = fcmp oeq float %temp34.0, 0.000000e+00 %1547 = select i1 %1546, float 1.000000e+00, float 0.000000e+00 %1548 = fmul float %1544, %1547 %1549 = fmul float %1545, %1547 %1550 = fmul float %1531, %1534 %1551 = fadd float %1550, %1548 %1552 = fmul float %1532, %1534 %1553 = fadd float %1552, %1549 %1554 = fmul float %1518, %1521 %1555 = fadd float %1554, %1551 %1556 = fmul float %1519, %1521 %1557 = fadd float %1556, %1553 %1558 = fmul float %1505, %1508 %1559 = fadd float %1558, %1555 %1560 = fmul float %1506, %1508 %1561 = fadd float %1560, %1557 %1562 = fmul float %1492, %1495 %1563 = fadd float %1562, %1559 %1564 = fmul float %1493, %1495 %1565 = fadd float %1564, %1561 %1566 = fmul float %1565, 2.000000e+00 %1567 = fadd float %1566, -1.000000e+00 %1568 = fmul float %1563, 2.000000e+00 %1569 = fadd float %1568, -1.000000e+00 %1570 = fmul float %1567, %1567 %1571 = fmul float %1569, %1569 %1572 = fadd float %1570, %1571 %1573 = call float @llvm.AMDIL.clamp.(float %1572, float 0.000000e+00, float 1.000000e+00) %1574 = fcmp une float %38, %temp20.0 %.sink223 = select i1 %1574, float %41, float %40 %temp60.2 = select i1 %1574, float 1.953125e-03, float 3.906250e-03 %1575 = fdiv float 1.000000e+00, %.sink223 %1576 = fmul float %110, %1575 %1577 = fmul float %111, %1575 %1578 = call float @llvm.floor.f32(float %1576) %1579 = fsub float %1576, %1578 %1580 = call float @llvm.floor.f32(float %1577) %1581 = fsub float %1577, %1580 %1582 = fmul float %42, 2.000000e+00 %1583 = fmul float %1582, %temp60.2 %1584 = fsub float 1.000000e+00, %1583 %1585 = fmul float %temp60.2, %42 %1586 = fmul float %1579, %1584 %1587 = fadd float %1586, %1585 %1588 = fmul float %1581, %1584 %1589 = fadd float %1588, %1585 %1590 = fmul float %1587, %temp20.0 %1591 = fadd float %1590, %temp32.0 %1592 = fmul float %1589, %temp20.0 %1593 = fadd float %1592, %temp33.0 %1594 = bitcast float %1591 to i32 %1595 = bitcast float %1593 to i32 %1596 = bitcast float %238 to i32 %1597 = insertelement <4 x i32> undef, i32 %1594, i32 0 %1598 = insertelement <4 x i32> %1597, i32 %1595, i32 1 %1599 = insertelement <4 x i32> %1598, i32 %1596, i32 2 %1600 = bitcast <8 x i32> %92 to <32 x i8> %1601 = bitcast <4 x i32> %94 to <16 x i8> %1602 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1599, <32 x i8> %1600, <16 x i8> %1601, i32 2) %1603 = extractelement <4 x float> %1602, i32 1 %1604 = extractelement <4 x float> %1602, i32 3 %1605 = fcmp oeq float %temp34.0, 4.000000e+00 %1606 = select i1 %1605, float 1.000000e+00, float 0.000000e+00 %1607 = bitcast float %1591 to i32 %1608 = bitcast float %1593 to i32 %1609 = bitcast float %238 to i32 %1610 = insertelement <4 x i32> undef, i32 %1607, i32 0 %1611 = insertelement <4 x i32> %1610, i32 %1608, i32 1 %1612 = insertelement <4 x i32> %1611, i32 %1609, i32 2 %1613 = bitcast <8 x i32> %84 to <32 x i8> %1614 = bitcast <4 x i32> %86 to <16 x i8> %1615 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1612, <32 x i8> %1613, <16 x i8> %1614, i32 2) %1616 = extractelement <4 x float> %1615, i32 1 %1617 = extractelement <4 x float> %1615, i32 3 %1618 = fcmp oeq float %temp34.0, 3.000000e+00 %1619 = select i1 %1618, float 1.000000e+00, float 0.000000e+00 %1620 = bitcast float %1591 to i32 %1621 = bitcast float %1593 to i32 %1622 = bitcast float %238 to i32 %1623 = insertelement <4 x i32> undef, i32 %1620, i32 0 %1624 = insertelement <4 x i32> %1623, i32 %1621, i32 1 %1625 = insertelement <4 x i32> %1624, i32 %1622, i32 2 %1626 = bitcast <8 x i32> %76 to <32 x i8> %1627 = bitcast <4 x i32> %78 to <16 x i8> %1628 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1625, <32 x i8> %1626, <16 x i8> %1627, i32 2) %1629 = extractelement <4 x float> %1628, i32 1 %1630 = extractelement <4 x float> %1628, i32 3 %1631 = fcmp oeq float %temp34.0, 2.000000e+00 %1632 = select i1 %1631, float 1.000000e+00, float 0.000000e+00 %1633 = bitcast float %1591 to i32 %1634 = bitcast float %1593 to i32 %1635 = bitcast float %238 to i32 %1636 = insertelement <4 x i32> undef, i32 %1633, i32 0 %1637 = insertelement <4 x i32> %1636, i32 %1634, i32 1 %1638 = insertelement <4 x i32> %1637, i32 %1635, i32 2 %1639 = bitcast <8 x i32> %68 to <32 x i8> %1640 = bitcast <4 x i32> %70 to <16 x i8> %1641 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1638, <32 x i8> %1639, <16 x i8> %1640, i32 2) %1642 = extractelement <4 x float> %1641, i32 1 %1643 = extractelement <4 x float> %1641, i32 3 %1644 = fcmp oeq float %temp34.0, 1.000000e+00 %1645 = select i1 %1644, float 1.000000e+00, float 0.000000e+00 %1646 = bitcast float %1591 to i32 %1647 = bitcast float %1593 to i32 %1648 = bitcast float %238 to i32 %1649 = insertelement <4 x i32> undef, i32 %1646, i32 0 %1650 = insertelement <4 x i32> %1649, i32 %1647, i32 1 %1651 = insertelement <4 x i32> %1650, i32 %1648, i32 2 %1652 = bitcast <8 x i32> %60 to <32 x i8> %1653 = bitcast <4 x i32> %62 to <16 x i8> %1654 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1651, <32 x i8> %1652, <16 x i8> %1653, i32 2) %1655 = extractelement <4 x float> %1654, i32 1 %1656 = extractelement <4 x float> %1654, i32 3 %1657 = fcmp oeq float %temp34.0, 0.000000e+00 %1658 = select i1 %1657, float 1.000000e+00, float 0.000000e+00 %1659 = fmul float %1655, %1658 %1660 = fmul float %1656, %1658 %1661 = fmul float %1642, %1645 %1662 = fadd float %1661, %1659 %1663 = fmul float %1643, %1645 %1664 = fadd float %1663, %1660 %1665 = fmul float %1629, %1632 %1666 = fadd float %1665, %1662 %1667 = fmul float %1630, %1632 %1668 = fadd float %1667, %1664 %1669 = fmul float %1616, %1619 %1670 = fadd float %1669, %1666 %1671 = fmul float %1617, %1619 %1672 = fadd float %1671, %1668 %1673 = fmul float %1603, %1606 %1674 = fadd float %1673, %1670 %1675 = fmul float %1604, %1606 %1676 = fadd float %1675, %1672 %1677 = fmul float %1676, 2.000000e+00 %1678 = fadd float %1677, -1.000000e+00 %1679 = fmul float %1674, 2.000000e+00 %1680 = fadd float %1679, -1.000000e+00 %1681 = fmul float %1678, %1678 %1682 = fmul float %1680, %1680 %1683 = fadd float %1681, %1682 %1684 = call float @llvm.AMDIL.clamp.(float %1683, float 0.000000e+00, float 1.000000e+00) %1685 = fmul float %161, 0.000000e+00 %1686 = fmul float %1456, %161 %1687 = fmul float %1458, %161 %1688 = fmul float %1569, %162 %1689 = fadd float %1688, %1685 %1690 = fmul float %162, 0.000000e+00 %1691 = fadd float %1690, %1686 %1692 = fmul float %1567, %162 %1693 = fadd float %1692, %1687 %1694 = fmul float %1678, %163 %1695 = fadd float %1694, %1689 %1696 = fmul float %1680, %163 %1697 = fadd float %1696, %1691 %1698 = fmul float %163, 0.000000e+00 %1699 = fadd float %1698, %1693 %1700 = fcmp une float %38, %temp24.0 %.sink224 = select i1 %1700, float %41, float %40 %temp48.3 = select i1 %1700, float 1.953125e-03, float 3.906250e-03 %1701 = fdiv float 1.000000e+00, %.sink224 %1702 = fmul float %112, %1701 %1703 = fmul float %111, %1701 %1704 = call float @llvm.floor.f32(float %1702) %1705 = fsub float %1702, %1704 %1706 = call float @llvm.floor.f32(float %1703) %1707 = fsub float %1703, %1706 %1708 = fmul float %42, 2.000000e+00 %1709 = fmul float %1708, %temp48.3 %1710 = fsub float 1.000000e+00, %1709 %1711 = fmul float %temp48.3, %42 %1712 = fmul float %1705, %1710 %1713 = fadd float %1712, %1711 %1714 = fmul float %1707, %1710 %1715 = fadd float %1714, %1711 %1716 = fmul float %1713, %temp24.0 %1717 = fadd float %1716, %temp40.0 %1718 = fmul float %1715, %temp24.0 %1719 = fadd float %1718, %temp41.0 %1720 = bitcast float %1717 to i32 %1721 = bitcast float %1719 to i32 %1722 = bitcast float %238 to i32 %1723 = insertelement <4 x i32> undef, i32 %1720, i32 0 %1724 = insertelement <4 x i32> %1723, i32 %1721, i32 1 %1725 = insertelement <4 x i32> %1724, i32 %1722, i32 2 %1726 = bitcast <8 x i32> %92 to <32 x i8> %1727 = bitcast <4 x i32> %94 to <16 x i8> %1728 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1725, <32 x i8> %1726, <16 x i8> %1727, i32 2) %1729 = extractelement <4 x float> %1728, i32 1 %1730 = extractelement <4 x float> %1728, i32 3 %1731 = fcmp oeq float %temp42.0, 4.000000e+00 %1732 = select i1 %1731, float 1.000000e+00, float 0.000000e+00 %1733 = bitcast float %1717 to i32 %1734 = bitcast float %1719 to i32 %1735 = bitcast float %238 to i32 %1736 = insertelement <4 x i32> undef, i32 %1733, i32 0 %1737 = insertelement <4 x i32> %1736, i32 %1734, i32 1 %1738 = insertelement <4 x i32> %1737, i32 %1735, i32 2 %1739 = bitcast <8 x i32> %84 to <32 x i8> %1740 = bitcast <4 x i32> %86 to <16 x i8> %1741 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1738, <32 x i8> %1739, <16 x i8> %1740, i32 2) %1742 = extractelement <4 x float> %1741, i32 1 %1743 = extractelement <4 x float> %1741, i32 3 %1744 = fcmp oeq float %temp42.0, 3.000000e+00 %1745 = select i1 %1744, float 1.000000e+00, float 0.000000e+00 %1746 = bitcast float %1717 to i32 %1747 = bitcast float %1719 to i32 %1748 = bitcast float %238 to i32 %1749 = insertelement <4 x i32> undef, i32 %1746, i32 0 %1750 = insertelement <4 x i32> %1749, i32 %1747, i32 1 %1751 = insertelement <4 x i32> %1750, i32 %1748, i32 2 %1752 = bitcast <8 x i32> %76 to <32 x i8> %1753 = bitcast <4 x i32> %78 to <16 x i8> %1754 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1751, <32 x i8> %1752, <16 x i8> %1753, i32 2) %1755 = extractelement <4 x float> %1754, i32 1 %1756 = extractelement <4 x float> %1754, i32 3 %1757 = fcmp oeq float %temp42.0, 2.000000e+00 %1758 = select i1 %1757, float 1.000000e+00, float 0.000000e+00 %1759 = bitcast float %1717 to i32 %1760 = bitcast float %1719 to i32 %1761 = bitcast float %238 to i32 %1762 = insertelement <4 x i32> undef, i32 %1759, i32 0 %1763 = insertelement <4 x i32> %1762, i32 %1760, i32 1 %1764 = insertelement <4 x i32> %1763, i32 %1761, i32 2 %1765 = bitcast <8 x i32> %68 to <32 x i8> %1766 = bitcast <4 x i32> %70 to <16 x i8> %1767 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1764, <32 x i8> %1765, <16 x i8> %1766, i32 2) %1768 = extractelement <4 x float> %1767, i32 1 %1769 = extractelement <4 x float> %1767, i32 3 %1770 = fcmp oeq float %temp42.0, 1.000000e+00 %1771 = select i1 %1770, float 1.000000e+00, float 0.000000e+00 %1772 = bitcast float %1717 to i32 %1773 = bitcast float %1719 to i32 %1774 = bitcast float %238 to i32 %1775 = insertelement <4 x i32> undef, i32 %1772, i32 0 %1776 = insertelement <4 x i32> %1775, i32 %1773, i32 1 %1777 = insertelement <4 x i32> %1776, i32 %1774, i32 2 %1778 = bitcast <8 x i32> %60 to <32 x i8> %1779 = bitcast <4 x i32> %62 to <16 x i8> %1780 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1777, <32 x i8> %1778, <16 x i8> %1779, i32 2) %1781 = extractelement <4 x float> %1780, i32 1 %1782 = extractelement <4 x float> %1780, i32 3 %1783 = fcmp oeq float %temp42.0, 0.000000e+00 %1784 = select i1 %1783, float 1.000000e+00, float 0.000000e+00 %1785 = fmul float %1781, %1784 %1786 = fmul float %1782, %1784 %1787 = fmul float %1768, %1771 %1788 = fadd float %1787, %1785 %1789 = fmul float %1769, %1771 %1790 = fadd float %1789, %1786 %1791 = fmul float %1755, %1758 %1792 = fadd float %1791, %1788 %1793 = fmul float %1756, %1758 %1794 = fadd float %1793, %1790 %1795 = fmul float %1742, %1745 %1796 = fadd float %1795, %1792 %1797 = fmul float %1743, %1745 %1798 = fadd float %1797, %1794 %1799 = fmul float %1729, %1732 %1800 = fadd float %1799, %1796 %1801 = fmul float %1730, %1732 %1802 = fadd float %1801, %1798 %1803 = fmul float %1802, 2.000000e+00 %1804 = fadd float %1803, -1.000000e+00 %1805 = fmul float %1800, 2.000000e+00 %1806 = fadd float %1805, -1.000000e+00 %1807 = fmul float %1804, %1804 %1808 = fmul float %1806, %1806 %1809 = fadd float %1807, %1808 %1810 = call float @llvm.AMDIL.clamp.(float %1809, float 0.000000e+00, float 1.000000e+00) %1811 = fcmp une float %38, %temp24.0 %.sink225 = select i1 %1811, float %41, float %40 %temp52.4 = select i1 %1811, float 1.953125e-03, float 3.906250e-03 %1812 = fdiv float 1.000000e+00, %.sink225 %1813 = fmul float %112, %1812 %1814 = fmul float %110, %1812 %1815 = call float @llvm.floor.f32(float %1813) %1816 = fsub float %1813, %1815 %1817 = call float @llvm.floor.f32(float %1814) %1818 = fsub float %1814, %1817 %1819 = fmul float %42, 2.000000e+00 %1820 = fmul float %1819, %temp52.4 %1821 = fsub float 1.000000e+00, %1820 %1822 = fmul float %temp52.4, %42 %1823 = fmul float %1816, %1821 %1824 = fadd float %1823, %1822 %1825 = fmul float %1818, %1821 %1826 = fadd float %1825, %1822 %1827 = fmul float %1824, %temp24.0 %1828 = fadd float %1827, %temp40.0 %1829 = fmul float %1826, %temp24.0 %1830 = fadd float %1829, %temp41.0 %1831 = bitcast float %1828 to i32 %1832 = bitcast float %1830 to i32 %1833 = bitcast float %238 to i32 %1834 = insertelement <4 x i32> undef, i32 %1831, i32 0 %1835 = insertelement <4 x i32> %1834, i32 %1832, i32 1 %1836 = insertelement <4 x i32> %1835, i32 %1833, i32 2 %1837 = bitcast <8 x i32> %92 to <32 x i8> %1838 = bitcast <4 x i32> %94 to <16 x i8> %1839 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1836, <32 x i8> %1837, <16 x i8> %1838, i32 2) %1840 = extractelement <4 x float> %1839, i32 1 %1841 = extractelement <4 x float> %1839, i32 3 %1842 = fcmp oeq float %temp42.0, 4.000000e+00 %1843 = select i1 %1842, float 1.000000e+00, float 0.000000e+00 %1844 = bitcast float %1828 to i32 %1845 = bitcast float %1830 to i32 %1846 = bitcast float %238 to i32 %1847 = insertelement <4 x i32> undef, i32 %1844, i32 0 %1848 = insertelement <4 x i32> %1847, i32 %1845, i32 1 %1849 = insertelement <4 x i32> %1848, i32 %1846, i32 2 %1850 = bitcast <8 x i32> %84 to <32 x i8> %1851 = bitcast <4 x i32> %86 to <16 x i8> %1852 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1849, <32 x i8> %1850, <16 x i8> %1851, i32 2) %1853 = extractelement <4 x float> %1852, i32 1 %1854 = extractelement <4 x float> %1852, i32 3 %1855 = fcmp oeq float %temp42.0, 3.000000e+00 %1856 = select i1 %1855, float 1.000000e+00, float 0.000000e+00 %1857 = bitcast float %1828 to i32 %1858 = bitcast float %1830 to i32 %1859 = bitcast float %238 to i32 %1860 = insertelement <4 x i32> undef, i32 %1857, i32 0 %1861 = insertelement <4 x i32> %1860, i32 %1858, i32 1 %1862 = insertelement <4 x i32> %1861, i32 %1859, i32 2 %1863 = bitcast <8 x i32> %76 to <32 x i8> %1864 = bitcast <4 x i32> %78 to <16 x i8> %1865 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1862, <32 x i8> %1863, <16 x i8> %1864, i32 2) %1866 = extractelement <4 x float> %1865, i32 1 %1867 = extractelement <4 x float> %1865, i32 3 %1868 = fcmp oeq float %temp42.0, 2.000000e+00 %1869 = select i1 %1868, float 1.000000e+00, float 0.000000e+00 %1870 = bitcast float %1828 to i32 %1871 = bitcast float %1830 to i32 %1872 = bitcast float %238 to i32 %1873 = insertelement <4 x i32> undef, i32 %1870, i32 0 %1874 = insertelement <4 x i32> %1873, i32 %1871, i32 1 %1875 = insertelement <4 x i32> %1874, i32 %1872, i32 2 %1876 = bitcast <8 x i32> %68 to <32 x i8> %1877 = bitcast <4 x i32> %70 to <16 x i8> %1878 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1875, <32 x i8> %1876, <16 x i8> %1877, i32 2) %1879 = extractelement <4 x float> %1878, i32 1 %1880 = extractelement <4 x float> %1878, i32 3 %1881 = fcmp oeq float %temp42.0, 1.000000e+00 %1882 = select i1 %1881, float 1.000000e+00, float 0.000000e+00 %1883 = bitcast float %1828 to i32 %1884 = bitcast float %1830 to i32 %1885 = bitcast float %238 to i32 %1886 = insertelement <4 x i32> undef, i32 %1883, i32 0 %1887 = insertelement <4 x i32> %1886, i32 %1884, i32 1 %1888 = insertelement <4 x i32> %1887, i32 %1885, i32 2 %1889 = bitcast <8 x i32> %60 to <32 x i8> %1890 = bitcast <4 x i32> %62 to <16 x i8> %1891 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1888, <32 x i8> %1889, <16 x i8> %1890, i32 2) %1892 = extractelement <4 x float> %1891, i32 1 %1893 = extractelement <4 x float> %1891, i32 3 %1894 = fcmp oeq float %temp42.0, 0.000000e+00 %1895 = select i1 %1894, float 1.000000e+00, float 0.000000e+00 %1896 = fmul float %1892, %1895 %1897 = fmul float %1893, %1895 %1898 = fmul float %1879, %1882 %1899 = fadd float %1898, %1896 %1900 = fmul float %1880, %1882 %1901 = fadd float %1900, %1897 %1902 = fmul float %1866, %1869 %1903 = fadd float %1902, %1899 %1904 = fmul float %1867, %1869 %1905 = fadd float %1904, %1901 %1906 = fmul float %1853, %1856 %1907 = fadd float %1906, %1903 %1908 = fmul float %1854, %1856 %1909 = fadd float %1908, %1905 %1910 = fmul float %1840, %1843 %1911 = fadd float %1910, %1907 %1912 = fmul float %1841, %1843 %1913 = fadd float %1912, %1909 %1914 = fmul float %1913, 2.000000e+00 %1915 = fadd float %1914, -1.000000e+00 %1916 = fmul float %1911, 2.000000e+00 %1917 = fadd float %1916, -1.000000e+00 %1918 = fmul float %1915, %1915 %1919 = fmul float %1917, %1917 %1920 = fadd float %1918, %1919 %1921 = call float @llvm.AMDIL.clamp.(float %1920, float 0.000000e+00, float 1.000000e+00) %1922 = fcmp une float %38, %temp24.0 %.sink226 = select i1 %1922, float %41, float %40 %temp56.4 = select i1 %1922, float 1.953125e-03, float 3.906250e-03 %1923 = fdiv float 1.000000e+00, %.sink226 %1924 = fmul float %110, %1923 %1925 = fmul float %111, %1923 %1926 = call float @llvm.floor.f32(float %1924) %1927 = fsub float %1924, %1926 %1928 = call float @llvm.floor.f32(float %1925) %1929 = fsub float %1925, %1928 %1930 = fmul float %42, 2.000000e+00 %1931 = fmul float %1930, %temp56.4 %1932 = fsub float 1.000000e+00, %1931 %1933 = fmul float %temp56.4, %42 %1934 = fmul float %1927, %1932 %1935 = fadd float %1934, %1933 %1936 = fmul float %1929, %1932 %1937 = fadd float %1936, %1933 %1938 = fmul float %1935, %temp24.0 %1939 = fadd float %1938, %temp40.0 %1940 = fmul float %1937, %temp24.0 %1941 = fadd float %1940, %temp41.0 %1942 = bitcast float %1939 to i32 %1943 = bitcast float %1941 to i32 %1944 = bitcast float %238 to i32 %1945 = insertelement <4 x i32> undef, i32 %1942, i32 0 %1946 = insertelement <4 x i32> %1945, i32 %1943, i32 1 %1947 = insertelement <4 x i32> %1946, i32 %1944, i32 2 %1948 = bitcast <8 x i32> %92 to <32 x i8> %1949 = bitcast <4 x i32> %94 to <16 x i8> %1950 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1947, <32 x i8> %1948, <16 x i8> %1949, i32 2) %1951 = extractelement <4 x float> %1950, i32 1 %1952 = extractelement <4 x float> %1950, i32 3 %1953 = fcmp oeq float %temp42.0, 4.000000e+00 %1954 = select i1 %1953, float 1.000000e+00, float 0.000000e+00 %1955 = bitcast float %1939 to i32 %1956 = bitcast float %1941 to i32 %1957 = bitcast float %238 to i32 %1958 = insertelement <4 x i32> undef, i32 %1955, i32 0 %1959 = insertelement <4 x i32> %1958, i32 %1956, i32 1 %1960 = insertelement <4 x i32> %1959, i32 %1957, i32 2 %1961 = bitcast <8 x i32> %84 to <32 x i8> %1962 = bitcast <4 x i32> %86 to <16 x i8> %1963 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1960, <32 x i8> %1961, <16 x i8> %1962, i32 2) %1964 = extractelement <4 x float> %1963, i32 1 %1965 = extractelement <4 x float> %1963, i32 3 %1966 = fcmp oeq float %temp42.0, 3.000000e+00 %1967 = select i1 %1966, float 1.000000e+00, float 0.000000e+00 %1968 = bitcast float %1939 to i32 %1969 = bitcast float %1941 to i32 %1970 = bitcast float %238 to i32 %1971 = insertelement <4 x i32> undef, i32 %1968, i32 0 %1972 = insertelement <4 x i32> %1971, i32 %1969, i32 1 %1973 = insertelement <4 x i32> %1972, i32 %1970, i32 2 %1974 = bitcast <8 x i32> %76 to <32 x i8> %1975 = bitcast <4 x i32> %78 to <16 x i8> %1976 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1973, <32 x i8> %1974, <16 x i8> %1975, i32 2) %1977 = extractelement <4 x float> %1976, i32 1 %1978 = extractelement <4 x float> %1976, i32 3 %1979 = fcmp oeq float %temp42.0, 2.000000e+00 %1980 = select i1 %1979, float 1.000000e+00, float 0.000000e+00 %1981 = bitcast float %1939 to i32 %1982 = bitcast float %1941 to i32 %1983 = bitcast float %238 to i32 %1984 = insertelement <4 x i32> undef, i32 %1981, i32 0 %1985 = insertelement <4 x i32> %1984, i32 %1982, i32 1 %1986 = insertelement <4 x i32> %1985, i32 %1983, i32 2 %1987 = bitcast <8 x i32> %68 to <32 x i8> %1988 = bitcast <4 x i32> %70 to <16 x i8> %1989 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1986, <32 x i8> %1987, <16 x i8> %1988, i32 2) %1990 = extractelement <4 x float> %1989, i32 1 %1991 = extractelement <4 x float> %1989, i32 3 %1992 = fcmp oeq float %temp42.0, 1.000000e+00 %1993 = select i1 %1992, float 1.000000e+00, float 0.000000e+00 %1994 = bitcast float %1939 to i32 %1995 = bitcast float %1941 to i32 %1996 = bitcast float %238 to i32 %1997 = insertelement <4 x i32> undef, i32 %1994, i32 0 %1998 = insertelement <4 x i32> %1997, i32 %1995, i32 1 %1999 = insertelement <4 x i32> %1998, i32 %1996, i32 2 %2000 = bitcast <8 x i32> %60 to <32 x i8> %2001 = bitcast <4 x i32> %62 to <16 x i8> %2002 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1999, <32 x i8> %2000, <16 x i8> %2001, i32 2) %2003 = extractelement <4 x float> %2002, i32 1 %2004 = extractelement <4 x float> %2002, i32 3 %2005 = fcmp oeq float %temp42.0, 0.000000e+00 %2006 = select i1 %2005, float 1.000000e+00, float 0.000000e+00 %2007 = fmul float %2003, %2006 %2008 = fmul float %2004, %2006 %2009 = fmul float %1990, %1993 %2010 = fadd float %2009, %2007 %2011 = fmul float %1991, %1993 %2012 = fadd float %2011, %2008 %2013 = fmul float %1977, %1980 %2014 = fadd float %2013, %2010 %2015 = fmul float %1978, %1980 %2016 = fadd float %2015, %2012 %2017 = fmul float %1964, %1967 %2018 = fadd float %2017, %2014 %2019 = fmul float %1965, %1967 %2020 = fadd float %2019, %2016 %2021 = fmul float %1951, %1954 %2022 = fadd float %2021, %2018 %2023 = fmul float %1952, %1954 %2024 = fadd float %2023, %2020 %2025 = fmul float %2024, 2.000000e+00 %2026 = fadd float %2025, -1.000000e+00 %2027 = fmul float %2022, 2.000000e+00 %2028 = fadd float %2027, -1.000000e+00 %2029 = fmul float %2026, %2026 %2030 = fmul float %2028, %2028 %2031 = fadd float %2029, %2030 %2032 = call float @llvm.AMDIL.clamp.(float %2031, float 0.000000e+00, float 1.000000e+00) %2033 = fmul float %161, 0.000000e+00 %2034 = fmul float %1804, %161 %2035 = fmul float %1806, %161 %2036 = fmul float %1917, %162 %2037 = fadd float %2036, %2033 %2038 = fmul float %162, 0.000000e+00 %2039 = fadd float %2038, %2034 %2040 = fmul float %1915, %162 %2041 = fadd float %2040, %2035 %2042 = fmul float %2026, %163 %2043 = fadd float %2042, %2037 %2044 = fmul float %2028, %163 %2045 = fadd float %2044, %2039 %2046 = fmul float %163, 0.000000e+00 %2047 = fadd float %2046, %2041 %2048 = fcmp une float %38, %temp28.0 %.sink227 = select i1 %2048, float %41, float %40 %temp40.1 = select i1 %2048, float 1.953125e-03, float 3.906250e-03 %2049 = fdiv float 1.000000e+00, %.sink227 %2050 = fmul float %112, %2049 %2051 = fmul float %111, %2049 %2052 = call float @llvm.floor.f32(float %2050) %2053 = fsub float %2050, %2052 %2054 = call float @llvm.floor.f32(float %2051) %2055 = fsub float %2051, %2054 %2056 = fmul float %42, 2.000000e+00 %2057 = fmul float %2056, %temp40.1 %2058 = fsub float 1.000000e+00, %2057 %2059 = fmul float %temp40.1, %42 %2060 = fmul float %2053, %2058 %2061 = fadd float %2060, %2059 %2062 = fmul float %2055, %2058 %2063 = fadd float %2062, %2059 %2064 = fmul float %2061, %temp28.0 %2065 = fadd float %2064, %temp16.0 %2066 = fmul float %2063, %temp28.0 %2067 = fadd float %2066, %temp17.0 %2068 = bitcast float %2065 to i32 %2069 = bitcast float %2067 to i32 %2070 = bitcast float %238 to i32 %2071 = insertelement <4 x i32> undef, i32 %2068, i32 0 %2072 = insertelement <4 x i32> %2071, i32 %2069, i32 1 %2073 = insertelement <4 x i32> %2072, i32 %2070, i32 2 %2074 = bitcast <8 x i32> %92 to <32 x i8> %2075 = bitcast <4 x i32> %94 to <16 x i8> %2076 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2073, <32 x i8> %2074, <16 x i8> %2075, i32 2) %2077 = extractelement <4 x float> %2076, i32 1 %2078 = extractelement <4 x float> %2076, i32 3 %2079 = fcmp oeq float %temp18.0, 4.000000e+00 %2080 = select i1 %2079, float 1.000000e+00, float 0.000000e+00 %2081 = bitcast float %2065 to i32 %2082 = bitcast float %2067 to i32 %2083 = bitcast float %238 to i32 %2084 = insertelement <4 x i32> undef, i32 %2081, i32 0 %2085 = insertelement <4 x i32> %2084, i32 %2082, i32 1 %2086 = insertelement <4 x i32> %2085, i32 %2083, i32 2 %2087 = bitcast <8 x i32> %84 to <32 x i8> %2088 = bitcast <4 x i32> %86 to <16 x i8> %2089 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2086, <32 x i8> %2087, <16 x i8> %2088, i32 2) %2090 = extractelement <4 x float> %2089, i32 1 %2091 = extractelement <4 x float> %2089, i32 3 %2092 = fcmp oeq float %temp18.0, 3.000000e+00 %2093 = select i1 %2092, float 1.000000e+00, float 0.000000e+00 %2094 = bitcast float %2065 to i32 %2095 = bitcast float %2067 to i32 %2096 = bitcast float %238 to i32 %2097 = insertelement <4 x i32> undef, i32 %2094, i32 0 %2098 = insertelement <4 x i32> %2097, i32 %2095, i32 1 %2099 = insertelement <4 x i32> %2098, i32 %2096, i32 2 %2100 = bitcast <8 x i32> %76 to <32 x i8> %2101 = bitcast <4 x i32> %78 to <16 x i8> %2102 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2099, <32 x i8> %2100, <16 x i8> %2101, i32 2) %2103 = extractelement <4 x float> %2102, i32 1 %2104 = extractelement <4 x float> %2102, i32 3 %2105 = fcmp oeq float %temp18.0, 2.000000e+00 %2106 = select i1 %2105, float 1.000000e+00, float 0.000000e+00 %2107 = bitcast float %2065 to i32 %2108 = bitcast float %2067 to i32 %2109 = bitcast float %238 to i32 %2110 = insertelement <4 x i32> undef, i32 %2107, i32 0 %2111 = insertelement <4 x i32> %2110, i32 %2108, i32 1 %2112 = insertelement <4 x i32> %2111, i32 %2109, i32 2 %2113 = bitcast <8 x i32> %68 to <32 x i8> %2114 = bitcast <4 x i32> %70 to <16 x i8> %2115 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2112, <32 x i8> %2113, <16 x i8> %2114, i32 2) %2116 = extractelement <4 x float> %2115, i32 1 %2117 = extractelement <4 x float> %2115, i32 3 %2118 = fcmp oeq float %temp18.0, 1.000000e+00 %2119 = select i1 %2118, float 1.000000e+00, float 0.000000e+00 %2120 = bitcast float %2065 to i32 %2121 = bitcast float %2067 to i32 %2122 = bitcast float %238 to i32 %2123 = insertelement <4 x i32> undef, i32 %2120, i32 0 %2124 = insertelement <4 x i32> %2123, i32 %2121, i32 1 %2125 = insertelement <4 x i32> %2124, i32 %2122, i32 2 %2126 = bitcast <8 x i32> %60 to <32 x i8> %2127 = bitcast <4 x i32> %62 to <16 x i8> %2128 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2125, <32 x i8> %2126, <16 x i8> %2127, i32 2) %2129 = extractelement <4 x float> %2128, i32 1 %2130 = extractelement <4 x float> %2128, i32 3 %2131 = fcmp oeq float %temp18.0, 0.000000e+00 %2132 = select i1 %2131, float 1.000000e+00, float 0.000000e+00 %2133 = fmul float %2129, %2132 %2134 = fmul float %2130, %2132 %2135 = fmul float %2116, %2119 %2136 = fadd float %2135, %2133 %2137 = fmul float %2117, %2119 %2138 = fadd float %2137, %2134 %2139 = fmul float %2103, %2106 %2140 = fadd float %2139, %2136 %2141 = fmul float %2104, %2106 %2142 = fadd float %2141, %2138 %2143 = fmul float %2090, %2093 %2144 = fadd float %2143, %2140 %2145 = fmul float %2091, %2093 %2146 = fadd float %2145, %2142 %2147 = fmul float %2077, %2080 %2148 = fadd float %2147, %2144 %2149 = fmul float %2078, %2080 %2150 = fadd float %2149, %2146 %2151 = fmul float %2150, 2.000000e+00 %2152 = fadd float %2151, -1.000000e+00 %2153 = fmul float %2148, 2.000000e+00 %2154 = fadd float %2153, -1.000000e+00 %2155 = fmul float %2152, %2152 %2156 = fmul float %2154, %2154 %2157 = fadd float %2155, %2156 %2158 = call float @llvm.AMDIL.clamp.(float %2157, float 0.000000e+00, float 1.000000e+00) %2159 = fcmp une float %38, %temp28.0 %.sink228 = select i1 %2159, float %41, float %40 %temp48.5 = select i1 %2159, float 1.953125e-03, float 3.906250e-03 %2160 = fdiv float 1.000000e+00, %.sink228 %2161 = fmul float %112, %2160 %2162 = fmul float %110, %2160 %2163 = call float @llvm.floor.f32(float %2161) %2164 = fsub float %2161, %2163 %2165 = call float @llvm.floor.f32(float %2162) %2166 = fsub float %2162, %2165 %2167 = fmul float %42, 2.000000e+00 %2168 = fmul float %2167, %temp48.5 %2169 = fsub float 1.000000e+00, %2168 %2170 = fmul float %temp48.5, %42 %2171 = fmul float %2164, %2169 %2172 = fadd float %2171, %2170 %2173 = fmul float %2166, %2169 %2174 = fadd float %2173, %2170 %2175 = fmul float %2172, %temp28.0 %2176 = fadd float %2175, %temp16.0 %2177 = fmul float %2174, %temp28.0 %2178 = fadd float %2177, %temp17.0 %2179 = bitcast float %2176 to i32 %2180 = bitcast float %2178 to i32 %2181 = bitcast float %238 to i32 %2182 = insertelement <4 x i32> undef, i32 %2179, i32 0 %2183 = insertelement <4 x i32> %2182, i32 %2180, i32 1 %2184 = insertelement <4 x i32> %2183, i32 %2181, i32 2 %2185 = bitcast <8 x i32> %92 to <32 x i8> %2186 = bitcast <4 x i32> %94 to <16 x i8> %2187 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2184, <32 x i8> %2185, <16 x i8> %2186, i32 2) %2188 = extractelement <4 x float> %2187, i32 1 %2189 = extractelement <4 x float> %2187, i32 3 %2190 = fcmp oeq float %temp18.0, 4.000000e+00 %2191 = select i1 %2190, float 1.000000e+00, float 0.000000e+00 %2192 = bitcast float %2176 to i32 %2193 = bitcast float %2178 to i32 %2194 = bitcast float %238 to i32 %2195 = insertelement <4 x i32> undef, i32 %2192, i32 0 %2196 = insertelement <4 x i32> %2195, i32 %2193, i32 1 %2197 = insertelement <4 x i32> %2196, i32 %2194, i32 2 %2198 = bitcast <8 x i32> %84 to <32 x i8> %2199 = bitcast <4 x i32> %86 to <16 x i8> %2200 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2197, <32 x i8> %2198, <16 x i8> %2199, i32 2) %2201 = extractelement <4 x float> %2200, i32 1 %2202 = extractelement <4 x float> %2200, i32 3 %2203 = fcmp oeq float %temp18.0, 3.000000e+00 %2204 = select i1 %2203, float 1.000000e+00, float 0.000000e+00 %2205 = bitcast float %2176 to i32 %2206 = bitcast float %2178 to i32 %2207 = bitcast float %238 to i32 %2208 = insertelement <4 x i32> undef, i32 %2205, i32 0 %2209 = insertelement <4 x i32> %2208, i32 %2206, i32 1 %2210 = insertelement <4 x i32> %2209, i32 %2207, i32 2 %2211 = bitcast <8 x i32> %76 to <32 x i8> %2212 = bitcast <4 x i32> %78 to <16 x i8> %2213 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2210, <32 x i8> %2211, <16 x i8> %2212, i32 2) %2214 = extractelement <4 x float> %2213, i32 1 %2215 = extractelement <4 x float> %2213, i32 3 %2216 = fcmp oeq float %temp18.0, 2.000000e+00 %2217 = select i1 %2216, float 1.000000e+00, float 0.000000e+00 %2218 = bitcast float %2176 to i32 %2219 = bitcast float %2178 to i32 %2220 = bitcast float %238 to i32 %2221 = insertelement <4 x i32> undef, i32 %2218, i32 0 %2222 = insertelement <4 x i32> %2221, i32 %2219, i32 1 %2223 = insertelement <4 x i32> %2222, i32 %2220, i32 2 %2224 = bitcast <8 x i32> %68 to <32 x i8> %2225 = bitcast <4 x i32> %70 to <16 x i8> %2226 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2223, <32 x i8> %2224, <16 x i8> %2225, i32 2) %2227 = extractelement <4 x float> %2226, i32 1 %2228 = extractelement <4 x float> %2226, i32 3 %2229 = fcmp oeq float %temp18.0, 1.000000e+00 %2230 = select i1 %2229, float 1.000000e+00, float 0.000000e+00 %2231 = bitcast float %2176 to i32 %2232 = bitcast float %2178 to i32 %2233 = bitcast float %238 to i32 %2234 = insertelement <4 x i32> undef, i32 %2231, i32 0 %2235 = insertelement <4 x i32> %2234, i32 %2232, i32 1 %2236 = insertelement <4 x i32> %2235, i32 %2233, i32 2 %2237 = bitcast <8 x i32> %60 to <32 x i8> %2238 = bitcast <4 x i32> %62 to <16 x i8> %2239 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2236, <32 x i8> %2237, <16 x i8> %2238, i32 2) %2240 = extractelement <4 x float> %2239, i32 1 %2241 = extractelement <4 x float> %2239, i32 3 %2242 = fcmp oeq float %temp18.0, 0.000000e+00 %2243 = select i1 %2242, float 1.000000e+00, float 0.000000e+00 %2244 = fmul float %2240, %2243 %2245 = fmul float %2241, %2243 %2246 = fmul float %2227, %2230 %2247 = fadd float %2246, %2244 %2248 = fmul float %2228, %2230 %2249 = fadd float %2248, %2245 %2250 = fmul float %2214, %2217 %2251 = fadd float %2250, %2247 %2252 = fmul float %2215, %2217 %2253 = fadd float %2252, %2249 %2254 = fmul float %2201, %2204 %2255 = fadd float %2254, %2251 %2256 = fmul float %2202, %2204 %2257 = fadd float %2256, %2253 %2258 = fmul float %2188, %2191 %2259 = fadd float %2258, %2255 %2260 = fmul float %2189, %2191 %2261 = fadd float %2260, %2257 %2262 = fmul float %2261, 2.000000e+00 %2263 = fadd float %2262, -1.000000e+00 %2264 = fmul float %2259, 2.000000e+00 %2265 = fadd float %2264, -1.000000e+00 %2266 = fmul float %2263, %2263 %2267 = fmul float %2265, %2265 %2268 = fadd float %2266, %2267 %2269 = call float @llvm.AMDIL.clamp.(float %2268, float 0.000000e+00, float 1.000000e+00) %2270 = fcmp une float %38, %temp28.0 %.sink229 = select i1 %2270, float %41, float %40 %temp52.6 = select i1 %2270, float 1.953125e-03, float 3.906250e-03 %2271 = fdiv float 1.000000e+00, %.sink229 %2272 = fmul float %110, %2271 %2273 = fmul float %111, %2271 %2274 = call float @llvm.floor.f32(float %2272) %2275 = fsub float %2272, %2274 %2276 = call float @llvm.floor.f32(float %2273) %2277 = fsub float %2273, %2276 %2278 = fmul float %42, 2.000000e+00 %2279 = fmul float %2278, %temp52.6 %2280 = fsub float 1.000000e+00, %2279 %2281 = fmul float %temp52.6, %42 %2282 = fmul float %2275, %2280 %2283 = fadd float %2282, %2281 %2284 = fmul float %2277, %2280 %2285 = fadd float %2284, %2281 %2286 = fmul float %2283, %temp28.0 %2287 = fadd float %2286, %temp16.0 %2288 = fmul float %2285, %temp28.0 %2289 = fadd float %2288, %temp17.0 %2290 = bitcast float %2287 to i32 %2291 = bitcast float %2289 to i32 %2292 = bitcast float %238 to i32 %2293 = insertelement <4 x i32> undef, i32 %2290, i32 0 %2294 = insertelement <4 x i32> %2293, i32 %2291, i32 1 %2295 = insertelement <4 x i32> %2294, i32 %2292, i32 2 %2296 = bitcast <8 x i32> %92 to <32 x i8> %2297 = bitcast <4 x i32> %94 to <16 x i8> %2298 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2295, <32 x i8> %2296, <16 x i8> %2297, i32 2) %2299 = extractelement <4 x float> %2298, i32 1 %2300 = extractelement <4 x float> %2298, i32 3 %2301 = fcmp oeq float %temp18.0, 4.000000e+00 %2302 = select i1 %2301, float 1.000000e+00, float 0.000000e+00 %2303 = bitcast float %2287 to i32 %2304 = bitcast float %2289 to i32 %2305 = bitcast float %238 to i32 %2306 = insertelement <4 x i32> undef, i32 %2303, i32 0 %2307 = insertelement <4 x i32> %2306, i32 %2304, i32 1 %2308 = insertelement <4 x i32> %2307, i32 %2305, i32 2 %2309 = bitcast <8 x i32> %84 to <32 x i8> %2310 = bitcast <4 x i32> %86 to <16 x i8> %2311 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2308, <32 x i8> %2309, <16 x i8> %2310, i32 2) %2312 = extractelement <4 x float> %2311, i32 1 %2313 = extractelement <4 x float> %2311, i32 3 %2314 = fcmp oeq float %temp18.0, 3.000000e+00 %2315 = select i1 %2314, float 1.000000e+00, float 0.000000e+00 %2316 = bitcast float %2287 to i32 %2317 = bitcast float %2289 to i32 %2318 = bitcast float %238 to i32 %2319 = insertelement <4 x i32> undef, i32 %2316, i32 0 %2320 = insertelement <4 x i32> %2319, i32 %2317, i32 1 %2321 = insertelement <4 x i32> %2320, i32 %2318, i32 2 %2322 = bitcast <8 x i32> %76 to <32 x i8> %2323 = bitcast <4 x i32> %78 to <16 x i8> %2324 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2321, <32 x i8> %2322, <16 x i8> %2323, i32 2) %2325 = extractelement <4 x float> %2324, i32 1 %2326 = extractelement <4 x float> %2324, i32 3 %2327 = fcmp oeq float %temp18.0, 2.000000e+00 %2328 = select i1 %2327, float 1.000000e+00, float 0.000000e+00 %2329 = bitcast float %2287 to i32 %2330 = bitcast float %2289 to i32 %2331 = bitcast float %238 to i32 %2332 = insertelement <4 x i32> undef, i32 %2329, i32 0 %2333 = insertelement <4 x i32> %2332, i32 %2330, i32 1 %2334 = insertelement <4 x i32> %2333, i32 %2331, i32 2 %2335 = bitcast <8 x i32> %68 to <32 x i8> %2336 = bitcast <4 x i32> %70 to <16 x i8> %2337 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2334, <32 x i8> %2335, <16 x i8> %2336, i32 2) %2338 = extractelement <4 x float> %2337, i32 1 %2339 = extractelement <4 x float> %2337, i32 3 %2340 = fcmp oeq float %temp18.0, 1.000000e+00 %2341 = select i1 %2340, float 1.000000e+00, float 0.000000e+00 %2342 = bitcast float %2287 to i32 %2343 = bitcast float %2289 to i32 %2344 = bitcast float %238 to i32 %2345 = insertelement <4 x i32> undef, i32 %2342, i32 0 %2346 = insertelement <4 x i32> %2345, i32 %2343, i32 1 %2347 = insertelement <4 x i32> %2346, i32 %2344, i32 2 %2348 = bitcast <8 x i32> %60 to <32 x i8> %2349 = bitcast <4 x i32> %62 to <16 x i8> %2350 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2347, <32 x i8> %2348, <16 x i8> %2349, i32 2) %2351 = extractelement <4 x float> %2350, i32 1 %2352 = extractelement <4 x float> %2350, i32 3 %2353 = fcmp oeq float %temp18.0, 0.000000e+00 %2354 = select i1 %2353, float 1.000000e+00, float 0.000000e+00 %2355 = fmul float %2351, %2354 %2356 = fmul float %2352, %2354 %2357 = fmul float %2338, %2341 %2358 = fadd float %2357, %2355 %2359 = fmul float %2339, %2341 %2360 = fadd float %2359, %2356 %2361 = fmul float %2325, %2328 %2362 = fadd float %2361, %2358 %2363 = fmul float %2326, %2328 %2364 = fadd float %2363, %2360 %2365 = fmul float %2312, %2315 %2366 = fadd float %2365, %2362 %2367 = fmul float %2313, %2315 %2368 = fadd float %2367, %2364 %2369 = fmul float %2299, %2302 %2370 = fadd float %2369, %2366 %2371 = fmul float %2300, %2302 %2372 = fadd float %2371, %2368 %2373 = fmul float %2372, 2.000000e+00 %2374 = fadd float %2373, -1.000000e+00 %2375 = fmul float %2370, 2.000000e+00 %2376 = fadd float %2375, -1.000000e+00 %2377 = fmul float %2374, %2374 %2378 = fmul float %2376, %2376 %2379 = fadd float %2377, %2378 %2380 = call float @llvm.AMDIL.clamp.(float %2379, float 0.000000e+00, float 1.000000e+00) %2381 = fmul float %161, 0.000000e+00 %2382 = fmul float %2152, %161 %2383 = fmul float %2154, %161 %2384 = fmul float %2265, %162 %2385 = fadd float %2384, %2381 %2386 = fmul float %162, 0.000000e+00 %2387 = fadd float %2386, %2382 %2388 = fmul float %2263, %162 %2389 = fadd float %2388, %2383 %2390 = fmul float %2374, %163 %2391 = fadd float %2390, %2385 %2392 = fmul float %2376, %163 %2393 = fadd float %2392, %2387 %2394 = fmul float %163, 0.000000e+00 %2395 = fadd float %2394, %2389 %2396 = fmul float %99, %1695 %2397 = fmul float %99, %1697 %2398 = fmul float %99, %1699 %2399 = fmul float %100, %2043 %2400 = fadd float %2399, %2396 %2401 = fmul float %100, %2045 %2402 = fadd float %2401, %2397 %2403 = fmul float %100, %2047 %2404 = fadd float %2403, %2398 %2405 = fmul float %101, %2391 %2406 = fadd float %2405, %2400 %2407 = fmul float %101, %2393 %2408 = fadd float %2407, %2402 %2409 = fmul float %101, %2395 %2410 = fadd float %2409, %2404 %2411 = fmul float %2406, %2406 %2412 = fmul float %2408, %2408 %2413 = fadd float %2411, %2412 %2414 = fmul float %2410, %2410 %2415 = fadd float %2413, %2414 %2416 = fadd float %2415, 1.000000e+00 %2417 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2416) %2418 = fmul float %2406, %2417 %2419 = fmul float %2408, %2417 %2420 = fmul float %2410, %2417 %2421 = fmul float %2418, %98 %2422 = fmul float %2419, %98 %2423 = fmul float %2420, %98 %2424 = fsub float %107, %2421 %2425 = fsub float %108, %2422 %2426 = fsub float %109, %2423 %2427 = fmul float %2424, %2424 %2428 = fmul float %2425, %2425 %2429 = fadd float %2428, %2427 %2430 = fmul float %2426, %2426 %2431 = fadd float %2429, %2430 %2432 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2431) %2433 = fmul float %2424, %2432 %2434 = fmul float %2425, %2432 %2435 = fmul float %2426, %2432 %2436 = fdiv float %103, %105 %2437 = fdiv float %104, %105 %2438 = bitcast float %2436 to i32 %2439 = bitcast float %2437 to i32 %2440 = insertelement <2 x i32> undef, i32 %2438, i32 0 %2441 = insertelement <2 x i32> %2440, i32 %2439, i32 1 %2442 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %2441, <32 x i8> %52, <16 x i8> %54, i32 2) %2443 = extractelement <4 x float> %2442, i32 0 %2444 = fmul float %1347, %113 %2445 = fmul float %1349, %114 %2446 = fmul float %1351, %115 %2447 = fadd float %122, %134 %2448 = fadd float %123, %135 %2449 = fadd float %124, %136 %2450 = fmul float %2447, %2447 %2451 = fmul float %2448, %2448 %2452 = fadd float %2451, %2450 %2453 = fmul float %2449, %2449 %2454 = fadd float %2452, %2453 %2455 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2454) %2456 = fmul float %2447, %2455 %2457 = fmul float %2448, %2455 %2458 = fmul float %2449, %2455 %2459 = fmul float %2433, %2456 %2460 = fmul float %2434, %2457 %2461 = fadd float %2460, %2459 %2462 = fmul float %2435, %2458 %2463 = fadd float %2461, %2462 %2464 = call float @llvm.maxnum.f32(float %2463, float 0x3F1A36E2E0000000) %2465 = fmul float %102, 3.200000e+01 %2466 = call float @llvm.pow.f32(float %2464, float %2465) %2467 = call float @llvm.AMDIL.clamp.(float %2466, float 0.000000e+00, float 1.000000e+00) %2468 = fmul float %2467, 2.000000e+00 %2469 = fsub float 3.000000e+00, %2468 %2470 = fmul float %2467, %2469 %2471 = fmul float %2467, %2470 %2472 = fmul float %2471, %102 %2473 = fmul float %1347, %35 %2474 = fmul float %1349, %36 %2475 = fmul float %1351, %37 %2476 = fmul float %2433, %122 %2477 = fmul float %2434, %123 %2478 = fadd float %2477, %2476 %2479 = fmul float %2435, %124 %2480 = fadd float %2478, %2479 %2481 = call float @llvm.AMDIL.clamp.(float %2480, float 0.000000e+00, float 1.000000e+00) %2482 = fmul float %48, 2.000000e+00 %2483 = fmul float %49, 2.000000e+00 %2484 = fmul float %50, 2.000000e+00 %2485 = fmul float %2482, %2443 %2486 = fmul float %2483, %2443 %2487 = fmul float %2484, %2443 %2488 = call float @llvm.maxnum.f32(float %2485, float %45) %2489 = call float @llvm.maxnum.f32(float %2486, float %46) %2490 = call float @llvm.maxnum.f32(float %2487, float %47) %2491 = call float @llvm.minnum.f32(float %2488, float 1.000000e+00) %2492 = call float @llvm.minnum.f32(float %2489, float 1.000000e+00) %2493 = call float @llvm.minnum.f32(float %2490, float 1.000000e+00) %2494 = fmul float %2491, %1347 %2495 = fmul float %2492, %1349 %2496 = fmul float %2493, %1351 %2497 = fmul float %2473, %2481 %2498 = fadd float %2497, %2494 %2499 = fmul float %2474, %2481 %2500 = fadd float %2499, %2495 %2501 = fmul float %2475, %2481 %2502 = fadd float %2501, %2496 %2503 = fmul float %35, %2472 %2504 = fadd float %2503, %2498 %2505 = fmul float %36, %2472 %2506 = fadd float %2505, %2500 %2507 = fmul float %37, %2472 %2508 = fadd float %2507, %2502 %2509 = fmul float %2504, %2443 %2510 = fmul float %2506, %2443 %2511 = fmul float %2508, %2443 %2512 = fmul float %2509, 5.000000e-01 %2513 = fmul float %2510, 5.000000e-01 %2514 = fmul float %2511, 5.000000e-01 %2515 = fadd float %2444, %2512 %2516 = fadd float %2445, %2513 %2517 = fadd float %2446, %2514 %2518 = fmul float %106, %33 %2519 = fadd float %2518, %34 %2520 = call float @llvm.AMDIL.clamp.(float %2519, float 0.000000e+00, float 1.000000e+00) %2521 = call float @llvm.AMDGPU.lrp(float %2520, float %2515, float %30) %2522 = call float @llvm.AMDGPU.lrp(float %2520, float %2516, float %31) %2523 = call float @llvm.AMDGPU.lrp(float %2520, float %2517, float %32) %2524 = call i32 @llvm.SI.packf16(float %2521, float %2522) %2525 = bitcast i32 %2524 to float %2526 = call i32 @llvm.SI.packf16(float %2523, float 1.000000e+00) %2527 = bitcast i32 %2526 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %2525, float %2527, float %2525, float %2527) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR Stacktrace: Native stacktrace: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libmono.so(+0x915c6) [0x7f48efa425c6] /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libmono.so(+0x3481b) [0x7f48ef9e581b] /usr/lib/libpthread.so.0(+0x10d60) [0x7f48f5840d60] /usr/lib/libLLVM.so.3.7(_ZN4llvm17AMDGPUInstPrinter15getRegisterNameEj+0xa) [0x7f48e6bce59a] /usr/lib/libLLVM.so.3.7(_ZN4llvm17AMDGPUInstPrinter15printRegOperandEjRNS_11raw_ostreamERKNS_14MCRegisterInfoE+0x6b1) [0x7f48e6bcec61] /usr/lib/libLLVM.so.3.7(_ZN4llvm17AMDGPUInstPrinter12printOperandEPKNS_6MCInstEjRNS_11raw_ostreamE+0x86) [0x7f48e6bcecf6] /usr/lib/libLLVM.so.3.7(_ZN4llvm17AMDGPUInstPrinter16printInstructionEPKNS_6MCInstERNS_11raw_ostreamE+0x9e0) [0x7f48e6bcfa70] /usr/lib/libLLVM.so.3.7(_ZN4llvm17AMDGPUInstPrinter9printInstEPKNS_6MCInstERNS_11raw_ostreamENS_9StringRefERKNS_15MCSubtargetInfoE+0x37) [0x7f48e6bd1527] /usr/lib/libLLVM.so.3.7(_ZN4llvm16AMDGPUAsmPrinter15EmitInstructionEPKNS_12MachineInstrE+0x29c) [0x7f48e6b3acec] /usr/lib/libLLVM.so.3.7(_ZN4llvm10AsmPrinter16EmitFunctionBodyEv+0x8b1) [0x7f48e637ad91] /usr/lib/libLLVM.so.3.7(_ZN4llvm16AMDGPUAsmPrinter20runOnMachineFunctionERNS_15MachineFunctionE+0x2fa) [0x7f48e6b3100a] /usr/lib/libLLVM.so.3.7(_ZN4llvm13FPPassManager13runOnFunctionERNS_8FunctionE+0x27f) [0x7f48e5f7385f] /usr/lib/libLLVM.so.3.7(_ZN4llvm13FPPassManager11runOnModuleERNS_6ModuleE+0x2b) [0x7f48e5f73bfb] /usr/lib/libLLVM.so.3.7(_ZN4llvm6legacy15PassManagerImpl3runERNS_6ModuleE+0x2f6) [0x7f48e5f733e6] /usr/lib/libLLVM.so.3.7(LLVMTargetMachineEmitToMemoryBuffer+0x1b8) [0x7f48e6a09eb8] /usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x652bbb) [0x7f48edd67bbb] /usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x5b7cc8) [0x7f48edccccc8] /usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x5b8759) [0x7f48edccd759] /usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x5c1e67) [0x7f48edcd6e67] /usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x5c268a) [0x7f48edcd768a] /usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x5bf4fb) [0x7f48edcd44fb] /usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x37b787) [0x7f48eda90787] /usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x1d94df) [0x7f48ed8ee4df] /usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x1ab448) [0x7f48ed8c0448] /usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x1ab8c9) [0x7f48ed8c08c9] ./PE_Client.x86_64() [0xd48b9d] ./PE_Client.x86_64() [0x5f78df] ./PE_Client.x86_64() [0x5f7956] ./PE_Client.x86_64() [0x4a2f0c] ./PE_Client.x86_64() [0x4a3ef0] ./PE_Client.x86_64() [0x4a593e] ./PE_Client.x86_64() [0x4b7b7a] ./PE_Client.x86_64() [0x4c4d5c] ./PE_Client.x86_64() [0x5157e4] ./PE_Client.x86_64() [0x706b88] ./PE_Client.x86_64() [0x46482f] /usr/lib/libc.so.6(__libc_start_main+0xf0) [0x7f48f402f610] ./PE_Client.x86_64() [0x46dd0d] Debug info from gdb: ptrace: Operation not permitted. No threads. ================================================================= Got a SIGSEGV while executing native code. This usually indicates a fatal error in the mono runtime or one of the native libraries used by your application. =================================================================